1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 /*
59 * On boot up, the ring buffer is set to the minimum size, so that
60 * we do not waste memory on systems that are not using tracing.
61 */
62 bool ring_buffer_expanded;
63
64 #ifdef CONFIG_FTRACE_STARTUP_TEST
65 /*
66 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer, such as trace_printk(), could occur
 * at the same time and give false positive or negative results.
71 */
72 static bool __read_mostly tracing_selftest_running;
73
74 /*
75 * If boot-time tracing including tracers/events via kernel cmdline
76 * is running, we do not want to run SELFTEST.
77 */
78 bool __read_mostly tracing_selftest_disabled;
79
void __init disable_tracing_selftest(const char *reason)
81 {
82 if (!tracing_selftest_disabled) {
83 tracing_selftest_disabled = true;
84 pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 }
86 }
87 #else
88 #define tracing_selftest_running 0
89 #define tracing_selftest_disabled 0
90 #endif
91
92 /* Pipe tracepoints to printk */
93 static struct trace_iterator *tracepoint_print_iter;
94 int tracepoint_printk;
95 static bool tracepoint_printk_stop_on_boot __initdata;
96 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97
98 /* For tracers that don't implement custom flags */
99 static struct tracer_opt dummy_tracer_opt[] = {
100 { }
101 };
102
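/* No-op set_flag callback for tracers that don't provide their own. */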
103 static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
105 {
106 return 0;
107 }
108
109 /*
110 * To prevent the comm cache from being overwritten when no
111 * tracing is active, only save the comm when a trace event
112 * occurred.
113 */
114 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
115
116 /*
117 * Kill all tracing for good (never come back).
118 * It is initialized to 1 but will turn to zero if the initialization
119 * of the tracer is successful. But that is the only place that sets
120 * this back to zero.
121 */
122 static int tracing_disabled = 1;
123
124 cpumask_var_t __read_mostly tracing_buffer_mask;
125
126 /*
127 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
128 *
129 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
130 * is set, then ftrace_dump is called. This will output the contents
131 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
133 * serial console.
134 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set it to 1 to dump the buffers of all CPUs.
 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
140 */
141
142 enum ftrace_dump_mode ftrace_dump_on_oops;
143
144 /* When set, tracing will stop when a WARN*() is hit */
145 int __disable_trace_on_warning;
146
147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
148 /* Map of enums to their values, for "eval_map" file */
149 struct trace_eval_map_head {
150 struct module *mod;
151 unsigned long length;
152 };
153
154 union trace_eval_map_item;
155
156 struct trace_eval_map_tail {
157 /*
158 * "end" is first and points to NULL as it must be different
159 * than "mod" or "eval_string"
160 */
161 union trace_eval_map_item *next;
162 const char *end; /* points to NULL */
163 };
164
165 static DEFINE_MUTEX(trace_eval_mutex);
166
167 /*
168 * The trace_eval_maps are saved in an array with two extra elements,
169 * one at the beginning, and one at the end. The beginning item contains
170 * the count of the saved maps (head.length), and the module they
171 * belong to if not built in (head.mod). The ending item contains a
172 * pointer to the next array of saved eval_map items.
173 */
174 union trace_eval_map_item {
175 struct trace_eval_map map;
176 struct trace_eval_map_head head;
177 struct trace_eval_map_tail tail;
178 };
179
180 static union trace_eval_map_item *trace_eval_maps;
181 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182
183 int tracing_set_tracer(struct trace_array *tr, const char *buf);
184 static void ftrace_trace_userstack(struct trace_array *tr,
185 struct trace_buffer *buffer,
186 unsigned int trace_ctx);
187
188 #define MAX_TRACER_SIZE 100
189 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
190 static char *default_bootup_tracer;
191
192 static bool allocate_snapshot;
193 static bool snapshot_at_boot;
194
195 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_instance_index;
197
198 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
199 static int boot_snapshot_index;
200
static int __init set_cmdline_ftrace(char *str)
202 {
203 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
204 default_bootup_tracer = bootup_tracer_buf;
205 /* We are using ftrace early, expand it */
206 ring_buffer_expanded = true;
207 return 1;
208 }
209 __setup("ftrace=", set_cmdline_ftrace);
210
static int __init set_ftrace_dump_on_oops(char *str)
212 {
213 if (*str++ != '=' || !*str || !strcmp("1", str)) {
214 ftrace_dump_on_oops = DUMP_ALL;
215 return 1;
216 }
217
218 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
219 ftrace_dump_on_oops = DUMP_ORIG;
220 return 1;
221 }
222
223 return 0;
224 }
225 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
226
static int __init stop_trace_on_warning(char *str)
228 {
229 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
230 __disable_trace_on_warning = 1;
231 return 1;
232 }
233 __setup("traceoff_on_warning", stop_trace_on_warning);
234
static int __init boot_alloc_snapshot(char *str)
236 {
237 char *slot = boot_snapshot_info + boot_snapshot_index;
238 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
239 int ret;
240
241 if (str[0] == '=') {
242 str++;
243 if (strlen(str) >= left)
244 return -1;
245
246 ret = snprintf(slot, left, "%s\t", str);
247 boot_snapshot_index += ret;
248 } else {
249 allocate_snapshot = true;
250 /* We also need the main ring buffer expanded */
251 ring_buffer_expanded = true;
252 }
253 return 1;
254 }
255 __setup("alloc_snapshot", boot_alloc_snapshot);
256
257
static int __init boot_snapshot(char *str)
259 {
260 snapshot_at_boot = true;
261 boot_alloc_snapshot(str);
262 return 1;
263 }
264 __setup("ftrace_boot_snapshot", boot_snapshot);
265
266
static int __init boot_instance(char *str)
268 {
269 char *slot = boot_instance_info + boot_instance_index;
270 int left = sizeof(boot_instance_info) - boot_instance_index;
271 int ret;
272
273 if (strlen(str) >= left)
274 return -1;
275
276 ret = snprintf(slot, left, "%s\t", str);
277 boot_instance_index += ret;
278
279 return 1;
280 }
281 __setup("trace_instance=", boot_instance);
282
283
284 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285
static int __init set_trace_boot_options(char *str)
287 {
288 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
289 return 1;
290 }
291 __setup("trace_options=", set_trace_boot_options);
292
293 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
294 static char *trace_boot_clock __initdata;
295
static int __init set_trace_boot_clock(char *str)
297 {
298 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
299 trace_boot_clock = trace_boot_clock_buf;
300 return 1;
301 }
302 __setup("trace_clock=", set_trace_boot_clock);
303
static int __init set_tracepoint_printk(char *str)
305 {
306 /* Ignore the "tp_printk_stop_on_boot" param */
307 if (*str == '_')
308 return 0;
309
310 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
311 tracepoint_printk = 1;
312 return 1;
313 }
314 __setup("tp_printk", set_tracepoint_printk);
315
static int __init set_tracepoint_printk_stop(char *str)
317 {
318 tracepoint_printk_stop_on_boot = true;
319 return 1;
320 }
321 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322
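/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */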
unsigned long long ns2usecs(u64 nsec)
324 {
325 nsec += 500;
326 do_div(nsec, 1000);
327 return nsec;
328 }
329
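/*
 * Hand a ring buffer event to an export's write() callback, but only if
 * the export has registered interest in this class of event (@flag).
 */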
330 static void
trace_process_export(struct trace_export *export,
332 struct ring_buffer_event *event, int flag)
333 {
334 struct trace_entry *entry;
335 unsigned int size = 0;
336
337 if (export->flags & flag) {
338 entry = ring_buffer_event_data(event);
339 size = ring_buffer_event_length(event);
340 export->write(export, entry, size);
341 }
342 }
343
344 static DEFINE_MUTEX(ftrace_export_lock);
345
346 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
347
348 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
350 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351
static inline void ftrace_exports_enable(struct trace_export *export)
353 {
354 if (export->flags & TRACE_EXPORT_FUNCTION)
355 static_branch_inc(&trace_function_exports_enabled);
356
357 if (export->flags & TRACE_EXPORT_EVENT)
358 static_branch_inc(&trace_event_exports_enabled);
359
360 if (export->flags & TRACE_EXPORT_MARKER)
361 static_branch_inc(&trace_marker_exports_enabled);
362 }
363
static inline void ftrace_exports_disable(struct trace_export *export)
365 {
366 if (export->flags & TRACE_EXPORT_FUNCTION)
367 static_branch_dec(&trace_function_exports_enabled);
368
369 if (export->flags & TRACE_EXPORT_EVENT)
370 static_branch_dec(&trace_event_exports_enabled);
371
372 if (export->flags & TRACE_EXPORT_MARKER)
373 static_branch_dec(&trace_marker_exports_enabled);
374 }
375
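/*
 * Pass the event to every registered export that cares about it.
 * The export list is walked locklessly with preemption disabled.
 */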
static void ftrace_exports(struct ring_buffer_event *event, int flag)
377 {
378 struct trace_export *export;
379
380 preempt_disable_notrace();
381
382 export = rcu_dereference_raw_check(ftrace_exports_list);
383 while (export) {
384 trace_process_export(export, event, flag);
385 export = rcu_dereference_raw_check(export->next);
386 }
387
388 preempt_enable_notrace();
389 }
390
391 static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
393 {
394 rcu_assign_pointer(export->next, *list);
395 /*
396 * We are entering export into the list but another
397 * CPU might be walking that list. We need to make sure
398 * the export->next pointer is valid before another CPU sees
399 * the export pointer included into the list.
400 */
401 rcu_assign_pointer(*list, export);
402 }
403
404 static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
406 {
407 struct trace_export **p;
408
409 for (p = list; *p != NULL; p = &(*p)->next)
410 if (*p == export)
411 break;
412
413 if (*p != export)
414 return -1;
415
416 rcu_assign_pointer(*p, (*p)->next);
417
418 return 0;
419 }
420
421 static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
423 {
424 ftrace_exports_enable(export);
425
426 add_trace_export(list, export);
427 }
428
429 static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
431 {
432 int ret;
433
434 ret = rm_trace_export(list, export);
435 ftrace_exports_disable(export);
436
437 return ret;
438 }
439
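/*
 * register_ftrace_export - add an export to the global export list
 *
 * The export must provide a write() callback. While registered, the
 * static keys matching export->flags are incremented so that the
 * corresponding trace paths start calling into ftrace_exports().
 */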
int register_ftrace_export(struct trace_export *export)
441 {
442 if (WARN_ON_ONCE(!export->write))
443 return -1;
444
445 mutex_lock(&ftrace_export_lock);
446
447 add_ftrace_export(&ftrace_exports_list, export);
448
449 mutex_unlock(&ftrace_export_lock);
450
451 return 0;
452 }
453 EXPORT_SYMBOL_GPL(register_ftrace_export);
454
int unregister_ftrace_export(struct trace_export *export)
456 {
457 int ret;
458
459 mutex_lock(&ftrace_export_lock);
460
461 ret = rm_ftrace_export(&ftrace_exports_list, export);
462
463 mutex_unlock(&ftrace_export_lock);
464
465 return ret;
466 }
467 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
468
469 /* trace_flags holds trace_options default values */
470 #define TRACE_DEFAULT_FLAGS \
471 (FUNCTION_DEFAULT_FLAGS | \
472 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
473 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
474 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
475 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
476 TRACE_ITER_HASH_PTR)
477
478 /* trace_options that are only supported by global_trace */
479 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
480 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481
482 /* trace_flags that are default zero for instances */
483 #define ZEROED_TRACE_FLAGS \
484 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
485
486 /*
487 * The global_trace is the descriptor that holds the top-level tracing
488 * buffers for the live tracing.
489 */
490 static struct trace_array global_trace = {
491 .trace_flags = TRACE_DEFAULT_FLAGS,
492 };
493
494 LIST_HEAD(ftrace_trace_arrays);
495
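/*
 * trace_array_get - take a reference on a trace array
 *
 * Returns 0 and increments the reference count if @this_tr is found on
 * ftrace_trace_arrays, or -ENODEV if it is not a registered trace array.
 */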
int trace_array_get(struct trace_array *this_tr)
497 {
498 struct trace_array *tr;
499 int ret = -ENODEV;
500
501 mutex_lock(&trace_types_lock);
502 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 if (tr == this_tr) {
504 tr->ref++;
505 ret = 0;
506 break;
507 }
508 }
509 mutex_unlock(&trace_types_lock);
510
511 return ret;
512 }
513
static void __trace_array_put(struct trace_array *this_tr)
515 {
516 WARN_ON(!this_tr->ref);
517 this_tr->ref--;
518 }
519
520 /**
521 * trace_array_put - Decrement the reference counter for this trace array.
522 * @this_tr : pointer to the trace array
523 *
524 * NOTE: Use this when we no longer need the trace array returned by
525 * trace_array_get_by_name(). This ensures the trace array can be later
526 * destroyed.
527 *
528 */
void trace_array_put(struct trace_array *this_tr)
530 {
531 if (!this_tr)
532 return;
533
534 mutex_lock(&trace_types_lock);
535 __trace_array_put(this_tr);
536 mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539
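/*
 * Common checks performed when opening a tracefs file: fail if tracefs
 * is locked down, if tracing is disabled, or if a reference on @tr
 * (when one is supplied) cannot be taken.
 */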
int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 int ret;
543
544 ret = security_locked_down(LOCKDOWN_TRACEFS);
545 if (ret)
546 return ret;
547
548 if (tracing_disabled)
549 return -ENODEV;
550
551 if (tr && trace_array_get(tr) < 0)
552 return -ENODEV;
553
554 return 0;
555 }
556
int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 struct trace_buffer *buffer,
559 struct ring_buffer_event *event)
560 {
561 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 !filter_match_preds(call->filter, rec)) {
563 __trace_event_discard_commit(buffer, event);
564 return 1;
565 }
566
567 return 0;
568 }
569
570 /**
571 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572 * @filtered_pids: The list of pids to check
573 * @search_pid: The PID to find in @filtered_pids
574 *
575 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576 */
577 bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582
583 /**
584 * trace_ignore_this_task - should a task be ignored for tracing
585 * @filtered_pids: The list of pids to check
586 * @filtered_no_pids: The list of pids not to be traced
587 * @task: The task that should be ignored if not filtered
588 *
589 * Checks if @task should be traced or not from @filtered_pids.
590 * Returns true if @task should *NOT* be traced.
591 * Returns false if @task should be traced.
592 */
593 bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 struct trace_pid_list *filtered_no_pids,
596 struct task_struct *task)
597 {
598 /*
599 * If filtered_no_pids is not empty, and the task's pid is listed
600 * in filtered_no_pids, then return true.
601 * Otherwise, if filtered_pids is empty, that means we can
602 * trace all tasks. If it has content, then only trace pids
603 * within filtered_pids.
604 */
605
606 return (filtered_pids &&
607 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 (filtered_no_pids &&
609 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611
612 /**
613 * trace_filter_add_remove_task - Add or remove a task from a pid_list
614 * @pid_list: The list to modify
615 * @self: The current task for fork or NULL for exit
616 * @task: The task to add or remove
617 *
 * When adding a task, if @self is defined, the task is only added if @self
619 * is also included in @pid_list. This happens on fork and tasks should
620 * only be added when the parent is listed. If @self is NULL, then the
621 * @task pid will be removed from the list, which would happen on exit
622 * of a task.
623 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 struct task_struct *self,
626 struct task_struct *task)
627 {
628 if (!pid_list)
629 return;
630
631 /* For forks, we only add if the forking task is listed */
632 if (self) {
633 if (!trace_find_filtered_pid(pid_list, self->pid))
634 return;
635 }
636
637 /* "self" is set for forks, and NULL for exits */
638 if (self)
639 trace_pid_list_set(pid_list, task->pid);
640 else
641 trace_pid_list_clear(pid_list, task->pid);
642 }
643
644 /**
645 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646 * @pid_list: The pid list to show
647 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648 * @pos: The position of the file
649 *
650 * This is used by the seq_file "next" operation to iterate the pids
651 * listed in a trace_pid_list structure.
652 *
653 * Returns the pid+1 as we want to display pid of zero, but NULL would
654 * stop the iteration.
655 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 long pid = (unsigned long)v;
659 unsigned int next;
660
661 (*pos)++;
662
663 /* pid already is +1 of the actual previous bit */
664 if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 return NULL;
666
667 pid = next;
668
669 /* Return pid + 1 to allow zero to be represented */
670 return (void *)(pid + 1);
671 }
672
673 /**
674 * trace_pid_start - Used for seq_file to start reading pid lists
675 * @pid_list: The pid list to show
676 * @pos: The position of the file
677 *
678 * This is used by seq_file "start" operation to start the iteration
679 * of listing pids.
680 *
681 * Returns the pid+1 as we want to display pid of zero, but NULL would
682 * stop the iteration.
683 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 unsigned long pid;
687 unsigned int first;
688 loff_t l = 0;
689
690 if (trace_pid_list_first(pid_list, &first) < 0)
691 return NULL;
692
693 pid = first;
694
695 /* Return pid + 1 so that zero can be the exit value */
696 for (pid++; pid && l < *pos;
697 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 ;
699 return (void *)pid;
700 }
701
702 /**
703 * trace_pid_show - show the current pid in seq_file processing
704 * @m: The seq_file structure to write into
705 * @v: A void pointer of the pid (+1) value to display
706 *
707 * Can be directly used by seq_file operations to display the current
708 * pid value.
709 */
int trace_pid_show(struct seq_file *m, void *v)
711 {
712 unsigned long pid = (unsigned long)v - 1;
713
714 seq_printf(m, "%lu\n", pid);
715 return 0;
716 }
717
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE 127
720
int trace_pid_write(struct trace_pid_list *filtered_pids,
722 struct trace_pid_list **new_pid_list,
723 const char __user *ubuf, size_t cnt)
724 {
725 struct trace_pid_list *pid_list;
726 struct trace_parser parser;
727 unsigned long val;
728 int nr_pids = 0;
729 ssize_t read = 0;
730 ssize_t ret;
731 loff_t pos;
732 pid_t pid;
733
734 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 return -ENOMEM;
736
737 /*
738 * Always recreate a new array. The write is an all or nothing
739 * operation. Always create a new array when adding new pids by
740 * the user. If the operation fails, then the current list is
741 * not modified.
742 */
743 pid_list = trace_pid_list_alloc();
744 if (!pid_list) {
745 trace_parser_put(&parser);
746 return -ENOMEM;
747 }
748
749 if (filtered_pids) {
750 /* copy the current bits to the new max */
751 ret = trace_pid_list_first(filtered_pids, &pid);
752 while (!ret) {
753 trace_pid_list_set(pid_list, pid);
754 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 nr_pids++;
756 }
757 }
758
759 ret = 0;
760 while (cnt > 0) {
761
762 pos = 0;
763
764 ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 if (ret < 0)
766 break;
767
768 read += ret;
769 ubuf += ret;
770 cnt -= ret;
771
772 if (!trace_parser_loaded(&parser))
773 break;
774
775 ret = -EINVAL;
776 if (kstrtoul(parser.buffer, 0, &val))
777 break;
778
779 pid = (pid_t)val;
780
781 if (trace_pid_list_set(pid_list, pid) < 0) {
782 ret = -1;
783 break;
784 }
785 nr_pids++;
786
787 trace_parser_clear(&parser);
788 ret = 0;
789 }
790 trace_parser_put(&parser);
791
792 if (ret < 0) {
793 trace_pid_list_free(pid_list);
794 return ret;
795 }
796
797 if (!nr_pids) {
798 /* Cleared the list of pids */
799 trace_pid_list_free(pid_list);
800 pid_list = NULL;
801 }
802
803 *new_pid_list = pid_list;
804
805 return read;
806 }
807
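/*
 * Return the current time stamp of the given buffer, normalized for @cpu.
 * Falls back to trace_clock_local() early in boot, before the ring
 * buffer has been allocated.
 */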
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 u64 ts;
811
812 /* Early boot up does not have a buffer yet */
813 if (!buf->buffer)
814 return trace_clock_local();
815
816 ts = ring_buffer_time_stamp(buf->buffer);
817 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819 return ts;
820 }
821
u64 ftrace_now(int cpu)
823 {
824 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826
827 /**
828 * tracing_is_enabled - Show if global_trace has been enabled
829 *
830 * Shows if the global trace has been enabled or not. It uses the
831 * mirror flag "buffer_disabled" to be used in fast paths such as for
832 * the irqsoff tracer. But it may be inaccurate due to races. If you
833 * need to know the accurate state, use tracing_is_on() which is a little
834 * slower, but accurate.
835 */
int tracing_is_enabled(void)
837 {
838 /*
839 * For quick access (irqsoff uses this in fast path), just
840 * return the mirror variable of the state of the ring buffer.
841 * It's a little racy, but we don't really care.
842 */
843 smp_rmb();
844 return !global_trace.buffer_disabled;
845 }
846
847 /*
848 * trace_buf_size is the size in bytes that is allocated
849 * for a buffer. Note, the number of bytes is always rounded
850 * to page size.
851 *
 * This number is purposely set to a low number of 16384.
 * If a dump on oops happens, it is much appreciated not to have to
 * wait for all that output. In any case, this is configurable at both
 * boot time and run time.
856 */
857 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer *trace_types __read_mostly;
863
864 /*
865 * trace_types_lock is used to protect the trace_types list.
866 */
867 DEFINE_MUTEX(trace_types_lock);
868
869 /*
870 * serialize the access of the ring buffer
871 *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (returned by ring_buffer_peek(), etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *  A) the page of the consumed events may become a normal page
 *     (not a reader page) in the ring buffer, and this page will be
 *     rewritten by the event producer.
 *  B) the page of the consumed events may become a page for splice_read,
 *     and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different CPU ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
889 */
890
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
static inline void trace_access_lock(int cpu)
896 {
897 if (cpu == RING_BUFFER_ALL_CPUS) {
898 /* gain it for accessing the whole ring buffer. */
899 down_write(&all_cpu_access_lock);
900 } else {
901 /* gain it for accessing a cpu ring buffer. */
902
903 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 down_read(&all_cpu_access_lock);
905
906 /* Secondly block other access to this @cpu ring buffer. */
907 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 }
909 }
910
static inline void trace_access_unlock(int cpu)
912 {
913 if (cpu == RING_BUFFER_ALL_CPUS) {
914 up_write(&all_cpu_access_lock);
915 } else {
916 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 up_read(&all_cpu_access_lock);
918 }
919 }
920
static inline void trace_access_lock_init(void)
922 {
923 int cpu;
924
925 for_each_possible_cpu(cpu)
926 mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928
929 #else
930
931 static DEFINE_MUTEX(access_lock);
932
static inline void trace_access_lock(int cpu)
934 {
935 (void)cpu;
936 mutex_lock(&access_lock);
937 }
938
static inline void trace_access_unlock(int cpu)
940 {
941 (void)cpu;
942 mutex_unlock(&access_lock);
943 }
944
static inline void trace_access_lock_init(void)
946 {
947 }
948
949 #endif
950
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 unsigned int trace_ctx,
954 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 struct trace_buffer *buffer,
957 unsigned int trace_ctx,
958 int skip, struct pt_regs *regs);
959
960 #else
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 unsigned int trace_ctx,
963 int skip, struct pt_regs *regs)
964 {
965 }
static inline void ftrace_trace_stack(struct trace_array *tr,
967 struct trace_buffer *buffer,
968 unsigned long trace_ctx,
969 int skip, struct pt_regs *regs)
970 {
971 }
972
973 #endif
974
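/* Fill in the common trace_entry fields of a freshly reserved event. */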
975 static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
977 int type, unsigned int trace_ctx)
978 {
979 struct trace_entry *ent = ring_buffer_event_data(event);
980
981 tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983
984 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 int type,
987 unsigned long len,
988 unsigned int trace_ctx)
989 {
990 struct ring_buffer_event *event;
991
992 event = ring_buffer_lock_reserve(buffer, len);
993 if (event != NULL)
994 trace_event_setup(event, type, trace_ctx);
995
996 return event;
997 }
998
void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 if (tr->array_buffer.buffer)
1002 ring_buffer_record_on(tr->array_buffer.buffer);
1003 /*
1004 * This flag is looked at when buffers haven't been allocated
1005 * yet, or by some tracers (like irqsoff), that just want to
1006 * know if the ring buffer has been disabled, but it can handle
 * races where it gets disabled but we still do a record.
1008 * As the check is in the fast path of the tracers, it is more
1009 * important to be fast than accurate.
1010 */
1011 tr->buffer_disabled = 0;
1012 /* Make the flag seen by readers */
1013 smp_wmb();
1014 }
1015
1016 /**
1017 * tracing_on - enable tracing buffers
1018 *
1019 * This function enables tracing buffers that may have been
1020 * disabled with tracing_off.
1021 */
void tracing_on(void)
1023 {
1024 tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
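/*
 * Commit an event. If the event was built in the per-CPU temporary
 * "buffered event", copy it into the ring buffer and release the
 * temporary buffer; otherwise just commit it in place.
 */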
1029 static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 __this_cpu_write(trace_taskinfo_save, true);
1033
1034 /* If this is the temp buffer, we need to commit fully */
1035 if (this_cpu_read(trace_buffered_event) == event) {
1036 /* Length is in event->array[0] */
1037 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 /* Release the temp buffer */
1039 this_cpu_dec(trace_buffered_event_cnt);
1040 /* ring_buffer_unlock_commit() enables preemption */
1041 preempt_enable_notrace();
1042 } else
1043 ring_buffer_unlock_commit(buffer);
1044 }
1045
int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 const char *str, int size)
1048 {
1049 struct ring_buffer_event *event;
1050 struct trace_buffer *buffer;
1051 struct print_entry *entry;
1052 unsigned int trace_ctx;
1053 int alloc;
1054
1055 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 return 0;
1057
1058 if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 return 0;
1060
1061 if (unlikely(tracing_disabled))
1062 return 0;
1063
1064 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066 trace_ctx = tracing_gen_ctx();
1067 buffer = tr->array_buffer.buffer;
1068 ring_buffer_nest_start(buffer);
1069 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 trace_ctx);
1071 if (!event) {
1072 size = 0;
1073 goto out;
1074 }
1075
1076 entry = ring_buffer_event_data(event);
1077 entry->ip = ip;
1078
1079 memcpy(&entry->buf, str, size);
1080
1081 /* Add a newline if necessary */
1082 if (entry->buf[size - 1] != '\n') {
1083 entry->buf[size] = '\n';
1084 entry->buf[size + 1] = '\0';
1085 } else
1086 entry->buf[size] = '\0';
1087
1088 __buffer_unlock_commit(buffer, event);
1089 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090 out:
1091 ring_buffer_nest_end(buffer);
1092 return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096 /**
1097 * __trace_puts - write a constant string into the trace buffer.
1098 * @ip: The address of the caller
1099 * @str: The constant string to write
1100 * @size: The size of the string.
1101 */
int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107
1108 /**
1109 * __trace_bputs - write the pointer to a constant string into trace buffer
1110 * @ip: The address of the caller
1111 * @str: The constant string to write to the buffer to
1112 */
int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 struct ring_buffer_event *event;
1116 struct trace_buffer *buffer;
1117 struct bputs_entry *entry;
1118 unsigned int trace_ctx;
1119 int size = sizeof(struct bputs_entry);
1120 int ret = 0;
1121
1122 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 return 0;
1124
1125 if (unlikely(tracing_selftest_running || tracing_disabled))
1126 return 0;
1127
1128 trace_ctx = tracing_gen_ctx();
1129 buffer = global_trace.array_buffer.buffer;
1130
1131 ring_buffer_nest_start(buffer);
1132 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 trace_ctx);
1134 if (!event)
1135 goto out;
1136
1137 entry = ring_buffer_event_data(event);
1138 entry->ip = ip;
1139 entry->str = str;
1140
1141 __buffer_unlock_commit(buffer, event);
1142 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144 ret = 1;
1145 out:
1146 ring_buffer_nest_end(buffer);
1147 return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151 #ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 void *cond_data)
1154 {
1155 struct tracer *tracer = tr->current_trace;
1156 unsigned long flags;
1157
1158 if (in_nmi()) {
1159 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1161 return;
1162 }
1163
1164 if (!tr->allocated_snapshot) {
1165 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 trace_array_puts(tr, "*** stopping trace here! ***\n");
1167 tracer_tracing_off(tr);
1168 return;
1169 }
1170
1171 /* Note, snapshot can not be used when the tracer uses it */
1172 if (tracer->use_max_tr) {
1173 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 return;
1176 }
1177
1178 local_irq_save(flags);
1179 update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 local_irq_restore(flags);
1181 }
1182
void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187
1188 /**
1189 * tracing_snapshot - take a snapshot of the current buffer.
1190 *
1191 * This causes a swap between the snapshot buffer and the current live
1192 * tracing buffer. You can use this to take snapshots of the live
1193 * trace when some condition is triggered, but continue to trace.
1194 *
1195 * Note, make sure to allocate the snapshot with either
1196 * a tracing_snapshot_alloc(), or by doing it manually
1197 * with: echo 1 > /sys/kernel/tracing/snapshot
1198 *
1199 * If the snapshot buffer is not allocated, it will stop tracing.
1200 * Basically making a permanent snapshot.
1201 */
void tracing_snapshot(void)
1203 {
1204 struct trace_array *tr = &global_trace;
1205
1206 tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209
1210 /**
1211 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212 * @tr: The tracing instance to snapshot
1213 * @cond_data: The data to be tested conditionally, and possibly saved
1214 *
1215 * This is the same as tracing_snapshot() except that the snapshot is
1216 * conditional - the snapshot will only happen if the
1217 * cond_snapshot.update() implementation receiving the cond_data
1218 * returns true, which means that the trace array's cond_snapshot
1219 * update() operation used the cond_data to determine whether the
1220 * snapshot should be taken, and if it was, presumably saved it along
1221 * with the snapshot.
1222 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225 tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229 /**
1230 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231 * @tr: The tracing instance
1232 *
1233 * When the user enables a conditional snapshot using
1234 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235 * with the snapshot. This accessor is used to retrieve it.
1236 *
1237 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already taken.
1240 *
1241 * Returns the cond_data associated with the trace array's snapshot.
1242 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 void *cond_data = NULL;
1246
1247 local_irq_disable();
1248 arch_spin_lock(&tr->max_lock);
1249
1250 if (tr->cond_snapshot)
1251 cond_data = tr->cond_snapshot->cond_data;
1252
1253 arch_spin_unlock(&tr->max_lock);
1254 local_irq_enable();
1255
1256 return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
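/*
 * Allocate the snapshot (max) buffer for @tr, sized to match the main
 * ring buffer, if it has not been allocated already.
 */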
int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 int ret;
1267
1268 if (!tr->allocated_snapshot) {
1269
1270 /* allocate spare buffer */
1271 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 if (ret < 0)
1274 return ret;
1275
1276 tr->allocated_snapshot = true;
1277 }
1278
1279 return 0;
1280 }
1281
static void free_snapshot(struct trace_array *tr)
1283 {
1284 /*
 * We don't free the ring buffer; instead, we resize it, because
 * the max_tr ring buffer has some state (e.g. ring->clock) that
 * we want to preserve.
1288 */
1289 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290 set_buffer_entries(&tr->max_buffer, 1);
1291 tracing_reset_online_cpus(&tr->max_buffer);
1292 tr->allocated_snapshot = false;
1293 }
1294
1295 /**
1296 * tracing_alloc_snapshot - allocate snapshot buffer.
1297 *
1298 * This only allocates the snapshot buffer if it isn't already
1299 * allocated - it doesn't also take a snapshot.
1300 *
1301 * This is meant to be used in cases where the snapshot buffer needs
1302 * to be set up for events that can't sleep but need to be able to
1303 * trigger a snapshot.
1304 */
int tracing_alloc_snapshot(void)
1306 {
1307 struct trace_array *tr = &global_trace;
1308 int ret;
1309
1310 ret = tracing_alloc_snapshot_instance(tr);
1311 WARN_ON(ret < 0);
1312
1313 return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316
1317 /**
1318 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319 *
1320 * This is similar to tracing_snapshot(), but it will allocate the
1321 * snapshot buffer if it isn't already allocated. Use this only
1322 * where it is safe to sleep, as the allocation may sleep.
1323 *
1324 * This causes a swap between the snapshot buffer and the current live
1325 * tracing buffer. You can use this to take snapshots of the live
1326 * trace when some condition is triggered, but continue to trace.
1327 */
void tracing_snapshot_alloc(void)
1329 {
1330 int ret;
1331
1332 ret = tracing_alloc_snapshot();
1333 if (ret < 0)
1334 return;
1335
1336 tracing_snapshot();
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339
1340 /**
1341 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342 * @tr: The tracing instance
1343 * @cond_data: User data to associate with the snapshot
1344 * @update: Implementation of the cond_snapshot update function
1345 *
1346 * Check whether the conditional snapshot for the given instance has
1347 * already been enabled, or if the current tracer is already using a
1348 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349 * save the cond_data and update function inside.
1350 *
1351 * Returns 0 if successful, error otherwise.
1352 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 cond_update_fn_t update)
1355 {
1356 struct cond_snapshot *cond_snapshot;
1357 int ret = 0;
1358
1359 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 if (!cond_snapshot)
1361 return -ENOMEM;
1362
1363 cond_snapshot->cond_data = cond_data;
1364 cond_snapshot->update = update;
1365
1366 mutex_lock(&trace_types_lock);
1367
1368 ret = tracing_alloc_snapshot_instance(tr);
1369 if (ret)
1370 goto fail_unlock;
1371
1372 if (tr->current_trace->use_max_tr) {
1373 ret = -EBUSY;
1374 goto fail_unlock;
1375 }
1376
1377 /*
1378 * The cond_snapshot can only change to NULL without the
1379 * trace_types_lock. We don't care if we race with it going
1380 * to NULL, but we want to make sure that it's not set to
1381 * something other than NULL when we get here, which we can
1382 * do safely with only holding the trace_types_lock and not
1383 * having to take the max_lock.
1384 */
1385 if (tr->cond_snapshot) {
1386 ret = -EBUSY;
1387 goto fail_unlock;
1388 }
1389
1390 local_irq_disable();
1391 arch_spin_lock(&tr->max_lock);
1392 tr->cond_snapshot = cond_snapshot;
1393 arch_spin_unlock(&tr->max_lock);
1394 local_irq_enable();
1395
1396 mutex_unlock(&trace_types_lock);
1397
1398 return ret;
1399
1400 fail_unlock:
1401 mutex_unlock(&trace_types_lock);
1402 kfree(cond_snapshot);
1403 return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406
1407 /**
1408 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409 * @tr: The tracing instance
1410 *
1411 * Check whether the conditional snapshot for the given instance is
1412 * enabled; if so, free the cond_snapshot associated with it,
1413 * otherwise return -EINVAL.
1414 *
1415 * Returns 0 if successful, error otherwise.
1416 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419 int ret = 0;
1420
1421 local_irq_disable();
1422 arch_spin_lock(&tr->max_lock);
1423
1424 if (!tr->cond_snapshot)
1425 ret = -EINVAL;
1426 else {
1427 kfree(tr->cond_snapshot);
1428 tr->cond_snapshot = NULL;
1429 }
1430
1431 arch_spin_unlock(&tr->max_lock);
1432 local_irq_enable();
1433
1434 return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437 #else
void tracing_snapshot(void)
1439 {
1440 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441 }
1442 EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444 {
1445 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
1449 {
1450 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
1455 {
1456 /* Give warning */
1457 tracing_snapshot();
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462 return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466 {
1467 return -ENODEV;
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
1471 {
1472 return false;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr) do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477
void tracer_tracing_off(struct trace_array *tr)
1479 {
1480 if (tr->array_buffer.buffer)
1481 ring_buffer_record_off(tr->array_buffer.buffer);
1482 /*
1483 * This flag is looked at when buffers haven't been allocated
1484 * yet, or by some tracers (like irqsoff), that just want to
1485 * know if the ring buffer has been disabled, but it can handle
 * races where it gets disabled but we still do a record.
1487 * As the check is in the fast path of the tracers, it is more
1488 * important to be fast than accurate.
1489 */
1490 tr->buffer_disabled = 1;
1491 /* Make the flag seen by readers */
1492 smp_wmb();
1493 }
1494
1495 /**
1496 * tracing_off - turn off tracing buffers
1497 *
1498 * This function stops the tracing buffers from recording data.
1499 * It does not disable any overhead the tracers themselves may
1500 * be causing. This function simply causes all recording to
1501 * the ring buffers to fail.
1502 */
void tracing_off(void)
1504 {
1505 tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508
void disable_trace_on_warning(void)
1510 {
1511 if (__disable_trace_on_warning) {
1512 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513 "Disabling tracing due to warning\n");
1514 tracing_off();
1515 }
1516 }
1517
1518 /**
1519 * tracer_tracing_is_on - show real state of ring buffer enabled
1520 * @tr : the trace array to know if ring buffer is enabled
1521 *
1522 * Shows real state of the ring buffer if it is enabled or not.
1523 */
bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526 if (tr->array_buffer.buffer)
1527 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 return !tr->buffer_disabled;
1529 }
1530
1531 /**
1532 * tracing_is_on - show state of ring buffers enabled
1533 */
int tracing_is_on(void)
1535 {
1536 return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539
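/*
 * Parse the "trace_buf_size=" boot parameter. The value accepts the usual
 * memparse() suffixes (K, M, G) and is clamped to a minimum of 4096 bytes.
 */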
static int __init set_buf_size(char *str)
1541 {
1542 unsigned long buf_size;
1543
1544 if (!str)
1545 return 0;
1546 buf_size = memparse(str, &str);
1547 /*
1548 * nr_entries can not be zero and the startup
1549 * tests require some buffer space. Therefore
1550 * ensure we have at least 4096 bytes of buffer.
1551 */
1552 trace_buf_size = max(4096UL, buf_size);
1553 return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556
static int __init set_tracing_thresh(char *str)
1558 {
1559 unsigned long threshold;
1560 int ret;
1561
1562 if (!str)
1563 return 0;
1564 ret = kstrtoul(str, 0, &threshold);
1565 if (ret < 0)
1566 return 0;
1567 tracing_thresh = threshold * 1000;
1568 return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571
unsigned long nsecs_to_usecs(unsigned long nsecs)
1573 {
1574 return nsecs / 1000;
1575 }
1576
1577 /*
1578 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581 * of strings in the order that the evals (enum) were defined.
1582 */
1583 #undef C
1584 #define C(a, b) b
1585
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588 TRACE_FLAGS
1589 NULL
1590 };
1591
1592 static struct {
1593 u64 (*func)(void);
1594 const char *name;
1595 int in_ns; /* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597 { trace_clock_local, "local", 1 },
1598 { trace_clock_global, "global", 1 },
1599 { trace_clock_counter, "counter", 0 },
1600 { trace_clock_jiffies, "uptime", 0 },
1601 { trace_clock, "perf", 1 },
1602 { ktime_get_mono_fast_ns, "mono", 1 },
1603 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1604 { ktime_get_boot_fast_ns, "boot", 1 },
1605 { ktime_get_tai_fast_ns, "tai", 1 },
1606 ARCH_TRACE_CLOCKS
1607 };
1608
bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 if (trace_clocks[tr->clock_id].in_ns)
1612 return true;
1613
1614 return false;
1615 }
1616
1617 /*
1618 * trace_parser_get_init - gets the buffer for trace parser
1619 */
int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 memset(parser, 0, sizeof(*parser));
1623
1624 parser->buffer = kmalloc(size, GFP_KERNEL);
1625 if (!parser->buffer)
1626 return 1;
1627
1628 parser->size = size;
1629 return 0;
1630 }
1631
1632 /*
1633 * trace_parser_put - frees the buffer for trace parser
1634 */
void trace_parser_put(struct trace_parser *parser)
1636 {
1637 kfree(parser->buffer);
1638 parser->buffer = NULL;
1639 }
1640
1641 /*
1642 * trace_get_user - reads the user input string separated by space
1643 * (matched by isspace(ch))
1644 *
1645 * For each string found the 'struct trace_parser' is updated,
1646 * and the function returns.
1647 *
1648 * Returns number of bytes read.
1649 *
1650 * See kernel/trace/trace.h for 'struct trace_parser' details.
1651 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 size_t cnt, loff_t *ppos)
1654 {
1655 char ch;
1656 size_t read = 0;
1657 ssize_t ret;
1658
1659 if (!*ppos)
1660 trace_parser_clear(parser);
1661
1662 ret = get_user(ch, ubuf++);
1663 if (ret)
1664 goto out;
1665
1666 read++;
1667 cnt--;
1668
1669 /*
1670 * The parser is not finished with the last write,
1671 * continue reading the user input without skipping spaces.
1672 */
1673 if (!parser->cont) {
1674 /* skip white space */
1675 while (cnt && isspace(ch)) {
1676 ret = get_user(ch, ubuf++);
1677 if (ret)
1678 goto out;
1679 read++;
1680 cnt--;
1681 }
1682
1683 parser->idx = 0;
1684
1685 /* only spaces were written */
1686 if (isspace(ch) || !ch) {
1687 *ppos += read;
1688 ret = read;
1689 goto out;
1690 }
1691 }
1692
1693 /* read the non-space input */
1694 while (cnt && !isspace(ch) && ch) {
1695 if (parser->idx < parser->size - 1)
1696 parser->buffer[parser->idx++] = ch;
1697 else {
1698 ret = -EINVAL;
1699 goto out;
1700 }
1701 ret = get_user(ch, ubuf++);
1702 if (ret)
1703 goto out;
1704 read++;
1705 cnt--;
1706 }
1707
1708 /* We either got finished input or we have to wait for another call. */
1709 if (isspace(ch) || !ch) {
1710 parser->buffer[parser->idx] = 0;
1711 parser->cont = false;
1712 } else if (parser->idx < parser->size - 1) {
1713 parser->cont = true;
1714 parser->buffer[parser->idx++] = ch;
1715 /* Make sure the parsed string always terminates with '\0'. */
1716 parser->buffer[parser->idx] = 0;
1717 } else {
1718 ret = -EINVAL;
1719 goto out;
1720 }
1721
1722 *ppos += read;
1723 ret = read;
1724
1725 out:
1726 return ret;
1727 }
1728
1729 /* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 int len;
1733
1734 if (trace_seq_used(s) <= s->seq.readpos)
1735 return -EBUSY;
1736
1737 len = trace_seq_used(s) - s->seq.readpos;
1738 if (cnt > len)
1739 cnt = len;
1740 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1741
1742 s->seq.readpos += cnt;
1743 return cnt;
1744 }
1745
1746 unsigned long __read_mostly tracing_thresh;
1747
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750
1751 #ifdef LATENCY_FS_NOTIFY
1752
1753 static struct workqueue_struct *fsnotify_wq;
1754
static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 struct trace_array *tr = container_of(work, struct trace_array,
1758 fsnotify_work);
1759 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 struct trace_array *tr = container_of(iwork, struct trace_array,
1765 fsnotify_irqwork);
1766 queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768
static void trace_create_maxlat_file(struct trace_array *tr,
1770 struct dentry *d_tracer)
1771 {
1772 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 TRACE_MODE_WRITE,
1776 d_tracer, tr,
1777 &tracing_max_lat_fops);
1778 }
1779
__init static int latency_fsnotify_init(void)
1781 {
1782 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 if (!fsnotify_wq) {
1785 pr_err("Unable to allocate tr_max_lat_wq\n");
1786 return -ENOMEM;
1787 }
1788 return 0;
1789 }
1790
1791 late_initcall_sync(latency_fsnotify_init);
1792
void latency_fsnotify(struct trace_array *tr)
1794 {
1795 if (!fsnotify_wq)
1796 return;
1797 /*
1798 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 * possible that we are called from __schedule() or do_idle(), which
1800 * could cause a deadlock.
1801 */
1802 irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804
1805 #else /* !LATENCY_FS_NOTIFY */
1806
1807 #define trace_create_maxlat_file(tr, d_tracer) \
1808 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1809 d_tracer, tr, &tracing_max_lat_fops)
1810
1811 #endif
1812
1813 /*
1814 * Copy the new maximum trace into the separate maximum-trace
1815 * structure. (this way the maximum trace is permanently saved,
1816 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817 */
1818 static void
1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 struct array_buffer *trace_buf = &tr->array_buffer;
1822 struct array_buffer *max_buf = &tr->max_buffer;
1823 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825
1826 max_buf->cpu = cpu;
1827 max_buf->time_start = data->preempt_timestamp;
1828
1829 max_data->saved_latency = tr->max_latency;
1830 max_data->critical_start = data->critical_start;
1831 max_data->critical_end = data->critical_end;
1832
1833 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 max_data->pid = tsk->pid;
1835 /*
1836 * If tsk == current, then use current_uid(), as that does not use
1837 * RCU. The irq tracer can be called out of RCU scope.
1838 */
1839 if (tsk == current)
1840 max_data->uid = current_uid();
1841 else
1842 max_data->uid = task_uid(tsk);
1843
1844 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 max_data->policy = tsk->policy;
1846 max_data->rt_priority = tsk->rt_priority;
1847
1848 /* Record this task's comm */
1849 tracing_record_cmdline(tsk);
1850 latency_fsnotify(tr);
1851 }
1852
1853 /**
1854 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855 * @tr: trace array whose buffers are snapshotted
1856 * @tsk: the task with the latency
1857 * @cpu: The cpu that initiated the trace.
1858 * @cond_data: User data associated with a conditional snapshot
1859 *
1860 * Flip the buffers between the @tr and the max_tr and record information
1861 * about which task was the cause of this latency.
1862 */
1863 void
1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 void *cond_data)
1866 {
1867 if (tr->stop_count)
1868 return;
1869
1870 WARN_ON_ONCE(!irqs_disabled());
1871
1872 if (!tr->allocated_snapshot) {
1873 /* Only the nop tracer should hit this when disabling */
1874 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 return;
1876 }
1877
1878 arch_spin_lock(&tr->max_lock);
1879
1880 /* Inherit the recordable setting from array_buffer */
1881 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 ring_buffer_record_on(tr->max_buffer.buffer);
1883 else
1884 ring_buffer_record_off(tr->max_buffer.buffer);
1885
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 arch_spin_unlock(&tr->max_lock);
1889 return;
1890 }
1891 #endif
1892 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893
1894 __update_max_tr(tr, tsk, cpu);
1895
1896 arch_spin_unlock(&tr->max_lock);
1897
1898 /* Any waiters on the old snapshot buffer need to wake up */
1899 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
1901
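/*
 * Illustrative sketch (not from this file): a latency tracer such as the
 * wakeup tracer typically calls this once it has measured a new maximum,
 * with interrupts disabled, roughly like:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * The snapshot is cheap because only the buffer pointers are swapped;
 * no trace data is copied.
 */
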
1902 /**
1903 * update_max_tr_single - only copy one trace over, and reset the rest
1904 * @tr: trace array whose CPU buffer to swap into the max buffer
1905 * @tsk: task with the latency
1906 * @cpu: the cpu of the buffer to copy.
1907 *
1908 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909 */
1910 void
1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 int ret;
1914
1915 if (tr->stop_count)
1916 return;
1917
1918 WARN_ON_ONCE(!irqs_disabled());
1919 if (!tr->allocated_snapshot) {
1920 /* Only the nop tracer should hit this when disabling */
1921 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 return;
1923 }
1924
1925 arch_spin_lock(&tr->max_lock);
1926
1927 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928
1929 if (ret == -EBUSY) {
1930 /*
1931 * We failed to swap the buffer due to a commit taking
1932 * place on this CPU. We fail to record, but we reset
1933 * the max trace buffer (no one writes directly to it)
1934 * and flag that it failed.
1935 * Another possible reason is that a buffer resize is in progress.
1936 */
1937 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 "Failed to swap buffers due to commit or resize in progress\n");
1939 }
1940
1941 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942
1943 __update_max_tr(tr, tsk, cpu);
1944 arch_spin_unlock(&tr->max_lock);
1945 }
1946
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948
1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 int ret;
1952
1953 /* Iterators are static; they should be either filled or empty */
1954 if (trace_buffer_iter(iter, iter->cpu_file))
1955 return 0;
1956
1957 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 /*
1961 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 * to happen, this would now be the main buffer.
1963 */
1964 if (iter->snapshot)
1965 iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 return ret;
1968 }
1969
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972
1973 struct trace_selftests {
1974 struct list_head list;
1975 struct tracer *type;
1976 };
1977
1978 static LIST_HEAD(postponed_selftests);
1979
1980 static int save_selftest(struct tracer *type)
1981 {
1982 struct trace_selftests *selftest;
1983
1984 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 if (!selftest)
1986 return -ENOMEM;
1987
1988 selftest->type = type;
1989 list_add(&selftest->list, &postponed_selftests);
1990 return 0;
1991 }
1992
1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 struct trace_array *tr = &global_trace;
1996 struct tracer *saved_tracer = tr->current_trace;
1997 int ret;
1998
1999 if (!type->selftest || tracing_selftest_disabled)
2000 return 0;
2001
2002 /*
2003 * If a tracer registers early in boot up (before scheduling is
2004 * initialized and such), then do not run its selftests yet.
2005 * Instead, run it a little later in the boot process.
2006 */
2007 if (!selftests_can_run)
2008 return save_selftest(type);
2009
2010 if (!tracing_is_on()) {
2011 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 type->name);
2013 return 0;
2014 }
2015
2016 /*
2017 * Run a selftest on this tracer.
2018 * Here we reset the trace buffer, and set the current
2019 * tracer to be this tracer. The tracer can then run some
2020 * internal tracing to verify that everything is in order.
2021 * If we fail, we do not register this tracer.
2022 */
2023 tracing_reset_online_cpus(&tr->array_buffer);
2024
2025 tr->current_trace = type;
2026
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 if (type->use_max_tr) {
2029 /* If we expanded the buffers, make sure the max is expanded too */
2030 if (ring_buffer_expanded)
2031 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 RING_BUFFER_ALL_CPUS);
2033 tr->allocated_snapshot = true;
2034 }
2035 #endif
2036
2037 /* the test is responsible for initializing and enabling */
2038 pr_info("Testing tracer %s: ", type->name);
2039 ret = type->selftest(type, tr);
2040 /* the test is responsible for resetting too */
2041 tr->current_trace = saved_tracer;
2042 if (ret) {
2043 printk(KERN_CONT "FAILED!\n");
2044 /* Add the warning after printing 'FAILED' */
2045 WARN_ON(1);
2046 return -1;
2047 }
2048 /* Only reset on passing, to avoid touching corrupted buffers */
2049 tracing_reset_online_cpus(&tr->array_buffer);
2050
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 if (type->use_max_tr) {
2053 tr->allocated_snapshot = false;
2054
2055 /* Shrink the max buffer again */
2056 if (ring_buffer_expanded)
2057 ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 RING_BUFFER_ALL_CPUS);
2059 }
2060 #endif
2061
2062 printk(KERN_CONT "PASSED\n");
2063 return 0;
2064 }
2065
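/*
 * Sketch of the ->selftest() callback shape this expects (hypothetical
 * "foo" tracer; the real callbacks live in trace_selftest.c):
 *
 *	static int trace_selftest_startup_foo(struct tracer *trace,
 *					      struct trace_array *tr)
 *	{
 *		int ret = tracer_init(trace, tr);
 *
 *		...generate some activity to trace...
 *		tracing_stop();
 *		...check the entries in tr->array_buffer and set ret...
 *		trace->reset(tr);
 *		tracing_start();
 *		return ret;
 *	}
 */
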
2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 int ret;
2069
2070 /*
2071 * Tests can take a long time, especially if they are run one after the
2072 * other, as does happen during bootup when all the tracers are
2073 * registered. This could cause the soft lockup watchdog to trigger.
2074 */
2075 cond_resched();
2076
2077 tracing_selftest_running = true;
2078 ret = run_tracer_selftest(type);
2079 tracing_selftest_running = false;
2080
2081 return ret;
2082 }
2083
2084 static __init int init_trace_selftests(void)
2085 {
2086 struct trace_selftests *p, *n;
2087 struct tracer *t, **last;
2088 int ret;
2089
2090 selftests_can_run = true;
2091
2092 mutex_lock(&trace_types_lock);
2093
2094 if (list_empty(&postponed_selftests))
2095 goto out;
2096
2097 pr_info("Running postponed tracer tests:\n");
2098
2099 tracing_selftest_running = true;
2100 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 /* This loop can take minutes when sanitizers are enabled, so
2102 * let's make sure we allow RCU processing.
2103 */
2104 cond_resched();
2105 ret = run_tracer_selftest(p->type);
2106 /* If the test fails, then warn and remove from available_tracers */
2107 if (ret < 0) {
2108 WARN(1, "tracer: %s failed selftest, disabling\n",
2109 p->type->name);
2110 last = &trace_types;
2111 for (t = trace_types; t; t = t->next) {
2112 if (t == p->type) {
2113 *last = t->next;
2114 break;
2115 }
2116 last = &t->next;
2117 }
2118 }
2119 list_del(&p->list);
2120 kfree(p);
2121 }
2122 tracing_selftest_running = false;
2123
2124 out:
2125 mutex_unlock(&trace_types_lock);
2126
2127 return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
2131 static inline int run_tracer_selftest(struct tracer *type)
2132 {
2133 return 0;
2134 }
2135 static inline int do_run_tracer_selftest(struct tracer *type)
2136 {
2137 return 0;
2138 }
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142
2143 static void __init apply_trace_boot_options(void);
2144
2145 /**
2146 * register_tracer - register a tracer with the ftrace system.
2147 * @type: the plugin for the tracer
2148 *
2149 * Register a new plugin tracer.
2150 */
2151 int __init register_tracer(struct tracer *type)
2152 {
2153 struct tracer *t;
2154 int ret = 0;
2155
2156 if (!type->name) {
2157 pr_info("Tracer must have a name\n");
2158 return -1;
2159 }
2160
2161 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 return -1;
2164 }
2165
2166 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 pr_warn("Can not register tracer %s due to lockdown\n",
2168 type->name);
2169 return -EPERM;
2170 }
2171
2172 mutex_lock(&trace_types_lock);
2173
2174 for (t = trace_types; t; t = t->next) {
2175 if (strcmp(type->name, t->name) == 0) {
2176 /* already found */
2177 pr_info("Tracer %s already registered\n",
2178 type->name);
2179 ret = -1;
2180 goto out;
2181 }
2182 }
2183
2184 if (!type->set_flag)
2185 type->set_flag = &dummy_set_flag;
2186 if (!type->flags) {
2187 /* Allocate a dummy tracer_flags */
2188 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 if (!type->flags) {
2190 ret = -ENOMEM;
2191 goto out;
2192 }
2193 type->flags->val = 0;
2194 type->flags->opts = dummy_tracer_opt;
2195 } else
2196 if (!type->flags->opts)
2197 type->flags->opts = dummy_tracer_opt;
2198
2199 /* store the tracer for __set_tracer_option */
2200 type->flags->trace = type;
2201
2202 ret = do_run_tracer_selftest(type);
2203 if (ret < 0)
2204 goto out;
2205
2206 type->next = trace_types;
2207 trace_types = type;
2208 add_tracer_options(&global_trace, type);
2209
2210 out:
2211 mutex_unlock(&trace_types_lock);
2212
2213 if (ret || !default_bootup_tracer)
2214 goto out_unlock;
2215
2216 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 goto out_unlock;
2218
2219 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 /* Do we want this tracer to start on bootup? */
2221 tracing_set_tracer(&global_trace, type->name);
2222 default_bootup_tracer = NULL;
2223
2224 apply_trace_boot_options();
2225
2226 /* Disable other selftests, since running this tracer will break them. */
2227 disable_tracing_selftest("running a tracer");
2228
2229 out_unlock:
2230 return ret;
2231 }
2232
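/*
 * Illustrative sketch (hypothetical "foo" tracer, not part of this file):
 * a minimal plugin supplies a name plus init/reset callbacks and registers
 * itself from an initcall:
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 */
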
2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 struct trace_buffer *buffer = buf->buffer;
2236
2237 if (!buffer)
2238 return;
2239
2240 ring_buffer_record_disable(buffer);
2241
2242 /* Make sure all commits have finished */
2243 synchronize_rcu();
2244 ring_buffer_reset_cpu(buffer, cpu);
2245
2246 ring_buffer_record_enable(buffer);
2247 }
2248
2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 struct trace_buffer *buffer = buf->buffer;
2252
2253 if (!buffer)
2254 return;
2255
2256 ring_buffer_record_disable(buffer);
2257
2258 /* Make sure all commits have finished */
2259 synchronize_rcu();
2260
2261 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262
2263 ring_buffer_reset_online_cpus(buffer);
2264
2265 ring_buffer_record_enable(buffer);
2266 }
2267
2268 /* Must have trace_types_lock held */
2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 struct trace_array *tr;
2272
2273 lockdep_assert_held(&trace_types_lock);
2274
2275 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 if (!tr->clear_trace)
2277 continue;
2278 tr->clear_trace = false;
2279 tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 }
2284 }
2285
2286 void tracing_reset_all_online_cpus(void)
2287 {
2288 mutex_lock(&trace_types_lock);
2289 tracing_reset_all_online_cpus_unlocked();
2290 mutex_unlock(&trace_types_lock);
2291 }
2292
2293 /*
2294 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295 * is the tgid last observed corresponding to pid=i.
2296 */
2297 static int *tgid_map;
2298
2299 /* The maximum valid index into tgid_map. */
2300 static size_t tgid_map_max;
2301
2302 #define SAVED_CMDLINES_DEFAULT 128
2303 #define NO_CMDLINE_MAP UINT_MAX
2304 /*
2305 * Preemption must be disabled before acquiring trace_cmdline_lock.
2306 * The various trace_arrays' max_lock must be acquired in a context
2307 * where interrupts are disabled.
2308 */
2309 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310 struct saved_cmdlines_buffer {
2311 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312 unsigned *map_cmdline_to_pid;
2313 unsigned cmdline_num;
2314 int cmdline_idx;
2315 char saved_cmdlines[];
2316 };
2317 static struct saved_cmdlines_buffer *savedcmd;
2318
2319 /* Holds the size of a cmdline and pid element */
2320 #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s) \
2321 (TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
2322
2323 static inline char *get_saved_cmdlines(int idx)
2324 {
2325 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2326 }
2327
2328 static inline void set_cmdline(int idx, const char *cmdline)
2329 {
2330 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2331 }
2332
2333 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2334 {
2335 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2336
2337 kmemleak_free(s);
2338 free_pages((unsigned long)s, order);
2339 }
2340
2341 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2342 {
2343 struct saved_cmdlines_buffer *s;
2344 struct page *page;
2345 int orig_size, size;
2346 int order;
2347
2348 /* Figure out how much is needed to hold the given number of cmdlines */
2349 orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2350 order = get_order(orig_size);
2351 size = 1 << (order + PAGE_SHIFT);
2352 page = alloc_pages(GFP_KERNEL, order);
2353 if (!page)
2354 return NULL;
2355
2356 s = page_address(page);
2357 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2358 memset(s, 0, sizeof(*s));
2359
2360 /* Round up to actual allocation */
2361 val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2362 s->cmdline_num = val;
2363
2364 /* Place map_cmdline_to_pid array right after saved_cmdlines */
2365 s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
2366
2367 s->cmdline_idx = 0;
2368 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2369 sizeof(s->map_pid_to_cmdline));
2370 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2371 val * sizeof(*s->map_cmdline_to_pid));
2372
2373 return s;
2374 }
2375
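/*
 * Worked example (illustrative): with TASK_COMM_LEN == 16 and
 * sizeof(unsigned) == 4, each saved cmdline slot costs
 * SAVED_CMDLINE_MAP_ELEMENT_SIZE() == 20 bytes.  The default of 128
 * entries therefore needs sizeof(*s) + 128 * 20 bytes, get_order()
 * rounds that up to a whole power-of-two number of pages, and the
 * slack is handed back as extra cmdline slots when 'val' is recomputed
 * from the actual allocation size.
 */
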
2376 static int trace_create_savedcmd(void)
2377 {
2378 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2379
2380 return savedcmd ? 0 : -ENOMEM;
2381 }
2382
2383 int is_tracing_stopped(void)
2384 {
2385 return global_trace.stop_count;
2386 }
2387
2388 static void tracing_start_tr(struct trace_array *tr)
2389 {
2390 struct trace_buffer *buffer;
2391 unsigned long flags;
2392
2393 if (tracing_disabled)
2394 return;
2395
2396 raw_spin_lock_irqsave(&tr->start_lock, flags);
2397 if (--tr->stop_count) {
2398 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2399 /* Someone screwed up their debugging */
2400 tr->stop_count = 0;
2401 }
2402 goto out;
2403 }
2404
2405 /* Prevent the buffers from switching */
2406 arch_spin_lock(&tr->max_lock);
2407
2408 buffer = tr->array_buffer.buffer;
2409 if (buffer)
2410 ring_buffer_record_enable(buffer);
2411
2412 #ifdef CONFIG_TRACER_MAX_TRACE
2413 buffer = tr->max_buffer.buffer;
2414 if (buffer)
2415 ring_buffer_record_enable(buffer);
2416 #endif
2417
2418 arch_spin_unlock(&tr->max_lock);
2419
2420 out:
2421 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2422 }
2423
2424 /**
2425 * tracing_start - quick start of the tracer
2426 *
2427 * If tracing is enabled but was stopped by tracing_stop,
2428 * this will start the tracer back up.
2429 */
2430 void tracing_start(void)
2431
2432 {
2433 return tracing_start_tr(&global_trace);
2434 }
2435
2436 static void tracing_stop_tr(struct trace_array *tr)
2437 {
2438 struct trace_buffer *buffer;
2439 unsigned long flags;
2440
2441 raw_spin_lock_irqsave(&tr->start_lock, flags);
2442 if (tr->stop_count++)
2443 goto out;
2444
2445 /* Prevent the buffers from switching */
2446 arch_spin_lock(&tr->max_lock);
2447
2448 buffer = tr->array_buffer.buffer;
2449 if (buffer)
2450 ring_buffer_record_disable(buffer);
2451
2452 #ifdef CONFIG_TRACER_MAX_TRACE
2453 buffer = tr->max_buffer.buffer;
2454 if (buffer)
2455 ring_buffer_record_disable(buffer);
2456 #endif
2457
2458 arch_spin_unlock(&tr->max_lock);
2459
2460 out:
2461 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2462 }
2463
2464 /**
2465 * tracing_stop - quick stop of the tracer
2466 *
2467 * Light weight way to stop tracing. Use in conjunction with
2468 * tracing_start.
2469 */
2470 void tracing_stop(void)
2471 {
2472 return tracing_stop_tr(&global_trace);
2473 }
2474
2475 static int trace_save_cmdline(struct task_struct *tsk)
2476 {
2477 unsigned tpid, idx;
2478
2479 /* treat recording of idle task as a success */
2480 if (!tsk->pid)
2481 return 1;
2482
2483 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2484
2485 /*
2486 * It's not the end of the world if we don't get
2487 * the lock, but we also don't want to spin
2488 * nor do we want to disable interrupts,
2489 * so if we miss here, then better luck next time.
2490 *
2491 * This is called from within the scheduler and wakeup paths, so
2492 * interrupts had better be disabled and the run queue lock held.
2493 */
2494 lockdep_assert_preemption_disabled();
2495 if (!arch_spin_trylock(&trace_cmdline_lock))
2496 return 0;
2497
2498 idx = savedcmd->map_pid_to_cmdline[tpid];
2499 if (idx == NO_CMDLINE_MAP) {
2500 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2501
2502 savedcmd->map_pid_to_cmdline[tpid] = idx;
2503 savedcmd->cmdline_idx = idx;
2504 }
2505
2506 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2507 set_cmdline(idx, tsk->comm);
2508
2509 arch_spin_unlock(&trace_cmdline_lock);
2510
2511 return 1;
2512 }
2513
2514 static void __trace_find_cmdline(int pid, char comm[])
2515 {
2516 unsigned map;
2517 int tpid;
2518
2519 if (!pid) {
2520 strcpy(comm, "<idle>");
2521 return;
2522 }
2523
2524 if (WARN_ON_ONCE(pid < 0)) {
2525 strcpy(comm, "<XXX>");
2526 return;
2527 }
2528
2529 tpid = pid & (PID_MAX_DEFAULT - 1);
2530 map = savedcmd->map_pid_to_cmdline[tpid];
2531 if (map != NO_CMDLINE_MAP) {
2532 tpid = savedcmd->map_cmdline_to_pid[map];
2533 if (tpid == pid) {
2534 strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2535 return;
2536 }
2537 }
2538 strcpy(comm, "<...>");
2539 }
2540
2541 void trace_find_cmdline(int pid, char comm[])
2542 {
2543 preempt_disable();
2544 arch_spin_lock(&trace_cmdline_lock);
2545
2546 __trace_find_cmdline(pid, comm);
2547
2548 arch_spin_unlock(&trace_cmdline_lock);
2549 preempt_enable();
2550 }
2551
2552 static int *trace_find_tgid_ptr(int pid)
2553 {
2554 /*
2555 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2556 * if we observe a non-NULL tgid_map then we also observe the correct
2557 * tgid_map_max.
2558 */
2559 int *map = smp_load_acquire(&tgid_map);
2560
2561 if (unlikely(!map || pid > tgid_map_max))
2562 return NULL;
2563
2564 return &map[pid];
2565 }
2566
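/*
 * Sketch of the paired publisher (the store side lives in
 * set_tracer_flag(), outside this section): the array is only published
 * after tgid_map_max is written, so the acquire above always sees a
 * consistent bound, roughly like:
 *
 *	tgid_map_max = pid_max;
 *	map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), GFP_KERNEL);
 *	...
 *	smp_store_release(&tgid_map, map);
 */
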
2567 int trace_find_tgid(int pid)
2568 {
2569 int *ptr = trace_find_tgid_ptr(pid);
2570
2571 return ptr ? *ptr : 0;
2572 }
2573
2574 static int trace_save_tgid(struct task_struct *tsk)
2575 {
2576 int *ptr;
2577
2578 /* treat recording of idle task as a success */
2579 if (!tsk->pid)
2580 return 1;
2581
2582 ptr = trace_find_tgid_ptr(tsk->pid);
2583 if (!ptr)
2584 return 0;
2585
2586 *ptr = tsk->tgid;
2587 return 1;
2588 }
2589
2590 static bool tracing_record_taskinfo_skip(int flags)
2591 {
2592 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2593 return true;
2594 if (!__this_cpu_read(trace_taskinfo_save))
2595 return true;
2596 return false;
2597 }
2598
2599 /**
2600 * tracing_record_taskinfo - record the task info of a task
2601 *
2602 * @task: task to record
2603 * @flags: TRACE_RECORD_CMDLINE for recording comm
2604 * TRACE_RECORD_TGID for recording tgid
2605 */
2606 void tracing_record_taskinfo(struct task_struct *task, int flags)
2607 {
2608 bool done;
2609
2610 if (tracing_record_taskinfo_skip(flags))
2611 return;
2612
2613 /*
2614 * Record as much task information as possible. If some fail, continue
2615 * to try to record the others.
2616 */
2617 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2618 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2619
2620 /* If recording any information failed, retry again soon. */
2621 if (!done)
2622 return;
2623
2624 __this_cpu_write(trace_taskinfo_save, false);
2625 }
2626
2627 /**
2628 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2629 *
2630 * @prev: previous task during sched_switch
2631 * @next: next task during sched_switch
2632 * @flags: TRACE_RECORD_CMDLINE for recording comm
2633 * TRACE_RECORD_TGID for recording tgid
2634 */
2635 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2636 struct task_struct *next, int flags)
2637 {
2638 bool done;
2639
2640 if (tracing_record_taskinfo_skip(flags))
2641 return;
2642
2643 /*
2644 * Record as much task information as possible. If some fail, continue
2645 * to try to record the others.
2646 */
2647 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2648 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2649 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2650 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2651
2652 /* If recording any information failed, retry again soon. */
2653 if (!done)
2654 return;
2655
2656 __this_cpu_write(trace_taskinfo_save, false);
2657 }
2658
2659 /* Helpers to record a specific task information */
2660 void tracing_record_cmdline(struct task_struct *task)
2661 {
2662 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2663 }
2664
2665 void tracing_record_tgid(struct task_struct *task)
2666 {
2667 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2668 }
2669
2670 /*
2671 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2672 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2673 * simplifies those functions and keeps them in sync.
2674 */
2675 enum print_line_t trace_handle_return(struct trace_seq *s)
2676 {
2677 return trace_seq_has_overflowed(s) ?
2678 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2679 }
2680 EXPORT_SYMBOL_GPL(trace_handle_return);
2681
2682 static unsigned short migration_disable_value(void)
2683 {
2684 #if defined(CONFIG_SMP)
2685 return current->migration_disabled;
2686 #else
2687 return 0;
2688 #endif
2689 }
2690
2691 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2692 {
2693 unsigned int trace_flags = irqs_status;
2694 unsigned int pc;
2695
2696 pc = preempt_count();
2697
2698 if (pc & NMI_MASK)
2699 trace_flags |= TRACE_FLAG_NMI;
2700 if (pc & HARDIRQ_MASK)
2701 trace_flags |= TRACE_FLAG_HARDIRQ;
2702 if (in_serving_softirq())
2703 trace_flags |= TRACE_FLAG_SOFTIRQ;
2704 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2705 trace_flags |= TRACE_FLAG_BH_OFF;
2706
2707 if (tif_need_resched())
2708 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2709 if (test_preempt_need_resched())
2710 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2711 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2712 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2713 }
2714
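/*
 * Layout of the returned trace_ctx word (illustrative decode):
 *
 *	bits  0- 3: preemption count, clamped to 0xf
 *	bits  4- 7: migration-disable depth, clamped to 0xf
 *	bits 16-23: TRACE_FLAG_* bits (irq/softirq/NMI/resched state)
 *
 * so, for example, a consumer can recover the flag byte with
 * (trace_ctx >> 16) & 0xff and the preempt count with trace_ctx & 0xf.
 */
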
2715 struct ring_buffer_event *
2716 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2717 int type,
2718 unsigned long len,
2719 unsigned int trace_ctx)
2720 {
2721 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2722 }
2723
2724 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2725 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2726 static int trace_buffered_event_ref;
2727
2728 /**
2729 * trace_buffered_event_enable - enable buffering events
2730 *
2731 * When events are being filtered, it is quicker to use a temporary
2732 * buffer to write the event data into if there's a likely chance
2733 * that it will not be committed. The discard of the ring buffer
2734 * is not as fast as committing, and is much slower than copying
2735 * a commit.
2736 *
2737 * When an event is to be filtered, allocate per cpu buffers to
2738 * write the event data into, and if the event is filtered and discarded
2739 * it is simply dropped, otherwise, the entire data is to be committed
2740 * in one shot.
2741 */
2742 void trace_buffered_event_enable(void)
2743 {
2744 struct ring_buffer_event *event;
2745 struct page *page;
2746 int cpu;
2747
2748 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2749
2750 if (trace_buffered_event_ref++)
2751 return;
2752
2753 for_each_tracing_cpu(cpu) {
2754 page = alloc_pages_node(cpu_to_node(cpu),
2755 GFP_KERNEL | __GFP_NORETRY, 0);
2756 /* This is just an optimization and can handle failures */
2757 if (!page) {
2758 pr_err("Failed to allocate event buffer\n");
2759 break;
2760 }
2761
2762 event = page_address(page);
2763 memset(event, 0, sizeof(*event));
2764
2765 per_cpu(trace_buffered_event, cpu) = event;
2766
2767 preempt_disable();
2768 if (cpu == smp_processor_id() &&
2769 __this_cpu_read(trace_buffered_event) !=
2770 per_cpu(trace_buffered_event, cpu))
2771 WARN_ON_ONCE(1);
2772 preempt_enable();
2773 }
2774 }
2775
2776 static void enable_trace_buffered_event(void *data)
2777 {
2778 /* Probably not needed, but do it anyway */
2779 smp_rmb();
2780 this_cpu_dec(trace_buffered_event_cnt);
2781 }
2782
2783 static void disable_trace_buffered_event(void *data)
2784 {
2785 this_cpu_inc(trace_buffered_event_cnt);
2786 }
2787
2788 /**
2789 * trace_buffered_event_disable - disable buffering events
2790 *
2791 * When a filter is removed, it is faster to not use the buffered
2792 * events, and to commit directly into the ring buffer. Free up
2793 * the temp buffers when there are no more users. This requires
2794 * special synchronization with current events.
2795 */
2796 void trace_buffered_event_disable(void)
2797 {
2798 int cpu;
2799
2800 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2801
2802 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2803 return;
2804
2805 if (--trace_buffered_event_ref)
2806 return;
2807
2808 /* For each CPU, set the buffer as used. */
2809 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2810 NULL, true);
2811
2812 /* Wait for all current users to finish */
2813 synchronize_rcu();
2814
2815 for_each_tracing_cpu(cpu) {
2816 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2817 per_cpu(trace_buffered_event, cpu) = NULL;
2818 }
2819
2820 /*
2821 * Wait for all CPUs that may have started checking whether they can
2822 * use their event buffer only after the previous synchronize_rcu()
2823 * call and that still read a valid pointer from trace_buffered_event.
2824 * They must not see the cleared trace_buffered_event_cnt, or they
2825 * could wrongly decide to use the pointed-to buffer, which is now freed.
2826 */
2827 synchronize_rcu();
2828
2829 /* For each CPU, relinquish the buffer */
2830 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2831 true);
2832 }
2833
2834 static struct trace_buffer *temp_buffer;
2835
2836 struct ring_buffer_event *
2837 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2838 struct trace_event_file *trace_file,
2839 int type, unsigned long len,
2840 unsigned int trace_ctx)
2841 {
2842 struct ring_buffer_event *entry;
2843 struct trace_array *tr = trace_file->tr;
2844 int val;
2845
2846 *current_rb = tr->array_buffer.buffer;
2847
2848 if (!tr->no_filter_buffering_ref &&
2849 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2850 preempt_disable_notrace();
2851 /*
2852 * Filtering is on, so try to use the per cpu buffer first.
2853 * This buffer will simulate a ring_buffer_event,
2854 * where the type_len is zero and the array[0] will
2855 * hold the full length.
2856 * (see include/linux/ring_buffer.h for details on
2857 * how the ring_buffer_event is structured).
2858 *
2859 * Using a temp buffer during filtering and copying it
2860 * on a matched filter is quicker than writing directly
2861 * into the ring buffer and then discarding it when
2862 * it doesn't match. That is because the discard
2863 * requires several atomic operations to get right.
2864 * Copying on a match and doing nothing on a failed match
2865 * is still quicker than skipping the copy on a match but
2866 * having to discard out of the ring buffer on a failed match.
2867 */
2868 if ((entry = __this_cpu_read(trace_buffered_event))) {
2869 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2870
2871 val = this_cpu_inc_return(trace_buffered_event_cnt);
2872
2873 /*
2874 * Preemption is disabled, but interrupts and NMIs
2875 * can still come in now. If that happens after
2876 * the above increment, then it will have to go
2877 * back to the old method of allocating the event
2878 * on the ring buffer, and if the filter fails, it
2879 * will have to call ring_buffer_discard_commit()
2880 * to remove it.
2881 *
2882 * Need to also check the unlikely case that the
2883 * length is bigger than the temp buffer size.
2884 * If that happens, then the reserve is pretty much
2885 * guaranteed to fail, as the ring buffer currently
2886 * only allows events less than a page. But that may
2887 * change in the future, so let the ring buffer reserve
2888 * handle the failure in that case.
2889 */
2890 if (val == 1 && likely(len <= max_len)) {
2891 trace_event_setup(entry, type, trace_ctx);
2892 entry->array[0] = len;
2893 /* Return with preemption disabled */
2894 return entry;
2895 }
2896 this_cpu_dec(trace_buffered_event_cnt);
2897 }
2898 /* __trace_buffer_lock_reserve() disables preemption */
2899 preempt_enable_notrace();
2900 }
2901
2902 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2903 trace_ctx);
2904 /*
2905 * If tracing is off, but we have triggers enabled
2906 * we still need to look at the event data. Use the temp_buffer
2907 * to store the trace event for the trigger to use. It's recursion
2908 * safe and will not be recorded anywhere.
2909 */
2910 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2911 *current_rb = temp_buffer;
2912 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2913 trace_ctx);
2914 }
2915 return entry;
2916 }
2917 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2918
2919 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2920 static DEFINE_MUTEX(tracepoint_printk_mutex);
2921
2922 static void output_printk(struct trace_event_buffer *fbuffer)
2923 {
2924 struct trace_event_call *event_call;
2925 struct trace_event_file *file;
2926 struct trace_event *event;
2927 unsigned long flags;
2928 struct trace_iterator *iter = tracepoint_print_iter;
2929
2930 /* We should never get here if iter is NULL */
2931 if (WARN_ON_ONCE(!iter))
2932 return;
2933
2934 event_call = fbuffer->trace_file->event_call;
2935 if (!event_call || !event_call->event.funcs ||
2936 !event_call->event.funcs->trace)
2937 return;
2938
2939 file = fbuffer->trace_file;
2940 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2941 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2942 !filter_match_preds(file->filter, fbuffer->entry)))
2943 return;
2944
2945 event = &fbuffer->trace_file->event_call->event;
2946
2947 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2948 trace_seq_init(&iter->seq);
2949 iter->ent = fbuffer->entry;
2950 event_call->event.funcs->trace(iter, 0, event);
2951 trace_seq_putc(&iter->seq, 0);
2952 printk("%s", iter->seq.buffer);
2953
2954 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2955 }
2956
2957 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2958 void *buffer, size_t *lenp,
2959 loff_t *ppos)
2960 {
2961 int save_tracepoint_printk;
2962 int ret;
2963
2964 mutex_lock(&tracepoint_printk_mutex);
2965 save_tracepoint_printk = tracepoint_printk;
2966
2967 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2968
2969 /*
2970 * This will force exiting early, as tracepoint_printk
2971 * is always zero when tracepoint_print_iter is not allocated.
2972 */
2973 if (!tracepoint_print_iter)
2974 tracepoint_printk = 0;
2975
2976 if (save_tracepoint_printk == tracepoint_printk)
2977 goto out;
2978
2979 if (tracepoint_printk)
2980 static_key_enable(&tracepoint_printk_key.key);
2981 else
2982 static_key_disable(&tracepoint_printk_key.key);
2983
2984 out:
2985 mutex_unlock(&tracepoint_printk_mutex);
2986
2987 return ret;
2988 }
2989
2990 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2991 {
2992 enum event_trigger_type tt = ETT_NONE;
2993 struct trace_event_file *file = fbuffer->trace_file;
2994
2995 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2996 fbuffer->entry, &tt))
2997 goto discard;
2998
2999 if (static_key_false(&tracepoint_printk_key.key))
3000 output_printk(fbuffer);
3001
3002 if (static_branch_unlikely(&trace_event_exports_enabled))
3003 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3004
3005 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3006 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3007
3008 discard:
3009 if (tt)
3010 event_triggers_post_call(file, tt);
3011
3012 }
3013 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3014
3015 /*
3016 * Skip 3:
3017 *
3018 * trace_buffer_unlock_commit_regs()
3019 * trace_event_buffer_commit()
3020 * trace_event_raw_event_xxx()
3021 */
3022 # define STACK_SKIP 3
3023
3024 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3025 struct trace_buffer *buffer,
3026 struct ring_buffer_event *event,
3027 unsigned int trace_ctx,
3028 struct pt_regs *regs)
3029 {
3030 __buffer_unlock_commit(buffer, event);
3031
3032 /*
3033 * If regs is not set, then skip the necessary functions.
3034 * Note, we can still get here via blktrace, wakeup tracer
3035 * and mmiotrace, but that's ok if they lose a function or
3036 * two. They are not that meaningful.
3037 */
3038 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3039 ftrace_trace_userstack(tr, buffer, trace_ctx);
3040 }
3041
3042 /*
3043 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3044 */
3045 void
3046 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3047 struct ring_buffer_event *event)
3048 {
3049 __buffer_unlock_commit(buffer, event);
3050 }
3051
3052 void
3053 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3054 parent_ip, unsigned int trace_ctx)
3055 {
3056 struct trace_event_call *call = &event_function;
3057 struct trace_buffer *buffer = tr->array_buffer.buffer;
3058 struct ring_buffer_event *event;
3059 struct ftrace_entry *entry;
3060
3061 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3062 trace_ctx);
3063 if (!event)
3064 return;
3065 entry = ring_buffer_event_data(event);
3066 entry->ip = ip;
3067 entry->parent_ip = parent_ip;
3068
3069 if (!call_filter_check_discard(call, entry, buffer, event)) {
3070 if (static_branch_unlikely(&trace_function_exports_enabled))
3071 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3072 __buffer_unlock_commit(buffer, event);
3073 }
3074 }
3075
3076 #ifdef CONFIG_STACKTRACE
3077
3078 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3079 #define FTRACE_KSTACK_NESTING 4
3080
3081 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3082
3083 struct ftrace_stack {
3084 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3085 };
3086
3087
3088 struct ftrace_stacks {
3089 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3090 };
3091
3092 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3093 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3094
3095 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3096 unsigned int trace_ctx,
3097 int skip, struct pt_regs *regs)
3098 {
3099 struct trace_event_call *call = &event_kernel_stack;
3100 struct ring_buffer_event *event;
3101 unsigned int size, nr_entries;
3102 struct ftrace_stack *fstack;
3103 struct stack_entry *entry;
3104 int stackidx;
3105
3106 /*
3107 * Add one, for this function and the call to stack_trace_save().
3108 * If regs is set, then these functions will not be in the way.
3109 */
3110 #ifndef CONFIG_UNWINDER_ORC
3111 if (!regs)
3112 skip++;
3113 #endif
3114
3115 preempt_disable_notrace();
3116
3117 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3118
3119 /* This should never happen. If it does, yell once and skip */
3120 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3121 goto out;
3122
3123 /*
3124 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3125 * interrupt will either see the value pre increment or post
3126 * increment. If the interrupt happens pre increment it will have
3127 * restored the counter when it returns. We just need a barrier to
3128 * keep gcc from moving things around.
3129 */
3130 barrier();
3131
3132 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3133 size = ARRAY_SIZE(fstack->calls);
3134
3135 if (regs) {
3136 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3137 size, skip);
3138 } else {
3139 nr_entries = stack_trace_save(fstack->calls, size, skip);
3140 }
3141
3142 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3143 struct_size(entry, caller, nr_entries),
3144 trace_ctx);
3145 if (!event)
3146 goto out;
3147 entry = ring_buffer_event_data(event);
3148
3149 entry->size = nr_entries;
3150 memcpy(&entry->caller, fstack->calls,
3151 flex_array_size(entry, caller, nr_entries));
3152
3153 if (!call_filter_check_discard(call, entry, buffer, event))
3154 __buffer_unlock_commit(buffer, event);
3155
3156 out:
3157 /* Again, don't let gcc optimize things here */
3158 barrier();
3159 __this_cpu_dec(ftrace_stack_reserve);
3160 preempt_enable_notrace();
3161
3162 }
3163
3164 static inline void ftrace_trace_stack(struct trace_array *tr,
3165 struct trace_buffer *buffer,
3166 unsigned int trace_ctx,
3167 int skip, struct pt_regs *regs)
3168 {
3169 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3170 return;
3171
3172 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3173 }
3174
3175 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3176 int skip)
3177 {
3178 struct trace_buffer *buffer = tr->array_buffer.buffer;
3179
3180 if (rcu_is_watching()) {
3181 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182 return;
3183 }
3184
3185 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3186 return;
3187
3188 /*
3189 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3190 * but if the above rcu_is_watching() failed, then the NMI
3191 * triggered someplace critical, and ct_irq_enter() should
3192 * not be called from NMI.
3193 */
3194 if (unlikely(in_nmi()))
3195 return;
3196
3197 ct_irq_enter_irqson();
3198 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3199 ct_irq_exit_irqson();
3200 }
3201
3202 /**
3203 * trace_dump_stack - record a stack back trace in the trace buffer
3204 * @skip: Number of functions to skip (helper handlers)
3205 */
3206 void trace_dump_stack(int skip)
3207 {
3208 if (tracing_disabled || tracing_selftest_running)
3209 return;
3210
3211 #ifndef CONFIG_UNWINDER_ORC
3212 /* Skip 1 to skip this function. */
3213 skip++;
3214 #endif
3215 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3216 tracing_gen_ctx(), skip, NULL);
3217 }
3218 EXPORT_SYMBOL_GPL(trace_dump_stack);
3219
3220 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3221 static DEFINE_PER_CPU(int, user_stack_count);
3222
3223 static void
3224 ftrace_trace_userstack(struct trace_array *tr,
3225 struct trace_buffer *buffer, unsigned int trace_ctx)
3226 {
3227 struct trace_event_call *call = &event_user_stack;
3228 struct ring_buffer_event *event;
3229 struct userstack_entry *entry;
3230
3231 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3232 return;
3233
3234 /*
3235 * NMIs cannot handle page faults, even with fixups.
3236 * Saving the user stack can (and often does) fault.
3237 */
3238 if (unlikely(in_nmi()))
3239 return;
3240
3241 /*
3242 * prevent recursion, since the user stack tracing may
3243 * trigger other kernel events.
3244 */
3245 preempt_disable();
3246 if (__this_cpu_read(user_stack_count))
3247 goto out;
3248
3249 __this_cpu_inc(user_stack_count);
3250
3251 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3252 sizeof(*entry), trace_ctx);
3253 if (!event)
3254 goto out_drop_count;
3255 entry = ring_buffer_event_data(event);
3256
3257 entry->tgid = current->tgid;
3258 memset(&entry->caller, 0, sizeof(entry->caller));
3259
3260 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3261 if (!call_filter_check_discard(call, entry, buffer, event))
3262 __buffer_unlock_commit(buffer, event);
3263
3264 out_drop_count:
3265 __this_cpu_dec(user_stack_count);
3266 out:
3267 preempt_enable();
3268 }
3269 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3270 static void ftrace_trace_userstack(struct trace_array *tr,
3271 struct trace_buffer *buffer,
3272 unsigned int trace_ctx)
3273 {
3274 }
3275 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3276
3277 #endif /* CONFIG_STACKTRACE */
3278
3279 static inline void
3280 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3281 unsigned long long delta)
3282 {
3283 entry->bottom_delta_ts = delta & U32_MAX;
3284 entry->top_delta_ts = (delta >> 32);
3285 }
3286
3287 void trace_last_func_repeats(struct trace_array *tr,
3288 struct trace_func_repeats *last_info,
3289 unsigned int trace_ctx)
3290 {
3291 struct trace_buffer *buffer = tr->array_buffer.buffer;
3292 struct func_repeats_entry *entry;
3293 struct ring_buffer_event *event;
3294 u64 delta;
3295
3296 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3297 sizeof(*entry), trace_ctx);
3298 if (!event)
3299 return;
3300
3301 delta = ring_buffer_event_time_stamp(buffer, event) -
3302 last_info->ts_last_call;
3303
3304 entry = ring_buffer_event_data(event);
3305 entry->ip = last_info->ip;
3306 entry->parent_ip = last_info->parent_ip;
3307 entry->count = last_info->count;
3308 func_repeats_set_delta_ts(entry, delta);
3309
3310 __buffer_unlock_commit(buffer, event);
3311 }
3312
3313 /* created for use with alloc_percpu */
3314 struct trace_buffer_struct {
3315 int nesting;
3316 char buffer[4][TRACE_BUF_SIZE];
3317 };
3318
3319 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3320
3321 /*
3322 * This allows for lockless recording. If we're nested too deeply, then
3323 * this returns NULL.
3324 */
3325 static char *get_trace_buf(void)
3326 {
3327 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3328
3329 if (!trace_percpu_buffer || buffer->nesting >= 4)
3330 return NULL;
3331
3332 buffer->nesting++;
3333
3334 /* Interrupts must see nesting incremented before we use the buffer */
3335 barrier();
3336 return &buffer->buffer[buffer->nesting - 1][0];
3337 }
3338
3339 static void put_trace_buf(void)
3340 {
3341 /* Don't let the decrement of nesting leak before this */
3342 barrier();
3343 this_cpu_dec(trace_percpu_buffer->nesting);
3344 }
3345
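/*
 * Typical usage pattern (sketch; trace_vbprintk() below is a real user):
 * the buffer is claimed and released with preemption disabled so the
 * per-cpu nesting count stays balanced:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		...format at most TRACE_BUF_SIZE bytes into tbuffer...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */
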
3346 static int alloc_percpu_trace_buffer(void)
3347 {
3348 struct trace_buffer_struct __percpu *buffers;
3349
3350 if (trace_percpu_buffer)
3351 return 0;
3352
3353 buffers = alloc_percpu(struct trace_buffer_struct);
3354 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3355 return -ENOMEM;
3356
3357 trace_percpu_buffer = buffers;
3358 return 0;
3359 }
3360
3361 static int buffers_allocated;
3362
3363 void trace_printk_init_buffers(void)
3364 {
3365 if (buffers_allocated)
3366 return;
3367
3368 if (alloc_percpu_trace_buffer())
3369 return;
3370
3371 /* trace_printk() is for debug use only. Don't use it in production. */
3372
3373 pr_warn("\n");
3374 pr_warn("**********************************************************\n");
3375 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3376 pr_warn("** **\n");
3377 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3378 pr_warn("** **\n");
3379 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3380 pr_warn("** unsafe for production use. **\n");
3381 pr_warn("** **\n");
3382 pr_warn("** If you see this message and you are not debugging **\n");
3383 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3384 pr_warn("** **\n");
3385 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3386 pr_warn("**********************************************************\n");
3387
3388 /* Expand the buffers to set size */
3389 tracing_update_buffers();
3390
3391 buffers_allocated = 1;
3392
3393 /*
3394 * trace_printk_init_buffers() can be called by modules.
3395 * If that happens, then we need to start cmdline recording
3396 * directly here. If the global_trace.buffer is already
3397 * allocated here, then this was called by module code.
3398 */
3399 if (global_trace.array_buffer.buffer)
3400 tracing_start_cmdline_record();
3401 }
3402 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3403
3404 void trace_printk_start_comm(void)
3405 {
3406 /* Start tracing comms if trace printk is set */
3407 if (!buffers_allocated)
3408 return;
3409 tracing_start_cmdline_record();
3410 }
3411
3412 static void trace_printk_start_stop_comm(int enabled)
3413 {
3414 if (!buffers_allocated)
3415 return;
3416
3417 if (enabled)
3418 tracing_start_cmdline_record();
3419 else
3420 tracing_stop_cmdline_record();
3421 }
3422
3423 /**
3424 * trace_vbprintk - write binary msg to tracing buffer
3425 * @ip: The address of the caller
3426 * @fmt: The string format to write to the buffer
3427 * @args: Arguments for @fmt
3428 */
3429 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3430 {
3431 struct trace_event_call *call = &event_bprint;
3432 struct ring_buffer_event *event;
3433 struct trace_buffer *buffer;
3434 struct trace_array *tr = &global_trace;
3435 struct bprint_entry *entry;
3436 unsigned int trace_ctx;
3437 char *tbuffer;
3438 int len = 0, size;
3439
3440 if (unlikely(tracing_selftest_running || tracing_disabled))
3441 return 0;
3442
3443 /* Don't pollute graph traces with trace_vprintk internals */
3444 pause_graph_tracing();
3445
3446 trace_ctx = tracing_gen_ctx();
3447 preempt_disable_notrace();
3448
3449 tbuffer = get_trace_buf();
3450 if (!tbuffer) {
3451 len = 0;
3452 goto out_nobuffer;
3453 }
3454
3455 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3456
3457 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3458 goto out_put;
3459
3460 size = sizeof(*entry) + sizeof(u32) * len;
3461 buffer = tr->array_buffer.buffer;
3462 ring_buffer_nest_start(buffer);
3463 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3464 trace_ctx);
3465 if (!event)
3466 goto out;
3467 entry = ring_buffer_event_data(event);
3468 entry->ip = ip;
3469 entry->fmt = fmt;
3470
3471 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3472 if (!call_filter_check_discard(call, entry, buffer, event)) {
3473 __buffer_unlock_commit(buffer, event);
3474 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3475 }
3476
3477 out:
3478 ring_buffer_nest_end(buffer);
3479 out_put:
3480 put_trace_buf();
3481
3482 out_nobuffer:
3483 preempt_enable_notrace();
3484 unpause_graph_tracing();
3485
3486 return len;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_vbprintk);
3489
3490 __printf(3, 0)
3491 static int
3492 __trace_array_vprintk(struct trace_buffer *buffer,
3493 unsigned long ip, const char *fmt, va_list args)
3494 {
3495 struct trace_event_call *call = &event_print;
3496 struct ring_buffer_event *event;
3497 int len = 0, size;
3498 struct print_entry *entry;
3499 unsigned int trace_ctx;
3500 char *tbuffer;
3501
3502 if (tracing_disabled)
3503 return 0;
3504
3505 /* Don't pollute graph traces with trace_vprintk internals */
3506 pause_graph_tracing();
3507
3508 trace_ctx = tracing_gen_ctx();
3509 preempt_disable_notrace();
3510
3511
3512 tbuffer = get_trace_buf();
3513 if (!tbuffer) {
3514 len = 0;
3515 goto out_nobuffer;
3516 }
3517
3518 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3519
3520 size = sizeof(*entry) + len + 1;
3521 ring_buffer_nest_start(buffer);
3522 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3523 trace_ctx);
3524 if (!event)
3525 goto out;
3526 entry = ring_buffer_event_data(event);
3527 entry->ip = ip;
3528
3529 memcpy(&entry->buf, tbuffer, len + 1);
3530 if (!call_filter_check_discard(call, entry, buffer, event)) {
3531 __buffer_unlock_commit(buffer, event);
3532 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3533 }
3534
3535 out:
3536 ring_buffer_nest_end(buffer);
3537 put_trace_buf();
3538
3539 out_nobuffer:
3540 preempt_enable_notrace();
3541 unpause_graph_tracing();
3542
3543 return len;
3544 }
3545
3546 __printf(3, 0)
3547 int trace_array_vprintk(struct trace_array *tr,
3548 unsigned long ip, const char *fmt, va_list args)
3549 {
3550 if (tracing_selftest_running && tr == &global_trace)
3551 return 0;
3552
3553 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3554 }
3555
3556 /**
3557 * trace_array_printk - Print a message to a specific instance
3558 * @tr: The instance trace_array descriptor
3559 * @ip: The instruction pointer that this is called from.
3560 * @fmt: The format to print (printf format)
3561 *
3562 * If a subsystem sets up its own instance, it has the right to
3563 * printk strings into its tracing instance buffer using this
3564 * function. Note, this function will not write into the top level
3565 * buffer (use trace_printk() for that), as the top level buffer
3566 * should only contain events that can be individually disabled.
3567 * trace_printk() is only used for debugging a kernel, and should never
3568 * be incorporated into normal use.
3569 *
3570 * trace_array_printk() can be used, as it will not add noise to the
3571 * top level tracing buffer.
3572 *
3573 * Note, trace_array_init_printk() must be called on @tr before this
3574 * can be used.
3575 */
3576 __printf(3, 0)
3577 int trace_array_printk(struct trace_array *tr,
3578 unsigned long ip, const char *fmt, ...)
3579 {
3580 int ret;
3581 va_list ap;
3582
3583 if (!tr)
3584 return -ENOENT;
3585
3586 /* This is only allowed for created instances */
3587 if (tr == &global_trace)
3588 return 0;
3589
3590 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3591 return 0;
3592
3593 va_start(ap, fmt);
3594 ret = trace_array_vprintk(tr, ip, fmt, ap);
3595 va_end(ap);
3596 return ret;
3597 }
3598 EXPORT_SYMBOL_GPL(trace_array_printk);
3599
3600 /**
3601 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3602 * @tr: The trace array to initialize the buffers for
3603 *
3604 * As trace_array_printk() only writes into instances, calls to it are OK
3605 * to have in the kernel (unlike trace_printk()). This needs to be called
3606 * before trace_array_printk() can be used on a trace_array.
3607 */
3608 int trace_array_init_printk(struct trace_array *tr)
3609 {
3610 if (!tr)
3611 return -ENOENT;
3612
3613 /* This is only allowed for created instances */
3614 if (tr == &global_trace)
3615 return -EINVAL;
3616
3617 return alloc_percpu_trace_buffer();
3618 }
3619 EXPORT_SYMBOL_GPL(trace_array_init_printk);
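/*
 * Illustrative sketch only (not part of this file): a driver or subsystem
 * that created its own instance, for example via trace_array_get_by_name(),
 * might combine the two helpers above roughly like this:
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_,
 *				   "request %d done in %llu ns\n", id, delta);
 *
 * "tr", "id" and "delta" are placeholders for the caller's own data; the
 * messages end up only in that instance's buffer, never in the top level
 * trace buffer.
 */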
3620
3621 __printf(3, 4)
3622 int trace_array_printk_buf(struct trace_buffer *buffer,
3623 unsigned long ip, const char *fmt, ...)
3624 {
3625 int ret;
3626 va_list ap;
3627
3628 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3629 return 0;
3630
3631 va_start(ap, fmt);
3632 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3633 va_end(ap);
3634 return ret;
3635 }
3636
3637 __printf(2, 0)
3638 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3639 {
3640 return trace_array_vprintk(&global_trace, ip, fmt, args);
3641 }
3642 EXPORT_SYMBOL_GPL(trace_vprintk);
3643
3644 static void trace_iterator_increment(struct trace_iterator *iter)
3645 {
3646 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3647
3648 iter->idx++;
3649 if (buf_iter)
3650 ring_buffer_iter_advance(buf_iter);
3651 }
3652
3653 static struct trace_entry *
3654 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3655 unsigned long *lost_events)
3656 {
3657 struct ring_buffer_event *event;
3658 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3659
3660 if (buf_iter) {
3661 event = ring_buffer_iter_peek(buf_iter, ts);
3662 if (lost_events)
3663 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3664 (unsigned long)-1 : 0;
3665 } else {
3666 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3667 lost_events);
3668 }
3669
3670 if (event) {
3671 iter->ent_size = ring_buffer_event_length(event);
3672 return ring_buffer_event_data(event);
3673 }
3674 iter->ent_size = 0;
3675 return NULL;
3676 }
3677
3678 static struct trace_entry *
3679 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3680 unsigned long *missing_events, u64 *ent_ts)
3681 {
3682 struct trace_buffer *buffer = iter->array_buffer->buffer;
3683 struct trace_entry *ent, *next = NULL;
3684 unsigned long lost_events = 0, next_lost = 0;
3685 int cpu_file = iter->cpu_file;
3686 u64 next_ts = 0, ts;
3687 int next_cpu = -1;
3688 int next_size = 0;
3689 int cpu;
3690
3691 /*
3692 * If we are in a per_cpu trace file, don't bother iterating over
3693 * all CPUs; just peek at that one directly.
3694 */
3695 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3696 if (ring_buffer_empty_cpu(buffer, cpu_file))
3697 return NULL;
3698 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3699 if (ent_cpu)
3700 *ent_cpu = cpu_file;
3701
3702 return ent;
3703 }
3704
3705 for_each_tracing_cpu(cpu) {
3706
3707 if (ring_buffer_empty_cpu(buffer, cpu))
3708 continue;
3709
3710 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3711
3712 /*
3713 * Pick the entry with the smallest timestamp:
3714 */
3715 if (ent && (!next || ts < next_ts)) {
3716 next = ent;
3717 next_cpu = cpu;
3718 next_ts = ts;
3719 next_lost = lost_events;
3720 next_size = iter->ent_size;
3721 }
3722 }
3723
3724 iter->ent_size = next_size;
3725
3726 if (ent_cpu)
3727 *ent_cpu = next_cpu;
3728
3729 if (ent_ts)
3730 *ent_ts = next_ts;
3731
3732 if (missing_events)
3733 *missing_events = next_lost;
3734
3735 return next;
3736 }
3737
3738 #define STATIC_FMT_BUF_SIZE 128
3739 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3740
3741 char *trace_iter_expand_format(struct trace_iterator *iter)
3742 {
3743 char *tmp;
3744
3745 /*
3746 * iter->tr is NULL when used with tp_printk, which means this
3747 * can get called where it is not safe to call krealloc().
3748 */
3749 if (!iter->tr || iter->fmt == static_fmt_buf)
3750 return NULL;
3751
3752 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3753 GFP_KERNEL);
3754 if (tmp) {
3755 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3756 iter->fmt = tmp;
3757 }
3758
3759 return tmp;
3760 }
3761
3762 /* Returns true if the string is safe to dereference from an event */
3763 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3764 bool star, int len)
3765 {
3766 unsigned long addr = (unsigned long)str;
3767 struct trace_event *trace_event;
3768 struct trace_event_call *event;
3769
3770 /* Ignore strings with no length */
3771 if (star && !len)
3772 return true;
3773
3774 /* OK if part of the event data */
3775 if ((addr >= (unsigned long)iter->ent) &&
3776 (addr < (unsigned long)iter->ent + iter->ent_size))
3777 return true;
3778
3779 /* OK if part of the temp seq buffer */
3780 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3781 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3782 return true;
3783
3784 /* Core rodata can not be freed */
3785 if (is_kernel_rodata(addr))
3786 return true;
3787
3788 if (trace_is_tracepoint_string(str))
3789 return true;
3790
3791 /*
3792 * Now this could be a module event, referencing core module
3793 * data, which is OK.
3794 */
3795 if (!iter->ent)
3796 return false;
3797
3798 trace_event = ftrace_find_event(iter->ent->type);
3799 if (!trace_event)
3800 return false;
3801
3802 event = container_of(trace_event, struct trace_event_call, event);
3803 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3804 return false;
3805
3806 /* Would rather have rodata, but this will suffice */
3807 if (within_module_core(addr, event->module))
3808 return true;
3809
3810 return false;
3811 }
3812
3813 static const char *show_buffer(struct trace_seq *s)
3814 {
3815 struct seq_buf *seq = &s->seq;
3816
3817 seq_buf_terminate(seq);
3818
3819 return seq->buffer;
3820 }
3821
3822 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3823
3824 static int test_can_verify_check(const char *fmt, ...)
3825 {
3826 char buf[16];
3827 va_list ap;
3828 int ret;
3829
3830 /*
3831 * The verifier depends on vsnprintf() modifying the va_list that is
3832 * passed to it, i.e. on the va_list being passed by reference. Some
3833 * architectures (like x86_32) pass it by value, which means that
3834 * vsnprintf() does not modify the caller's va_list, and the verifier
3835 * would then need to understand every conversion that vsnprintf()
3836 * can perform. If the va_list is passed by value, the verifier
3837 * is therefore disabled.
3838 */
3839 va_start(ap, fmt);
3840 vsnprintf(buf, 16, "%d", ap);
3841 ret = va_arg(ap, int);
3842 va_end(ap);
3843
3844 return ret;
3845 }
3846
3847 static void test_can_verify(void)
3848 {
3849 if (!test_can_verify_check("%d %d", 0, 1)) {
3850 pr_info("trace event string verifier disabled\n");
3851 static_branch_inc(&trace_no_verify);
3852 }
3853 }
3854
3855 /**
3856 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3857 * @iter: The iterator that holds the seq buffer and the event being printed
3858 * @fmt: The format used to print the event
3859 * @ap: The va_list holding the data to print from @fmt.
3860 *
3861 * This writes the data into the @iter->seq buffer using the data from
3862 * @fmt and @ap. If the format has a %s, then the source of the string
3863 * is examined to make sure it is safe to print, otherwise it will
3864 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3865 * pointer.
3866 */
3867 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3868 va_list ap)
3869 {
3870 const char *p = fmt;
3871 const char *str;
3872 int i, j;
3873
3874 if (WARN_ON_ONCE(!fmt))
3875 return;
3876
3877 if (static_branch_unlikely(&trace_no_verify))
3878 goto print;
3879
3880 /* Don't bother checking when doing a ftrace_dump() */
3881 if (iter->fmt == static_fmt_buf)
3882 goto print;
3883
3884 while (*p) {
3885 bool star = false;
3886 int len = 0;
3887
3888 j = 0;
3889
3890 /* We only care about %s and variants */
3891 for (i = 0; p[i]; i++) {
3892 if (i + 1 >= iter->fmt_size) {
3893 /*
3894 * If we can't expand the copy buffer,
3895 * just print it.
3896 */
3897 if (!trace_iter_expand_format(iter))
3898 goto print;
3899 }
3900
3901 if (p[i] == '\\' && p[i+1]) {
3902 i++;
3903 continue;
3904 }
3905 if (p[i] == '%') {
3906 /* Need to test cases like %08.*s */
3907 for (j = 1; p[i+j]; j++) {
3908 if (isdigit(p[i+j]) ||
3909 p[i+j] == '.')
3910 continue;
3911 if (p[i+j] == '*') {
3912 star = true;
3913 continue;
3914 }
3915 break;
3916 }
3917 if (p[i+j] == 's')
3918 break;
3919 star = false;
3920 }
3921 j = 0;
3922 }
3923 /* If no %s found then just print normally */
3924 if (!p[i])
3925 break;
3926
3927 /* Copy up to the %s, and print that */
3928 strncpy(iter->fmt, p, i);
3929 iter->fmt[i] = '\0';
3930 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3931
3932 /*
3933 * If iter->seq is full, the above call no longer guarantees
3934 * that ap is in sync with fmt processing, and further calls
3935 * to va_arg() can return wrong positional arguments.
3936 *
3937 * Ensure that ap is no longer used in this case.
3938 */
3939 if (iter->seq.full) {
3940 p = "";
3941 break;
3942 }
3943
3944 if (star)
3945 len = va_arg(ap, int);
3946
3947 /* The ap now points to the string data of the %s */
3948 str = va_arg(ap, const char *);
3949
3950 /*
3951 * If you hit this warning, it is likely that the
3952 * trace event in question used %s on a string that
3953 * was saved at the time of the event, but may not be
3954 * around when the trace is read. Use __string(),
3955 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3956 * instead. See samples/trace_events/trace-events-sample.h
3957 * for reference.
3958 */
3959 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3960 "fmt: '%s' current_buffer: '%s'",
3961 fmt, show_buffer(&iter->seq))) {
3962 int ret;
3963
3964 /* Try to safely read the string */
3965 if (star) {
3966 if (len + 1 > iter->fmt_size)
3967 len = iter->fmt_size - 1;
3968 if (len < 0)
3969 len = 0;
3970 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3971 iter->fmt[len] = 0;
3972 star = false;
3973 } else {
3974 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3975 iter->fmt_size);
3976 }
3977 if (ret < 0)
3978 trace_seq_printf(&iter->seq, "(0x%px)", str);
3979 else
3980 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3981 str, iter->fmt);
3982 str = "[UNSAFE-MEMORY]";
3983 strcpy(iter->fmt, "%s");
3984 } else {
3985 strncpy(iter->fmt, p + i, j + 1);
3986 iter->fmt[j+1] = '\0';
3987 }
3988 if (star)
3989 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3990 else
3991 trace_seq_printf(&iter->seq, iter->fmt, str);
3992
3993 p += i + j + 1;
3994 }
3995 print:
3996 if (*p)
3997 trace_seq_vprintf(&iter->seq, p, ap);
3998 }
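/*
 * As an illustration of the fallback above (the address is made up): if an
 * event's "comm=%s" argument points at memory that fails trace_safe_str(),
 * the WARN_ONCE() fires and the line is emitted roughly as
 *
 *	comm=(0xffff888100321e00:stale)[UNSAFE-MEMORY]
 *
 * i.e. the raw pointer (plus whatever could still be read from it) is
 * printed, followed by the "[UNSAFE-MEMORY]" marker instead of trusting
 * the original string pointer.
 */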
3999
4000 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4001 {
4002 const char *p, *new_fmt;
4003 char *q;
4004
4005 if (WARN_ON_ONCE(!fmt))
4006 return fmt;
4007
4008 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4009 return fmt;
4010
4011 p = fmt;
4012 new_fmt = q = iter->fmt;
4013 while (*p) {
4014 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4015 if (!trace_iter_expand_format(iter))
4016 return fmt;
4017
4018 q += iter->fmt - new_fmt;
4019 new_fmt = iter->fmt;
4020 }
4021
4022 *q++ = *p++;
4023
4024 /* Replace %p with %px */
4025 if (p[-1] == '%') {
4026 if (p[0] == '%') {
4027 *q++ = *p++;
4028 } else if (p[0] == 'p' && !isalnum(p[1])) {
4029 *q++ = *p++;
4030 *q++ = 'x';
4031 }
4032 }
4033 }
4034 *q = '\0';
4035
4036 return new_fmt;
4037 }
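/*
 * Sketch of the rewrite performed above (illustrative format string, not
 * taken from a real event): with the "hash-ptr" trace option cleared, a
 * format such as
 *
 *	"dev=%s addr=%p flags=%08x"
 *
 * is copied into iter->fmt as
 *
 *	"dev=%s addr=%px flags=%08x"
 *
 * so the event shows the real pointer value rather than its hash. A
 * literal "%%p" and extended specifiers such as "%pS" are left untouched.
 */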
4038
4039 #define STATIC_TEMP_BUF_SIZE 128
4040 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4041
4042 /* Find the next real entry, without updating the iterator itself */
4043 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4044 int *ent_cpu, u64 *ent_ts)
4045 {
4046 /* __find_next_entry will reset ent_size */
4047 int ent_size = iter->ent_size;
4048 struct trace_entry *entry;
4049
4050 /*
4051 * If called from ftrace_dump(), then the iter->temp buffer
4052 * will be the static_temp_buf and not created from kmalloc.
4053 * If the entry size is greater than the buffer, we can
4054 * not save it. Just return NULL in that case. This is only
4055 * used to add markers when two consecutive events' time
4056 * stamps have a large delta. See trace_print_lat_context()
4057 */
4058 if (iter->temp == static_temp_buf &&
4059 STATIC_TEMP_BUF_SIZE < ent_size)
4060 return NULL;
4061
4062 /*
4063 * The __find_next_entry() may call peek_next_entry(), which may
4064 * call ring_buffer_peek() that may make the contents of iter->ent
4065 * undefined. Need to copy iter->ent now.
4066 */
4067 if (iter->ent && iter->ent != iter->temp) {
4068 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4069 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4070 void *temp;
4071 temp = kmalloc(iter->ent_size, GFP_KERNEL);
4072 if (!temp)
4073 return NULL;
4074 kfree(iter->temp);
4075 iter->temp = temp;
4076 iter->temp_size = iter->ent_size;
4077 }
4078 memcpy(iter->temp, iter->ent, iter->ent_size);
4079 iter->ent = iter->temp;
4080 }
4081 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4082 /* Put back the original ent_size */
4083 iter->ent_size = ent_size;
4084
4085 return entry;
4086 }
4087
4088 /* Find the next real entry, and increment the iterator to the next entry */
4089 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4090 {
4091 iter->ent = __find_next_entry(iter, &iter->cpu,
4092 &iter->lost_events, &iter->ts);
4093
4094 if (iter->ent)
4095 trace_iterator_increment(iter);
4096
4097 return iter->ent ? iter : NULL;
4098 }
4099
4100 static void trace_consume(struct trace_iterator *iter)
4101 {
4102 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4103 &iter->lost_events);
4104 }
4105
4106 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4107 {
4108 struct trace_iterator *iter = m->private;
4109 int i = (int)*pos;
4110 void *ent;
4111
4112 WARN_ON_ONCE(iter->leftover);
4113
4114 (*pos)++;
4115
4116 /* can't go backwards */
4117 if (iter->idx > i)
4118 return NULL;
4119
4120 if (iter->idx < 0)
4121 ent = trace_find_next_entry_inc(iter);
4122 else
4123 ent = iter;
4124
4125 while (ent && iter->idx < i)
4126 ent = trace_find_next_entry_inc(iter);
4127
4128 iter->pos = *pos;
4129
4130 return ent;
4131 }
4132
4133 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4134 {
4135 struct ring_buffer_iter *buf_iter;
4136 unsigned long entries = 0;
4137 u64 ts;
4138
4139 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4140
4141 buf_iter = trace_buffer_iter(iter, cpu);
4142 if (!buf_iter)
4143 return;
4144
4145 ring_buffer_iter_reset(buf_iter);
4146
4147 /*
4148 * We could have the case with the max latency tracers
4149 * that a reset never took place on a cpu. This is evident
4150 * by the timestamp being before the start of the buffer.
4151 */
4152 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4153 if (ts >= iter->array_buffer->time_start)
4154 break;
4155 entries++;
4156 ring_buffer_iter_advance(buf_iter);
4157 /* This could be a big loop */
4158 cond_resched();
4159 }
4160
4161 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4162 }
4163
4164 /*
4165 * The current tracer is copied into the iterator so that a global
4166 * lock does not need to be held all around.
4167 */
4168 static void *s_start(struct seq_file *m, loff_t *pos)
4169 {
4170 struct trace_iterator *iter = m->private;
4171 struct trace_array *tr = iter->tr;
4172 int cpu_file = iter->cpu_file;
4173 void *p = NULL;
4174 loff_t l = 0;
4175 int cpu;
4176
4177 mutex_lock(&trace_types_lock);
4178 if (unlikely(tr->current_trace != iter->trace)) {
4179 /* Close iter->trace before switching to the new current tracer */
4180 if (iter->trace->close)
4181 iter->trace->close(iter);
4182 iter->trace = tr->current_trace;
4183 /* Reopen the new current tracer */
4184 if (iter->trace->open)
4185 iter->trace->open(iter);
4186 }
4187 mutex_unlock(&trace_types_lock);
4188
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190 if (iter->snapshot && iter->trace->use_max_tr)
4191 return ERR_PTR(-EBUSY);
4192 #endif
4193
4194 if (*pos != iter->pos) {
4195 iter->ent = NULL;
4196 iter->cpu = 0;
4197 iter->idx = -1;
4198
4199 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4200 for_each_tracing_cpu(cpu)
4201 tracing_iter_reset(iter, cpu);
4202 } else
4203 tracing_iter_reset(iter, cpu_file);
4204
4205 iter->leftover = 0;
4206 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4207 ;
4208
4209 } else {
4210 /*
4211 * If we overflowed the seq_file before, then we want
4212 * to just reuse the trace_seq buffer again.
4213 */
4214 if (iter->leftover)
4215 p = iter;
4216 else {
4217 l = *pos - 1;
4218 p = s_next(m, p, &l);
4219 }
4220 }
4221
4222 trace_event_read_lock();
4223 trace_access_lock(cpu_file);
4224 return p;
4225 }
4226
4227 static void s_stop(struct seq_file *m, void *p)
4228 {
4229 struct trace_iterator *iter = m->private;
4230
4231 #ifdef CONFIG_TRACER_MAX_TRACE
4232 if (iter->snapshot && iter->trace->use_max_tr)
4233 return;
4234 #endif
4235
4236 trace_access_unlock(iter->cpu_file);
4237 trace_event_read_unlock();
4238 }
4239
4240 static void
4241 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4242 unsigned long *entries, int cpu)
4243 {
4244 unsigned long count;
4245
4246 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4247 /*
4248 * If this buffer has skipped entries, then we hold all
4249 * entries for the trace and we need to ignore the
4250 * ones before the time stamp.
4251 */
4252 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4253 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4254 /* total is the same as the entries */
4255 *total = count;
4256 } else
4257 *total = count +
4258 ring_buffer_overrun_cpu(buf->buffer, cpu);
4259 *entries = count;
4260 }
4261
4262 static void
4263 get_total_entries(struct array_buffer *buf,
4264 unsigned long *total, unsigned long *entries)
4265 {
4266 unsigned long t, e;
4267 int cpu;
4268
4269 *total = 0;
4270 *entries = 0;
4271
4272 for_each_tracing_cpu(cpu) {
4273 get_total_entries_cpu(buf, &t, &e, cpu);
4274 *total += t;
4275 *entries += e;
4276 }
4277 }
4278
4279 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4280 {
4281 unsigned long total, entries;
4282
4283 if (!tr)
4284 tr = &global_trace;
4285
4286 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4287
4288 return entries;
4289 }
4290
4291 unsigned long trace_total_entries(struct trace_array *tr)
4292 {
4293 unsigned long total, entries;
4294
4295 if (!tr)
4296 tr = &global_trace;
4297
4298 get_total_entries(&tr->array_buffer, &total, &entries);
4299
4300 return entries;
4301 }
4302
4303 static void print_lat_help_header(struct seq_file *m)
4304 {
4305 seq_puts(m, "# _------=> CPU# \n"
4306 "# / _-----=> irqs-off/BH-disabled\n"
4307 "# | / _----=> need-resched \n"
4308 "# || / _---=> hardirq/softirq \n"
4309 "# ||| / _--=> preempt-depth \n"
4310 "# |||| / _-=> migrate-disable \n"
4311 "# ||||| / delay \n"
4312 "# cmd pid |||||| time | caller \n"
4313 "# \\ / |||||| \\ | / \n");
4314 }
4315
4316 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4317 {
4318 unsigned long total;
4319 unsigned long entries;
4320
4321 get_total_entries(buf, &total, &entries);
4322 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4323 entries, total, num_online_cpus());
4324 seq_puts(m, "#\n");
4325 }
4326
4327 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4328 unsigned int flags)
4329 {
4330 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4331
4332 print_event_info(buf, m);
4333
4334 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4335 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4336 }
4337
4338 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4339 unsigned int flags)
4340 {
4341 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4342 static const char space[] = " ";
4343 int prec = tgid ? 12 : 2;
4344
4345 print_event_info(buf, m);
4346
4347 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4348 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4349 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4350 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4351 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4352 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4353 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4354 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4355 }
4356
4357 void
4358 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4359 {
4360 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4361 struct array_buffer *buf = iter->array_buffer;
4362 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4363 struct tracer *type = iter->trace;
4364 unsigned long entries;
4365 unsigned long total;
4366 const char *name = type->name;
4367
4368 get_total_entries(buf, &total, &entries);
4369
4370 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4371 name, UTS_RELEASE);
4372 seq_puts(m, "# -----------------------------------"
4373 "---------------------------------\n");
4374 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4375 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4376 nsecs_to_usecs(data->saved_latency),
4377 entries,
4378 total,
4379 buf->cpu,
4380 preempt_model_none() ? "server" :
4381 preempt_model_voluntary() ? "desktop" :
4382 preempt_model_full() ? "preempt" :
4383 preempt_model_rt() ? "preempt_rt" :
4384 "unknown",
4385 /* These are reserved for later use */
4386 0, 0, 0, 0);
4387 #ifdef CONFIG_SMP
4388 seq_printf(m, " #P:%d)\n", num_online_cpus());
4389 #else
4390 seq_puts(m, ")\n");
4391 #endif
4392 seq_puts(m, "# -----------------\n");
4393 seq_printf(m, "# | task: %.16s-%d "
4394 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4395 data->comm, data->pid,
4396 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4397 data->policy, data->rt_priority);
4398 seq_puts(m, "# -----------------\n");
4399
4400 if (data->critical_start) {
4401 seq_puts(m, "# => started at: ");
4402 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4403 trace_print_seq(m, &iter->seq);
4404 seq_puts(m, "\n# => ended at: ");
4405 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4406 trace_print_seq(m, &iter->seq);
4407 seq_puts(m, "\n#\n");
4408 }
4409
4410 seq_puts(m, "#\n");
4411 }
4412
4413 static void test_cpu_buff_start(struct trace_iterator *iter)
4414 {
4415 struct trace_seq *s = &iter->seq;
4416 struct trace_array *tr = iter->tr;
4417
4418 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4419 return;
4420
4421 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4422 return;
4423
4424 if (cpumask_available(iter->started) &&
4425 cpumask_test_cpu(iter->cpu, iter->started))
4426 return;
4427
4428 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4429 return;
4430
4431 if (cpumask_available(iter->started))
4432 cpumask_set_cpu(iter->cpu, iter->started);
4433
4434 /* Don't print started cpu buffer for the first entry of the trace */
4435 if (iter->idx > 1)
4436 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4437 iter->cpu);
4438 }
4439
4440 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4441 {
4442 struct trace_array *tr = iter->tr;
4443 struct trace_seq *s = &iter->seq;
4444 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4445 struct trace_entry *entry;
4446 struct trace_event *event;
4447
4448 entry = iter->ent;
4449
4450 test_cpu_buff_start(iter);
4451
4452 event = ftrace_find_event(entry->type);
4453
4454 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4456 trace_print_lat_context(iter);
4457 else
4458 trace_print_context(iter);
4459 }
4460
4461 if (trace_seq_has_overflowed(s))
4462 return TRACE_TYPE_PARTIAL_LINE;
4463
4464 if (event) {
4465 if (tr->trace_flags & TRACE_ITER_FIELDS)
4466 return print_event_fields(iter, event);
4467 return event->funcs->trace(iter, sym_flags, event);
4468 }
4469
4470 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4471
4472 return trace_handle_return(s);
4473 }
4474
4475 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4476 {
4477 struct trace_array *tr = iter->tr;
4478 struct trace_seq *s = &iter->seq;
4479 struct trace_entry *entry;
4480 struct trace_event *event;
4481
4482 entry = iter->ent;
4483
4484 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4485 trace_seq_printf(s, "%d %d %llu ",
4486 entry->pid, iter->cpu, iter->ts);
4487
4488 if (trace_seq_has_overflowed(s))
4489 return TRACE_TYPE_PARTIAL_LINE;
4490
4491 event = ftrace_find_event(entry->type);
4492 if (event)
4493 return event->funcs->raw(iter, 0, event);
4494
4495 trace_seq_printf(s, "%d ?\n", entry->type);
4496
4497 return trace_handle_return(s);
4498 }
4499
4500 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4501 {
4502 struct trace_array *tr = iter->tr;
4503 struct trace_seq *s = &iter->seq;
4504 unsigned char newline = '\n';
4505 struct trace_entry *entry;
4506 struct trace_event *event;
4507
4508 entry = iter->ent;
4509
4510 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4511 SEQ_PUT_HEX_FIELD(s, entry->pid);
4512 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4513 SEQ_PUT_HEX_FIELD(s, iter->ts);
4514 if (trace_seq_has_overflowed(s))
4515 return TRACE_TYPE_PARTIAL_LINE;
4516 }
4517
4518 event = ftrace_find_event(entry->type);
4519 if (event) {
4520 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4521 if (ret != TRACE_TYPE_HANDLED)
4522 return ret;
4523 }
4524
4525 SEQ_PUT_FIELD(s, newline);
4526
4527 return trace_handle_return(s);
4528 }
4529
4530 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4531 {
4532 struct trace_array *tr = iter->tr;
4533 struct trace_seq *s = &iter->seq;
4534 struct trace_entry *entry;
4535 struct trace_event *event;
4536
4537 entry = iter->ent;
4538
4539 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4540 SEQ_PUT_FIELD(s, entry->pid);
4541 SEQ_PUT_FIELD(s, iter->cpu);
4542 SEQ_PUT_FIELD(s, iter->ts);
4543 if (trace_seq_has_overflowed(s))
4544 return TRACE_TYPE_PARTIAL_LINE;
4545 }
4546
4547 event = ftrace_find_event(entry->type);
4548 return event ? event->funcs->binary(iter, 0, event) :
4549 TRACE_TYPE_HANDLED;
4550 }
4551
4552 int trace_empty(struct trace_iterator *iter)
4553 {
4554 struct ring_buffer_iter *buf_iter;
4555 int cpu;
4556
4557 /* If we are looking at one CPU buffer, only check that one */
4558 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4559 cpu = iter->cpu_file;
4560 buf_iter = trace_buffer_iter(iter, cpu);
4561 if (buf_iter) {
4562 if (!ring_buffer_iter_empty(buf_iter))
4563 return 0;
4564 } else {
4565 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4566 return 0;
4567 }
4568 return 1;
4569 }
4570
4571 for_each_tracing_cpu(cpu) {
4572 buf_iter = trace_buffer_iter(iter, cpu);
4573 if (buf_iter) {
4574 if (!ring_buffer_iter_empty(buf_iter))
4575 return 0;
4576 } else {
4577 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578 return 0;
4579 }
4580 }
4581
4582 return 1;
4583 }
4584
4585 /* Called with trace_event_read_lock() held. */
4586 enum print_line_t print_trace_line(struct trace_iterator *iter)
4587 {
4588 struct trace_array *tr = iter->tr;
4589 unsigned long trace_flags = tr->trace_flags;
4590 enum print_line_t ret;
4591
4592 if (iter->lost_events) {
4593 if (iter->lost_events == (unsigned long)-1)
4594 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4595 iter->cpu);
4596 else
4597 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4598 iter->cpu, iter->lost_events);
4599 if (trace_seq_has_overflowed(&iter->seq))
4600 return TRACE_TYPE_PARTIAL_LINE;
4601 }
4602
4603 if (iter->trace && iter->trace->print_line) {
4604 ret = iter->trace->print_line(iter);
4605 if (ret != TRACE_TYPE_UNHANDLED)
4606 return ret;
4607 }
4608
4609 if (iter->ent->type == TRACE_BPUTS &&
4610 trace_flags & TRACE_ITER_PRINTK &&
4611 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4612 return trace_print_bputs_msg_only(iter);
4613
4614 if (iter->ent->type == TRACE_BPRINT &&
4615 trace_flags & TRACE_ITER_PRINTK &&
4616 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4617 return trace_print_bprintk_msg_only(iter);
4618
4619 if (iter->ent->type == TRACE_PRINT &&
4620 trace_flags & TRACE_ITER_PRINTK &&
4621 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4622 return trace_print_printk_msg_only(iter);
4623
4624 if (trace_flags & TRACE_ITER_BIN)
4625 return print_bin_fmt(iter);
4626
4627 if (trace_flags & TRACE_ITER_HEX)
4628 return print_hex_fmt(iter);
4629
4630 if (trace_flags & TRACE_ITER_RAW)
4631 return print_raw_fmt(iter);
4632
4633 return print_trace_fmt(iter);
4634 }
4635
4636 void trace_latency_header(struct seq_file *m)
4637 {
4638 struct trace_iterator *iter = m->private;
4639 struct trace_array *tr = iter->tr;
4640
4641 /* print nothing if the buffers are empty */
4642 if (trace_empty(iter))
4643 return;
4644
4645 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4646 print_trace_header(m, iter);
4647
4648 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4649 print_lat_help_header(m);
4650 }
4651
4652 void trace_default_header(struct seq_file *m)
4653 {
4654 struct trace_iterator *iter = m->private;
4655 struct trace_array *tr = iter->tr;
4656 unsigned long trace_flags = tr->trace_flags;
4657
4658 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4659 return;
4660
4661 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4662 /* print nothing if the buffers are empty */
4663 if (trace_empty(iter))
4664 return;
4665 print_trace_header(m, iter);
4666 if (!(trace_flags & TRACE_ITER_VERBOSE))
4667 print_lat_help_header(m);
4668 } else {
4669 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4670 if (trace_flags & TRACE_ITER_IRQ_INFO)
4671 print_func_help_header_irq(iter->array_buffer,
4672 m, trace_flags);
4673 else
4674 print_func_help_header(iter->array_buffer, m,
4675 trace_flags);
4676 }
4677 }
4678 }
4679
4680 static void test_ftrace_alive(struct seq_file *m)
4681 {
4682 if (!ftrace_is_dead())
4683 return;
4684 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4685 "# MAY BE MISSING FUNCTION EVENTS\n");
4686 }
4687
4688 #ifdef CONFIG_TRACER_MAX_TRACE
4689 static void show_snapshot_main_help(struct seq_file *m)
4690 {
4691 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4692 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4693 "# Takes a snapshot of the main buffer.\n"
4694 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4695 "# (Doesn't have to be '2'; works with any number that\n"
4696 "# is not a '0' or '1')\n");
4697 }
4698
4699 static void show_snapshot_percpu_help(struct seq_file *m)
4700 {
4701 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4702 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4703 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4704 "# Takes a snapshot of the main buffer for this cpu.\n");
4705 #else
4706 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4707 "# Must use main snapshot file to allocate.\n");
4708 #endif
4709 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4710 "# (Doesn't have to be '2'; works with any number that\n"
4711 "# is not a '0' or '1')\n");
4712 }
4713
4714 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4715 {
4716 if (iter->tr->allocated_snapshot)
4717 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4718 else
4719 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4720
4721 seq_puts(m, "# Snapshot commands:\n");
4722 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4723 show_snapshot_main_help(m);
4724 else
4725 show_snapshot_percpu_help(m);
4726 }
4727 #else
4728 /* Should never be called */
4729 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4730 #endif
4731
4732 static int s_show(struct seq_file *m, void *v)
4733 {
4734 struct trace_iterator *iter = v;
4735 int ret;
4736
4737 if (iter->ent == NULL) {
4738 if (iter->tr) {
4739 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4740 seq_puts(m, "#\n");
4741 test_ftrace_alive(m);
4742 }
4743 if (iter->snapshot && trace_empty(iter))
4744 print_snapshot_help(m, iter);
4745 else if (iter->trace && iter->trace->print_header)
4746 iter->trace->print_header(m);
4747 else
4748 trace_default_header(m);
4749
4750 } else if (iter->leftover) {
4751 /*
4752 * If we filled the seq_file buffer earlier, we
4753 * want to just show it now.
4754 */
4755 ret = trace_print_seq(m, &iter->seq);
4756
4757 /* ret should this time be zero, but you never know */
4758 iter->leftover = ret;
4759
4760 } else {
4761 ret = print_trace_line(iter);
4762 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4763 iter->seq.full = 0;
4764 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4765 }
4766 ret = trace_print_seq(m, &iter->seq);
4767 /*
4768 * If we overflow the seq_file buffer, then it will
4769 * ask us for this data again at start up.
4770 * Use that instead.
4771 * ret is 0 if seq_file write succeeded.
4772 * -1 otherwise.
4773 */
4774 iter->leftover = ret;
4775 }
4776
4777 return 0;
4778 }
4779
4780 /*
4781 * Should be used after trace_array_get(); trace_types_lock
4782 * ensures that i_cdev was already initialized.
4783 */
4784 static inline int tracing_get_cpu(struct inode *inode)
4785 {
4786 if (inode->i_cdev) /* See trace_create_cpu_file() */
4787 return (long)inode->i_cdev - 1;
4788 return RING_BUFFER_ALL_CPUS;
4789 }
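/*
 * For example: a per_cpu/cpu0/trace file is created with i_cdev set to
 * cpu + 1 (see trace_create_cpu_file()), so the line above yields 0 for
 * it, while the top level trace file leaves i_cdev NULL and gets
 * RING_BUFFER_ALL_CPUS.
 */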
4790
4791 static const struct seq_operations tracer_seq_ops = {
4792 .start = s_start,
4793 .next = s_next,
4794 .stop = s_stop,
4795 .show = s_show,
4796 };
4797
4798 /*
4799 * Note, as iter itself can be allocated and freed in different
4800 * ways, this function is only used to free its content, and not
4801 * the iterator itself. The only requirement for all the allocations
4802 * is that they must zero all fields (kzalloc), as freeing works with
4803 * either allocated content or NULL.
4804 */
4805 static void free_trace_iter_content(struct trace_iterator *iter)
4806 {
4807 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4808 if (iter->fmt != static_fmt_buf)
4809 kfree(iter->fmt);
4810
4811 kfree(iter->temp);
4812 kfree(iter->buffer_iter);
4813 mutex_destroy(&iter->mutex);
4814 free_cpumask_var(iter->started);
4815 }
4816
4817 static struct trace_iterator *
4818 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4819 {
4820 struct trace_array *tr = inode->i_private;
4821 struct trace_iterator *iter;
4822 int cpu;
4823
4824 if (tracing_disabled)
4825 return ERR_PTR(-ENODEV);
4826
4827 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4828 if (!iter)
4829 return ERR_PTR(-ENOMEM);
4830
4831 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4832 GFP_KERNEL);
4833 if (!iter->buffer_iter)
4834 goto release;
4835
4836 /*
4837 * trace_find_next_entry() may need to save off iter->ent.
4838 * It will place it into the iter->temp buffer. As most
4839 * events are less than 128 bytes, allocate a buffer of that size.
4840 * If one is greater, then trace_find_next_entry() will
4841 * allocate a new buffer to adjust for the bigger iter->ent.
4842 * It's not critical if it fails to get allocated here.
4843 */
4844 iter->temp = kmalloc(128, GFP_KERNEL);
4845 if (iter->temp)
4846 iter->temp_size = 128;
4847
4848 /*
4849 * trace_event_printf() may need to modify the given format
4850 * string to replace %p with %px so that it shows the real address
4851 * instead of a hash value. However, that is only needed for event
4852 * tracing; other tracers may not need it. Defer the allocation
4853 * until it is needed.
4854 */
4855 iter->fmt = NULL;
4856 iter->fmt_size = 0;
4857
4858 mutex_lock(&trace_types_lock);
4859 iter->trace = tr->current_trace;
4860
4861 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4862 goto fail;
4863
4864 iter->tr = tr;
4865
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867 /* Currently only the top directory has a snapshot */
4868 if (tr->current_trace->print_max || snapshot)
4869 iter->array_buffer = &tr->max_buffer;
4870 else
4871 #endif
4872 iter->array_buffer = &tr->array_buffer;
4873 iter->snapshot = snapshot;
4874 iter->pos = -1;
4875 iter->cpu_file = tracing_get_cpu(inode);
4876 mutex_init(&iter->mutex);
4877
4878 /* Notify the tracer early; before we stop tracing. */
4879 if (iter->trace->open)
4880 iter->trace->open(iter);
4881
4882 /* Annotate start of buffers if we had overruns */
4883 if (ring_buffer_overruns(iter->array_buffer->buffer))
4884 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4885
4886 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4887 if (trace_clocks[tr->clock_id].in_ns)
4888 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4889
4890 /*
4891 * If pause-on-trace is enabled, then stop the trace while
4892 * dumping, unless this is the "snapshot" file
4893 */
4894 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4895 tracing_stop_tr(tr);
4896
4897 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4898 for_each_tracing_cpu(cpu) {
4899 iter->buffer_iter[cpu] =
4900 ring_buffer_read_prepare(iter->array_buffer->buffer,
4901 cpu, GFP_KERNEL);
4902 }
4903 ring_buffer_read_prepare_sync();
4904 for_each_tracing_cpu(cpu) {
4905 ring_buffer_read_start(iter->buffer_iter[cpu]);
4906 tracing_iter_reset(iter, cpu);
4907 }
4908 } else {
4909 cpu = iter->cpu_file;
4910 iter->buffer_iter[cpu] =
4911 ring_buffer_read_prepare(iter->array_buffer->buffer,
4912 cpu, GFP_KERNEL);
4913 ring_buffer_read_prepare_sync();
4914 ring_buffer_read_start(iter->buffer_iter[cpu]);
4915 tracing_iter_reset(iter, cpu);
4916 }
4917
4918 mutex_unlock(&trace_types_lock);
4919
4920 return iter;
4921
4922 fail:
4923 mutex_unlock(&trace_types_lock);
4924 free_trace_iter_content(iter);
4925 release:
4926 seq_release_private(inode, file);
4927 return ERR_PTR(-ENOMEM);
4928 }
4929
4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932 int ret;
4933
4934 ret = tracing_check_open_get_tr(NULL);
4935 if (ret)
4936 return ret;
4937
4938 filp->private_data = inode->i_private;
4939 return 0;
4940 }
4941
4942 bool tracing_is_disabled(void)
4943 {
4944 return (tracing_disabled) ? true : false;
4945 }
4946
4947 /*
4948 * Open and update trace_array ref count.
4949 * Must have the current trace_array passed to it.
4950 */
4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953 struct trace_array *tr = inode->i_private;
4954 int ret;
4955
4956 ret = tracing_check_open_get_tr(tr);
4957 if (ret)
4958 return ret;
4959
4960 filp->private_data = inode->i_private;
4961
4962 return 0;
4963 }
4964
4965 /*
4966 * The private pointer of the inode is the trace_event_file.
4967 * Update the tr ref count associated to it.
4968 */
4969 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4970 {
4971 struct trace_event_file *file = inode->i_private;
4972 int ret;
4973
4974 ret = tracing_check_open_get_tr(file->tr);
4975 if (ret)
4976 return ret;
4977
4978 mutex_lock(&event_mutex);
4979
4980 /* Fail if the file is marked for removal */
4981 if (file->flags & EVENT_FILE_FL_FREED) {
4982 trace_array_put(file->tr);
4983 ret = -ENODEV;
4984 } else {
4985 event_file_get(file);
4986 }
4987
4988 mutex_unlock(&event_mutex);
4989 if (ret)
4990 return ret;
4991
4992 filp->private_data = inode->i_private;
4993
4994 return 0;
4995 }
4996
4997 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4998 {
4999 struct trace_event_file *file = inode->i_private;
5000
5001 trace_array_put(file->tr);
5002 event_file_put(file);
5003
5004 return 0;
5005 }
5006
5007 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5008 {
5009 tracing_release_file_tr(inode, filp);
5010 return single_release(inode, filp);
5011 }
5012
5013 static int tracing_mark_open(struct inode *inode, struct file *filp)
5014 {
5015 stream_open(inode, filp);
5016 return tracing_open_generic_tr(inode, filp);
5017 }
5018
5019 static int tracing_release(struct inode *inode, struct file *file)
5020 {
5021 struct trace_array *tr = inode->i_private;
5022 struct seq_file *m = file->private_data;
5023 struct trace_iterator *iter;
5024 int cpu;
5025
5026 if (!(file->f_mode & FMODE_READ)) {
5027 trace_array_put(tr);
5028 return 0;
5029 }
5030
5031 /* Writes do not use seq_file */
5032 iter = m->private;
5033 mutex_lock(&trace_types_lock);
5034
5035 for_each_tracing_cpu(cpu) {
5036 if (iter->buffer_iter[cpu])
5037 ring_buffer_read_finish(iter->buffer_iter[cpu]);
5038 }
5039
5040 if (iter->trace && iter->trace->close)
5041 iter->trace->close(iter);
5042
5043 if (!iter->snapshot && tr->stop_count)
5044 /* reenable tracing if it was previously enabled */
5045 tracing_start_tr(tr);
5046
5047 __trace_array_put(tr);
5048
5049 mutex_unlock(&trace_types_lock);
5050
5051 free_trace_iter_content(iter);
5052 seq_release_private(inode, file);
5053
5054 return 0;
5055 }
5056
5057 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5058 {
5059 struct trace_array *tr = inode->i_private;
5060
5061 trace_array_put(tr);
5062 return 0;
5063 }
5064
5065 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5066 {
5067 struct trace_array *tr = inode->i_private;
5068
5069 trace_array_put(tr);
5070
5071 return single_release(inode, file);
5072 }
5073
5074 static int tracing_open(struct inode *inode, struct file *file)
5075 {
5076 struct trace_array *tr = inode->i_private;
5077 struct trace_iterator *iter;
5078 int ret;
5079
5080 ret = tracing_check_open_get_tr(tr);
5081 if (ret)
5082 return ret;
5083
5084 /* If this file was open for write, then erase contents */
5085 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5086 int cpu = tracing_get_cpu(inode);
5087 struct array_buffer *trace_buf = &tr->array_buffer;
5088
5089 #ifdef CONFIG_TRACER_MAX_TRACE
5090 if (tr->current_trace->print_max)
5091 trace_buf = &tr->max_buffer;
5092 #endif
5093
5094 if (cpu == RING_BUFFER_ALL_CPUS)
5095 tracing_reset_online_cpus(trace_buf);
5096 else
5097 tracing_reset_cpu(trace_buf, cpu);
5098 }
5099
5100 if (file->f_mode & FMODE_READ) {
5101 iter = __tracing_open(inode, file, false);
5102 if (IS_ERR(iter))
5103 ret = PTR_ERR(iter);
5104 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5105 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5106 }
5107
5108 if (ret < 0)
5109 trace_array_put(tr);
5110
5111 return ret;
5112 }
5113
5114 /*
5115 * Some tracers are not suitable for instance buffers.
5116 * A tracer is always available for the global array (toplevel)
5117 * or if it explicitly states that it is.
5118 */
5119 static bool
5120 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5121 {
5122 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5123 }
5124
5125 /* Find the next tracer that this trace array may use */
5126 static struct tracer *
5127 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5128 {
5129 while (t && !trace_ok_for_array(t, tr))
5130 t = t->next;
5131
5132 return t;
5133 }
5134
5135 static void *
5136 t_next(struct seq_file *m, void *v, loff_t *pos)
5137 {
5138 struct trace_array *tr = m->private;
5139 struct tracer *t = v;
5140
5141 (*pos)++;
5142
5143 if (t)
5144 t = get_tracer_for_array(tr, t->next);
5145
5146 return t;
5147 }
5148
5149 static void *t_start(struct seq_file *m, loff_t *pos)
5150 {
5151 struct trace_array *tr = m->private;
5152 struct tracer *t;
5153 loff_t l = 0;
5154
5155 mutex_lock(&trace_types_lock);
5156
5157 t = get_tracer_for_array(tr, trace_types);
5158 for (; t && l < *pos; t = t_next(m, t, &l))
5159 ;
5160
5161 return t;
5162 }
5163
5164 static void t_stop(struct seq_file *m, void *p)
5165 {
5166 mutex_unlock(&trace_types_lock);
5167 }
5168
5169 static int t_show(struct seq_file *m, void *v)
5170 {
5171 struct tracer *t = v;
5172
5173 if (!t)
5174 return 0;
5175
5176 seq_puts(m, t->name);
5177 if (t->next)
5178 seq_putc(m, ' ');
5179 else
5180 seq_putc(m, '\n');
5181
5182 return 0;
5183 }
5184
5185 static const struct seq_operations show_traces_seq_ops = {
5186 .start = t_start,
5187 .next = t_next,
5188 .stop = t_stop,
5189 .show = t_show,
5190 };
5191
5192 static int show_traces_open(struct inode *inode, struct file *file)
5193 {
5194 struct trace_array *tr = inode->i_private;
5195 struct seq_file *m;
5196 int ret;
5197
5198 ret = tracing_check_open_get_tr(tr);
5199 if (ret)
5200 return ret;
5201
5202 ret = seq_open(file, &show_traces_seq_ops);
5203 if (ret) {
5204 trace_array_put(tr);
5205 return ret;
5206 }
5207
5208 m = file->private_data;
5209 m->private = tr;
5210
5211 return 0;
5212 }
5213
5214 static int show_traces_release(struct inode *inode, struct file *file)
5215 {
5216 struct trace_array *tr = inode->i_private;
5217
5218 trace_array_put(tr);
5219 return seq_release(inode, file);
5220 }
5221
5222 static ssize_t
5223 tracing_write_stub(struct file *filp, const char __user *ubuf,
5224 size_t count, loff_t *ppos)
5225 {
5226 return count;
5227 }
5228
5229 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5230 {
5231 int ret;
5232
5233 if (file->f_mode & FMODE_READ)
5234 ret = seq_lseek(file, offset, whence);
5235 else
5236 file->f_pos = ret = 0;
5237
5238 return ret;
5239 }
5240
5241 static const struct file_operations tracing_fops = {
5242 .open = tracing_open,
5243 .read = seq_read,
5244 .read_iter = seq_read_iter,
5245 .splice_read = copy_splice_read,
5246 .write = tracing_write_stub,
5247 .llseek = tracing_lseek,
5248 .release = tracing_release,
5249 };
5250
5251 static const struct file_operations show_traces_fops = {
5252 .open = show_traces_open,
5253 .read = seq_read,
5254 .llseek = seq_lseek,
5255 .release = show_traces_release,
5256 };
5257
5258 static ssize_t
5259 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5260 size_t count, loff_t *ppos)
5261 {
5262 struct trace_array *tr = file_inode(filp)->i_private;
5263 char *mask_str;
5264 int len;
5265
5266 len = snprintf(NULL, 0, "%*pb\n",
5267 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5268 mask_str = kmalloc(len, GFP_KERNEL);
5269 if (!mask_str)
5270 return -ENOMEM;
5271
5272 len = snprintf(mask_str, len, "%*pb\n",
5273 cpumask_pr_args(tr->tracing_cpumask));
5274 if (len >= count) {
5275 count = -EINVAL;
5276 goto out_err;
5277 }
5278 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5279
5280 out_err:
5281 kfree(mask_str);
5282
5283 return count;
5284 }
5285
5286 int tracing_set_cpumask(struct trace_array *tr,
5287 cpumask_var_t tracing_cpumask_new)
5288 {
5289 int cpu;
5290
5291 if (!tr)
5292 return -EINVAL;
5293
5294 local_irq_disable();
5295 arch_spin_lock(&tr->max_lock);
5296 for_each_tracing_cpu(cpu) {
5297 /*
5298 * Increase/decrease the disabled counter if we are
5299 * about to flip a bit in the cpumask:
5300 */
5301 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5302 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5303 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5304 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5305 #ifdef CONFIG_TRACER_MAX_TRACE
5306 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5307 #endif
5308 }
5309 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5310 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5311 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5312 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5313 #ifdef CONFIG_TRACER_MAX_TRACE
5314 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5315 #endif
5316 }
5317 }
5318 arch_spin_unlock(&tr->max_lock);
5319 local_irq_enable();
5320
5321 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5322
5323 return 0;
5324 }
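/*
 * From user space this path is reached through the "tracing_cpumask"
 * tracefs file. For example (illustrative only, assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * limits recording to CPUs 0 and 1; the buffers of the CPUs that were
 * masked out keep their contents but stop recording new events.
 */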
5325
5326 static ssize_t
5327 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5328 size_t count, loff_t *ppos)
5329 {
5330 struct trace_array *tr = file_inode(filp)->i_private;
5331 cpumask_var_t tracing_cpumask_new;
5332 int err;
5333
5334 if (count == 0 || count > KMALLOC_MAX_SIZE)
5335 return -EINVAL;
5336
5337 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5338 return -ENOMEM;
5339
5340 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5341 if (err)
5342 goto err_free;
5343
5344 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5345 if (err)
5346 goto err_free;
5347
5348 free_cpumask_var(tracing_cpumask_new);
5349
5350 return count;
5351
5352 err_free:
5353 free_cpumask_var(tracing_cpumask_new);
5354
5355 return err;
5356 }
5357
5358 static const struct file_operations tracing_cpumask_fops = {
5359 .open = tracing_open_generic_tr,
5360 .read = tracing_cpumask_read,
5361 .write = tracing_cpumask_write,
5362 .release = tracing_release_generic_tr,
5363 .llseek = generic_file_llseek,
5364 };
5365
5366 static int tracing_trace_options_show(struct seq_file *m, void *v)
5367 {
5368 struct tracer_opt *trace_opts;
5369 struct trace_array *tr = m->private;
5370 u32 tracer_flags;
5371 int i;
5372
5373 mutex_lock(&trace_types_lock);
5374 tracer_flags = tr->current_trace->flags->val;
5375 trace_opts = tr->current_trace->flags->opts;
5376
5377 for (i = 0; trace_options[i]; i++) {
5378 if (tr->trace_flags & (1 << i))
5379 seq_printf(m, "%s\n", trace_options[i]);
5380 else
5381 seq_printf(m, "no%s\n", trace_options[i]);
5382 }
5383
5384 for (i = 0; trace_opts[i].name; i++) {
5385 if (tracer_flags & trace_opts[i].bit)
5386 seq_printf(m, "%s\n", trace_opts[i].name);
5387 else
5388 seq_printf(m, "no%s\n", trace_opts[i].name);
5389 }
5390 mutex_unlock(&trace_types_lock);
5391
5392 return 0;
5393 }
5394
5395 static int __set_tracer_option(struct trace_array *tr,
5396 struct tracer_flags *tracer_flags,
5397 struct tracer_opt *opts, int neg)
5398 {
5399 struct tracer *trace = tracer_flags->trace;
5400 int ret;
5401
5402 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5403 if (ret)
5404 return ret;
5405
5406 if (neg)
5407 tracer_flags->val &= ~opts->bit;
5408 else
5409 tracer_flags->val |= opts->bit;
5410 return 0;
5411 }
5412
5413 /* Try to assign a tracer specific option */
5414 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5415 {
5416 struct tracer *trace = tr->current_trace;
5417 struct tracer_flags *tracer_flags = trace->flags;
5418 struct tracer_opt *opts = NULL;
5419 int i;
5420
5421 for (i = 0; tracer_flags->opts[i].name; i++) {
5422 opts = &tracer_flags->opts[i];
5423
5424 if (strcmp(cmp, opts->name) == 0)
5425 return __set_tracer_option(tr, trace->flags, opts, neg);
5426 }
5427
5428 return -EINVAL;
5429 }
5430
5431 /* Some tracers require overwrite to stay enabled */
5432 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5433 {
5434 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5435 return -1;
5436
5437 return 0;
5438 }
5439
5440 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5441 {
5442 int *map;
5443
5444 if ((mask == TRACE_ITER_RECORD_TGID) ||
5445 (mask == TRACE_ITER_RECORD_CMD))
5446 lockdep_assert_held(&event_mutex);
5447
5448 /* do nothing if flag is already set */
5449 if (!!(tr->trace_flags & mask) == !!enabled)
5450 return 0;
5451
5452 /* Give the tracer a chance to approve the change */
5453 if (tr->current_trace->flag_changed)
5454 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5455 return -EINVAL;
5456
5457 if (enabled)
5458 tr->trace_flags |= mask;
5459 else
5460 tr->trace_flags &= ~mask;
5461
5462 if (mask == TRACE_ITER_RECORD_CMD)
5463 trace_event_enable_cmd_record(enabled);
5464
5465 if (mask == TRACE_ITER_RECORD_TGID) {
5466 if (!tgid_map) {
5467 tgid_map_max = pid_max;
5468 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5469 GFP_KERNEL);
5470
5471 /*
5472 * Pairs with smp_load_acquire() in
5473 * trace_find_tgid_ptr() to ensure that if it observes
5474 * the tgid_map we just allocated then it also observes
5475 * the corresponding tgid_map_max value.
5476 */
5477 smp_store_release(&tgid_map, map);
5478 }
5479 if (!tgid_map) {
5480 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5481 return -ENOMEM;
5482 }
5483
5484 trace_event_enable_tgid_record(enabled);
5485 }
5486
5487 if (mask == TRACE_ITER_EVENT_FORK)
5488 trace_event_follow_fork(tr, enabled);
5489
5490 if (mask == TRACE_ITER_FUNC_FORK)
5491 ftrace_pid_follow_fork(tr, enabled);
5492
5493 if (mask == TRACE_ITER_OVERWRITE) {
5494 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5495 #ifdef CONFIG_TRACER_MAX_TRACE
5496 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5497 #endif
5498 }
5499
5500 if (mask == TRACE_ITER_PRINTK) {
5501 trace_printk_start_stop_comm(enabled);
5502 trace_printk_control(enabled);
5503 }
5504
5505 return 0;
5506 }
5507
5508 int trace_set_options(struct trace_array *tr, char *option)
5509 {
5510 char *cmp;
5511 int neg = 0;
5512 int ret;
5513 size_t orig_len = strlen(option);
5514 int len;
5515
5516 cmp = strstrip(option);
5517
5518 len = str_has_prefix(cmp, "no");
5519 if (len)
5520 neg = 1;
5521
5522 cmp += len;
5523
5524 mutex_lock(&event_mutex);
5525 mutex_lock(&trace_types_lock);
5526
5527 ret = match_string(trace_options, -1, cmp);
5528 /* If no option could be set, test the specific tracer options */
5529 if (ret < 0)
5530 ret = set_tracer_option(tr, cmp, neg);
5531 else
5532 ret = set_tracer_flag(tr, 1 << ret, !neg);
5533
5534 mutex_unlock(&trace_types_lock);
5535 mutex_unlock(&event_mutex);
5536
5537 /*
5538 * If the first trailing whitespace is replaced with '\0' by strstrip,
5539 * turn it back into a space.
5540 */
5541 if (orig_len > strlen(option))
5542 option[strlen(option)] = ' ';
5543
5544 return ret;
5545 }
5546
5547 static void __init apply_trace_boot_options(void)
5548 {
5549 char *buf = trace_boot_options_buf;
5550 char *option;
5551
5552 while (true) {
5553 option = strsep(&buf, ",");
5554
5555 if (!option)
5556 break;
5557
5558 if (*option)
5559 trace_set_options(&global_trace, option);
5560
5561 /* Put back the comma to allow this to be called again */
5562 if (buf)
5563 *(buf - 1) = ',';
5564 }
5565 }
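/*
 * A minimal boot-time sketch, assuming trace_boot_options_buf is filled
 * from the "trace_options=" kernel command line parameter and that the
 * option names shown exist on the running kernel:
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * Each comma-separated token is handed to trace_set_options() exactly as
 * if it had been written to the trace_options file.
 */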
5566
5567 static ssize_t
5568 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5569 size_t cnt, loff_t *ppos)
5570 {
5571 struct seq_file *m = filp->private_data;
5572 struct trace_array *tr = m->private;
5573 char buf[64];
5574 int ret;
5575
5576 if (cnt >= sizeof(buf))
5577 return -EINVAL;
5578
5579 if (copy_from_user(buf, ubuf, cnt))
5580 return -EFAULT;
5581
5582 buf[cnt] = 0;
5583
5584 ret = trace_set_options(tr, buf);
5585 if (ret < 0)
5586 return ret;
5587
5588 *ppos += cnt;
5589
5590 return cnt;
5591 }
5592
5593 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5594 {
5595 struct trace_array *tr = inode->i_private;
5596 int ret;
5597
5598 ret = tracing_check_open_get_tr(tr);
5599 if (ret)
5600 return ret;
5601
5602 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5603 if (ret < 0)
5604 trace_array_put(tr);
5605
5606 return ret;
5607 }
5608
5609 static const struct file_operations tracing_iter_fops = {
5610 .open = tracing_trace_options_open,
5611 .read = seq_read,
5612 .llseek = seq_lseek,
5613 .release = tracing_single_release_tr,
5614 .write = tracing_trace_options_write,
5615 };
5616
5617 static const char readme_msg[] =
5618 "tracing mini-HOWTO:\n\n"
5619 "# echo 0 > tracing_on : quick way to disable tracing\n"
5620 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5621 " Important files:\n"
5622 " trace\t\t\t- The static contents of the buffer\n"
5623 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5624 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5625 " current_tracer\t- function and latency tracers\n"
5626 " available_tracers\t- list of configured tracers for current_tracer\n"
5627 " error_log\t- error log for failed commands (that support it)\n"
5628 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5629 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5630 " trace_clock\t\t- change the clock used to order events\n"
5631 " local: Per cpu clock but may not be synced across CPUs\n"
5632 " global: Synced across CPUs but slows tracing down.\n"
5633 " counter: Not a clock, but just an increment\n"
5634 " uptime: Jiffy counter from time of boot\n"
5635 " perf: Same clock that perf events use\n"
5636 #ifdef CONFIG_X86_64
5637 " x86-tsc: TSC cycle counter\n"
5638 #endif
5639 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5640 " delta: Delta difference against a buffer-wide timestamp\n"
5641 " absolute: Absolute (standalone) timestamp\n"
5642 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5643 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5644 " tracing_cpumask\t- Limit which CPUs to trace\n"
5645 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5646 "\t\t\t Remove sub-buffer with rmdir\n"
5647 " trace_options\t\t- Set format or modify how tracing happens\n"
5648 "\t\t\t Disable an option by prefixing 'no' to the\n"
5649 "\t\t\t option name\n"
5650 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5651 #ifdef CONFIG_DYNAMIC_FTRACE
5652 "\n available_filter_functions - list of functions that can be filtered on\n"
5653 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5654 "\t\t\t functions\n"
5655 "\t accepts: func_full_name or glob-matching-pattern\n"
5656 "\t modules: Can select a group via module\n"
5657 "\t Format: :mod:<module-name>\n"
5658 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5659 "\t triggers: a command to perform when function is hit\n"
5660 "\t Format: <function>:<trigger>[:count]\n"
5661 "\t trigger: traceon, traceoff\n"
5662 "\t\t enable_event:<system>:<event>\n"
5663 "\t\t disable_event:<system>:<event>\n"
5664 #ifdef CONFIG_STACKTRACE
5665 "\t\t stacktrace\n"
5666 #endif
5667 #ifdef CONFIG_TRACER_SNAPSHOT
5668 "\t\t snapshot\n"
5669 #endif
5670 "\t\t dump\n"
5671 "\t\t cpudump\n"
5672 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5673 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5674 "\t The first one will disable tracing every time do_fault is hit\n"
5675 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5676 "\t The first time do trap is hit and it disables tracing, the\n"
5677 "\t counter will decrement to 2. If tracing is already disabled,\n"
5678 "\t the counter will not decrement. It only decrements when the\n"
5679 "\t trigger did work\n"
5680 "\t To remove trigger without count:\n"
5681 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5682 "\t To remove trigger with a count:\n"
5683 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5684 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5685 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5686 "\t modules: Can select a group via module command :mod:\n"
5687 "\t Does not accept triggers\n"
5688 #endif /* CONFIG_DYNAMIC_FTRACE */
5689 #ifdef CONFIG_FUNCTION_TRACER
5690 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5691 "\t\t (function)\n"
5692 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5693 "\t\t (function)\n"
5694 #endif
5695 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5696 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5697 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5698 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5699 #endif
5700 #ifdef CONFIG_TRACER_SNAPSHOT
5701 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5702 "\t\t\t snapshot buffer. Read the contents for more\n"
5703 "\t\t\t information\n"
5704 #endif
5705 #ifdef CONFIG_STACK_TRACER
5706 " stack_trace\t\t- Shows the max stack trace when active\n"
5707 " stack_max_size\t- Shows current max stack size that was traced\n"
5708 "\t\t\t Write into this file to reset the max size (trigger a\n"
5709 "\t\t\t new trace)\n"
5710 #ifdef CONFIG_DYNAMIC_FTRACE
5711 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5712 "\t\t\t traces\n"
5713 #endif
5714 #endif /* CONFIG_STACK_TRACER */
5715 #ifdef CONFIG_DYNAMIC_EVENTS
5716 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5717 "\t\t\t Write into this file to define/undefine new trace events.\n"
5718 #endif
5719 #ifdef CONFIG_KPROBE_EVENTS
5720 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5721 "\t\t\t Write into this file to define/undefine new trace events.\n"
5722 #endif
5723 #ifdef CONFIG_UPROBE_EVENTS
5724 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5725 "\t\t\t Write into this file to define/undefine new trace events.\n"
5726 #endif
5727 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5728 defined(CONFIG_FPROBE_EVENTS)
5729 "\t accepts: event-definitions (one definition per line)\n"
5730 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5731 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5732 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5733 #endif
5734 #ifdef CONFIG_FPROBE_EVENTS
5735 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5736 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5737 #endif
5738 #ifdef CONFIG_HIST_TRIGGERS
5739 "\t s:[synthetic/]<event> <field> [<field>]\n"
5740 #endif
5741 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5742 "\t -:[<group>/][<event>]\n"
5743 #ifdef CONFIG_KPROBE_EVENTS
5744 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5745 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5746 #endif
5747 #ifdef CONFIG_UPROBE_EVENTS
5748 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5749 #endif
5750 "\t args: <name>=fetcharg[:type]\n"
5751 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5752 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5753 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5754 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5755 "\t <argname>[->field[->field|.field...]],\n"
5756 #else
5757 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5758 #endif
5759 #else
5760 "\t $stack<index>, $stack, $retval, $comm,\n"
5761 #endif
5762 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5763 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5764 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5765 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5766 "\t symstr, <type>\\[<array-size>\\]\n"
5767 #ifdef CONFIG_HIST_TRIGGERS
5768 "\t field: <stype> <name>;\n"
5769 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5770 "\t [unsigned] char/int/long\n"
5771 #endif
5772 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5773 "\t of the <attached-group>/<attached-event>.\n"
5774 #endif
5775 " events/\t\t- Directory containing all trace event subsystems:\n"
5776 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5777 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5778 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5779 "\t\t\t events\n"
5780 " filter\t\t- If set, only events passing filter are traced\n"
5781 " events/<system>/<event>/\t- Directory containing control files for\n"
5782 "\t\t\t <event>:\n"
5783 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5784 " filter\t\t- If set, only events passing filter are traced\n"
5785 " trigger\t\t- If set, a command to perform when event is hit\n"
5786 "\t Format: <trigger>[:count][if <filter>]\n"
5787 "\t trigger: traceon, traceoff\n"
5788 "\t enable_event:<system>:<event>\n"
5789 "\t disable_event:<system>:<event>\n"
5790 #ifdef CONFIG_HIST_TRIGGERS
5791 "\t enable_hist:<system>:<event>\n"
5792 "\t disable_hist:<system>:<event>\n"
5793 #endif
5794 #ifdef CONFIG_STACKTRACE
5795 "\t\t stacktrace\n"
5796 #endif
5797 #ifdef CONFIG_TRACER_SNAPSHOT
5798 "\t\t snapshot\n"
5799 #endif
5800 #ifdef CONFIG_HIST_TRIGGERS
5801 "\t\t hist (see below)\n"
5802 #endif
5803 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5804 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5805 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5806 "\t events/block/block_unplug/trigger\n"
5807 "\t The first disables tracing every time block_unplug is hit.\n"
5808 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5809 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5810 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5811 "\t Like function triggers, the counter is only decremented if it\n"
5812 "\t enabled or disabled tracing.\n"
5813 "\t To remove a trigger without a count:\n"
5814 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5815 "\t To remove a trigger with a count:\n"
5816 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5817 "\t Filters can be ignored when removing a trigger.\n"
5818 #ifdef CONFIG_HIST_TRIGGERS
5819 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5820 "\t Format: hist:keys=<field1[,field2,...]>\n"
5821 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5822 "\t [:values=<field1[,field2,...]>]\n"
5823 "\t [:sort=<field1[,field2,...]>]\n"
5824 "\t [:size=#entries]\n"
5825 "\t [:pause][:continue][:clear]\n"
5826 "\t [:name=histname1]\n"
5827 "\t [:nohitcount]\n"
5828 "\t [:<handler>.<action>]\n"
5829 "\t [if <filter>]\n\n"
5830 "\t Note, special fields can be used as well:\n"
5831 "\t common_timestamp - to record current timestamp\n"
5832 "\t common_cpu - to record the CPU the event happened on\n"
5833 "\n"
5834 "\t A hist trigger variable can be:\n"
5835 "\t - a reference to a field e.g. x=current_timestamp,\n"
5836 "\t - a reference to another variable e.g. y=$x,\n"
5837 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5838 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5839 "\n"
5840 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5841 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5842 "\t variable reference, field or numeric literal.\n"
5843 "\n"
5844 "\t When a matching event is hit, an entry is added to a hash\n"
5845 "\t table using the key(s) and value(s) named, and the value of a\n"
5846 "\t sum called 'hitcount' is incremented. Keys and values\n"
5847 "\t correspond to fields in the event's format description. Keys\n"
5848 "\t can be any field, or the special string 'common_stacktrace'.\n"
5849 "\t Compound keys consisting of up to two fields can be specified\n"
5850 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5851 "\t fields. Sort keys consisting of up to two fields can be\n"
5852 "\t specified using the 'sort' keyword. The sort direction can\n"
5853 "\t be modified by appending '.descending' or '.ascending' to a\n"
5854 "\t sort field. The 'size' parameter can be used to specify more\n"
5855 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5856 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5857 "\t its histogram data will be shared with other triggers of the\n"
5858 "\t same name, and trigger hits will update this common data.\n\n"
5859 "\t Reading the 'hist' file for the event will dump the hash\n"
5860 "\t table in its entirety to stdout. If there are multiple hist\n"
5861 "\t triggers attached to an event, there will be a table for each\n"
5862 "\t trigger in the output. The table displayed for a named\n"
5863 "\t trigger will be the same as any other instance having the\n"
5864 "\t same name. The default format used to display a given field\n"
5865 "\t can be modified by appending any of the following modifiers\n"
5866 "\t to the field name, as applicable:\n\n"
5867 "\t .hex display a number as a hex value\n"
5868 "\t .sym display an address as a symbol\n"
5869 "\t .sym-offset display an address as a symbol and offset\n"
5870 "\t .execname display a common_pid as a program name\n"
5871 "\t .syscall display a syscall id as a syscall name\n"
5872 "\t .log2 display log2 value rather than raw number\n"
5873 "\t .buckets=size display values in groups of size rather than raw number\n"
5874 "\t .usecs display a common_timestamp in microseconds\n"
5875 "\t .percent display a number of percentage value\n"
5876 "\t .graph display a bar-graph of a value\n\n"
5877 "\t The 'pause' parameter can be used to pause an existing hist\n"
5878 "\t trigger or to start a hist trigger but not log any events\n"
5879 "\t until told to do so. 'continue' can be used to start or\n"
5880 "\t restart a paused hist trigger.\n\n"
5881 "\t The 'clear' parameter will clear the contents of a running\n"
5882 "\t hist trigger and leave its current paused/active state\n"
5883 "\t unchanged.\n\n"
5884 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5885 "\t raw hitcount in the histogram.\n\n"
5886 "\t The enable_hist and disable_hist triggers can be used to\n"
5887 "\t have one event conditionally start and stop another event's\n"
5888 "\t already-attached hist trigger. The syntax is analogous to\n"
5889 "\t the enable_event and disable_event triggers.\n\n"
5890 "\t Hist trigger handlers and actions are executed whenever a\n"
5891 "\t a histogram entry is added or updated. They take the form:\n\n"
5892 "\t <handler>.<action>\n\n"
5893 "\t The available handlers are:\n\n"
5894 "\t onmatch(matching.event) - invoke on addition or update\n"
5895 "\t onmax(var) - invoke if var exceeds current max\n"
5896 "\t onchange(var) - invoke action if var changes\n\n"
5897 "\t The available actions are:\n\n"
5898 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5899 "\t save(field,...) - save current event fields\n"
5900 #ifdef CONFIG_TRACER_SNAPSHOT
5901 "\t snapshot() - snapshot the trace buffer\n\n"
5902 #endif
5903 #ifdef CONFIG_SYNTH_EVENTS
5904 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5905 "\t Write into this file to define/undefine new synthetic events.\n"
5906 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5907 #endif
5908 #endif
5909 ;
5910
5911 static ssize_t
5912 tracing_readme_read(struct file *filp, char __user *ubuf,
5913 size_t cnt, loff_t *ppos)
5914 {
5915 return simple_read_from_buffer(ubuf, cnt, ppos,
5916 readme_msg, strlen(readme_msg));
5917 }
5918
5919 static const struct file_operations tracing_readme_fops = {
5920 .open = tracing_open_generic,
5921 .read = tracing_readme_read,
5922 .llseek = generic_file_llseek,
5923 };
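/*
 * A quick usage sketch, assuming this blob is exposed as the tracefs
 * "README" file created elsewhere in this file:
 *
 *	# cat /sys/kernel/tracing/README
 */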
5924
5925 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5926 {
5927 int pid = ++(*pos);
5928
5929 return trace_find_tgid_ptr(pid);
5930 }
5931
5932 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5933 {
5934 int pid = *pos;
5935
5936 return trace_find_tgid_ptr(pid);
5937 }
5938
5939 static void saved_tgids_stop(struct seq_file *m, void *v)
5940 {
5941 }
5942
5943 static int saved_tgids_show(struct seq_file *m, void *v)
5944 {
5945 int *entry = (int *)v;
5946 int pid = entry - tgid_map;
5947 int tgid = *entry;
5948
5949 if (tgid == 0)
5950 return SEQ_SKIP;
5951
5952 seq_printf(m, "%d %d\n", pid, tgid);
5953 return 0;
5954 }
5955
5956 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5957 .start = saved_tgids_start,
5958 .stop = saved_tgids_stop,
5959 .next = saved_tgids_next,
5960 .show = saved_tgids_show,
5961 };
5962
5963 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5964 {
5965 int ret;
5966
5967 ret = tracing_check_open_get_tr(NULL);
5968 if (ret)
5969 return ret;
5970
5971 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5972 }
5973
5974
5975 static const struct file_operations tracing_saved_tgids_fops = {
5976 .open = tracing_saved_tgids_open,
5977 .read = seq_read,
5978 .llseek = seq_lseek,
5979 .release = seq_release,
5980 };
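/*
 * A rough usage sketch, assuming the "record-tgid" option toggles
 * TRACE_ITER_RECORD_TGID and this seq_file backs "saved_tgids":
 *
 *	# echo 1 > options/record-tgid
 *	# cat saved_tgids
 *
 * Each output line is a "<pid> <tgid>" pair taken from tgid_map, as
 * printed by saved_tgids_show() above; entries with a zero tgid are
 * skipped.
 */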
5981
5982 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5983 {
5984 unsigned int *ptr = v;
5985
5986 if (*pos || m->count)
5987 ptr++;
5988
5989 (*pos)++;
5990
5991 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5992 ptr++) {
5993 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5994 continue;
5995
5996 return ptr;
5997 }
5998
5999 return NULL;
6000 }
6001
6002 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
6003 {
6004 void *v;
6005 loff_t l = 0;
6006
6007 preempt_disable();
6008 arch_spin_lock(&trace_cmdline_lock);
6009
6010 v = &savedcmd->map_cmdline_to_pid[0];
6011 while (l <= *pos) {
6012 v = saved_cmdlines_next(m, v, &l);
6013 if (!v)
6014 return NULL;
6015 }
6016
6017 return v;
6018 }
6019
6020 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6021 {
6022 arch_spin_unlock(&trace_cmdline_lock);
6023 preempt_enable();
6024 }
6025
6026 static int saved_cmdlines_show(struct seq_file *m, void *v)
6027 {
6028 char buf[TASK_COMM_LEN];
6029 unsigned int *pid = v;
6030
6031 __trace_find_cmdline(*pid, buf);
6032 seq_printf(m, "%d %s\n", *pid, buf);
6033 return 0;
6034 }
6035
6036 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6037 .start = saved_cmdlines_start,
6038 .next = saved_cmdlines_next,
6039 .stop = saved_cmdlines_stop,
6040 .show = saved_cmdlines_show,
6041 };
6042
6043 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6044 {
6045 int ret;
6046
6047 ret = tracing_check_open_get_tr(NULL);
6048 if (ret)
6049 return ret;
6050
6051 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6052 }
6053
6054 static const struct file_operations tracing_saved_cmdlines_fops = {
6055 .open = tracing_saved_cmdlines_open,
6056 .read = seq_read,
6057 .llseek = seq_lseek,
6058 .release = seq_release,
6059 };
6060
6061 static ssize_t
6062 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6063 size_t cnt, loff_t *ppos)
6064 {
6065 char buf[64];
6066 int r;
6067
6068 preempt_disable();
6069 arch_spin_lock(&trace_cmdline_lock);
6070 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6071 arch_spin_unlock(&trace_cmdline_lock);
6072 preempt_enable();
6073
6074 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6075 }
6076
6077 static int tracing_resize_saved_cmdlines(unsigned int val)
6078 {
6079 struct saved_cmdlines_buffer *s, *savedcmd_temp;
6080
6081 s = allocate_cmdlines_buffer(val);
6082 if (!s)
6083 return -ENOMEM;
6084
6085 preempt_disable();
6086 arch_spin_lock(&trace_cmdline_lock);
6087 savedcmd_temp = savedcmd;
6088 savedcmd = s;
6089 arch_spin_unlock(&trace_cmdline_lock);
6090 preempt_enable();
6091 free_saved_cmdlines_buffer(savedcmd_temp);
6092
6093 return 0;
6094 }
6095
6096 static ssize_t
6097 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6098 size_t cnt, loff_t *ppos)
6099 {
6100 unsigned long val;
6101 int ret;
6102
6103 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6104 if (ret)
6105 return ret;
6106
6107 /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
6108 if (!val || val > PID_MAX_DEFAULT)
6109 return -EINVAL;
6110
6111 ret = tracing_resize_saved_cmdlines((unsigned int)val);
6112 if (ret < 0)
6113 return ret;
6114
6115 *ppos += cnt;
6116
6117 return cnt;
6118 }
6119
6120 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6121 .open = tracing_open_generic,
6122 .read = tracing_saved_cmdlines_size_read,
6123 .write = tracing_saved_cmdlines_size_write,
6124 };
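/*
 * A rough usage sketch for the two saved_cmdlines files, assuming they
 * are wired to the fops above and behave as described in readme_msg:
 *
 *	# cat saved_cmdlines			(one "<pid> <comm>" per line)
 *	# echo 4096 > saved_cmdlines_size	(1 .. PID_MAX_DEFAULT)
 *
 * The write path allocates a new buffer of the requested size, swaps it
 * in under trace_cmdline_lock and frees the old one.
 */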
6125
6126 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6127 static union trace_eval_map_item *
6128 update_eval_map(union trace_eval_map_item *ptr)
6129 {
6130 if (!ptr->map.eval_string) {
6131 if (ptr->tail.next) {
6132 ptr = ptr->tail.next;
6133 /* Set ptr to the next real item (skip head) */
6134 ptr++;
6135 } else
6136 return NULL;
6137 }
6138 return ptr;
6139 }
6140
6141 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6142 {
6143 union trace_eval_map_item *ptr = v;
6144
6145 /*
6146 * Paranoid! If ptr points to end, we don't want to increment past it.
6147 * This really should never happen.
6148 */
6149 (*pos)++;
6150 ptr = update_eval_map(ptr);
6151 if (WARN_ON_ONCE(!ptr))
6152 return NULL;
6153
6154 ptr++;
6155 ptr = update_eval_map(ptr);
6156
6157 return ptr;
6158 }
6159
6160 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6161 {
6162 union trace_eval_map_item *v;
6163 loff_t l = 0;
6164
6165 mutex_lock(&trace_eval_mutex);
6166
6167 v = trace_eval_maps;
6168 if (v)
6169 v++;
6170
6171 while (v && l < *pos) {
6172 v = eval_map_next(m, v, &l);
6173 }
6174
6175 return v;
6176 }
6177
6178 static void eval_map_stop(struct seq_file *m, void *v)
6179 {
6180 mutex_unlock(&trace_eval_mutex);
6181 }
6182
6183 static int eval_map_show(struct seq_file *m, void *v)
6184 {
6185 union trace_eval_map_item *ptr = v;
6186
6187 seq_printf(m, "%s %ld (%s)\n",
6188 ptr->map.eval_string, ptr->map.eval_value,
6189 ptr->map.system);
6190
6191 return 0;
6192 }
6193
6194 static const struct seq_operations tracing_eval_map_seq_ops = {
6195 .start = eval_map_start,
6196 .next = eval_map_next,
6197 .stop = eval_map_stop,
6198 .show = eval_map_show,
6199 };
6200
6201 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6202 {
6203 int ret;
6204
6205 ret = tracing_check_open_get_tr(NULL);
6206 if (ret)
6207 return ret;
6208
6209 return seq_open(filp, &tracing_eval_map_seq_ops);
6210 }
6211
6212 static const struct file_operations tracing_eval_map_fops = {
6213 .open = tracing_eval_map_open,
6214 .read = seq_read,
6215 .llseek = seq_lseek,
6216 .release = seq_release,
6217 };
6218
6219 static inline union trace_eval_map_item *
6220 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6221 {
6222 /* Return tail of array given the head */
6223 return ptr + ptr->head.length + 1;
6224 }
6225
6226 static void
6227 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6228 int len)
6229 {
6230 struct trace_eval_map **stop;
6231 struct trace_eval_map **map;
6232 union trace_eval_map_item *map_array;
6233 union trace_eval_map_item *ptr;
6234
6235 stop = start + len;
6236
6237 /*
6238 * The trace_eval_maps contains the map plus a head and tail item,
6239 * where the head holds the module and length of array, and the
6240 * tail holds a pointer to the next list.
6241 */
6242 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6243 if (!map_array) {
6244 pr_warn("Unable to allocate trace eval mapping\n");
6245 return;
6246 }
6247
6248 mutex_lock(&trace_eval_mutex);
6249
6250 if (!trace_eval_maps)
6251 trace_eval_maps = map_array;
6252 else {
6253 ptr = trace_eval_maps;
6254 for (;;) {
6255 ptr = trace_eval_jmp_to_tail(ptr);
6256 if (!ptr->tail.next)
6257 break;
6258 ptr = ptr->tail.next;
6259
6260 }
6261 ptr->tail.next = map_array;
6262 }
6263 map_array->head.mod = mod;
6264 map_array->head.length = len;
6265 map_array++;
6266
6267 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6268 map_array->map = **map;
6269 map_array++;
6270 }
6271 memset(map_array, 0, sizeof(*map_array));
6272
6273 mutex_unlock(&trace_eval_mutex);
6274 }
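/*
 * Layout produced above for one module's chunk of eval maps (a head and
 * a tail element bracket the copied maps):
 *
 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * The zeroed element written by the final memset() is the tail; its
 * ->tail.next is filled in when the next module's chunk is chained on.
 */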
6275
6276 static void trace_create_eval_file(struct dentry *d_tracer)
6277 {
6278 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6279 NULL, &tracing_eval_map_fops);
6280 }
6281
6282 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6283 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6284 static inline void trace_insert_eval_map_file(struct module *mod,
6285 struct trace_eval_map **start, int len) { }
6286 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6287
6288 static void trace_insert_eval_map(struct module *mod,
6289 struct trace_eval_map **start, int len)
6290 {
6291 struct trace_eval_map **map;
6292
6293 if (len <= 0)
6294 return;
6295
6296 map = start;
6297
6298 trace_event_eval_update(map, len);
6299
6300 trace_insert_eval_map_file(mod, start, len);
6301 }
6302
6303 static ssize_t
6304 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6305 size_t cnt, loff_t *ppos)
6306 {
6307 struct trace_array *tr = filp->private_data;
6308 char buf[MAX_TRACER_SIZE+2];
6309 int r;
6310
6311 mutex_lock(&trace_types_lock);
6312 r = sprintf(buf, "%s\n", tr->current_trace->name);
6313 mutex_unlock(&trace_types_lock);
6314
6315 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6316 }
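/*
 * A quick usage sketch, assuming this handler backs the current_tracer
 * file described in readme_msg above:
 *
 *	# cat current_tracer
 *	nop
 */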
6317
6318 int tracer_init(struct tracer *t, struct trace_array *tr)
6319 {
6320 tracing_reset_online_cpus(&tr->array_buffer);
6321 return t->init(tr);
6322 }
6323
6324 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6325 {
6326 int cpu;
6327
6328 for_each_tracing_cpu(cpu)
6329 per_cpu_ptr(buf->data, cpu)->entries = val;
6330 }
6331
6332 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6333 {
6334 if (cpu == RING_BUFFER_ALL_CPUS) {
6335 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6336 } else {
6337 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6338 }
6339 }
6340
6341 #ifdef CONFIG_TRACER_MAX_TRACE
6342 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6343 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6344 struct array_buffer *size_buf, int cpu_id)
6345 {
6346 int cpu, ret = 0;
6347
6348 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6349 for_each_tracing_cpu(cpu) {
6350 ret = ring_buffer_resize(trace_buf->buffer,
6351 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6352 if (ret < 0)
6353 break;
6354 per_cpu_ptr(trace_buf->data, cpu)->entries =
6355 per_cpu_ptr(size_buf->data, cpu)->entries;
6356 }
6357 } else {
6358 ret = ring_buffer_resize(trace_buf->buffer,
6359 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6360 if (ret == 0)
6361 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6362 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6363 }
6364
6365 return ret;
6366 }
6367 #endif /* CONFIG_TRACER_MAX_TRACE */
6368
6369 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6370 unsigned long size, int cpu)
6371 {
6372 int ret;
6373
6374 /*
6375 * If kernel or user changes the size of the ring buffer
6376 * we use the size that was given, and we can forget about
6377 * expanding it later.
6378 */
6379 ring_buffer_expanded = true;
6380
6381 /* May be called before buffers are initialized */
6382 if (!tr->array_buffer.buffer)
6383 return 0;
6384
6385 /* Do not allow tracing while resizing ring buffer */
6386 tracing_stop_tr(tr);
6387
6388 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6389 if (ret < 0)
6390 goto out_start;
6391
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393 if (!tr->allocated_snapshot)
6394 goto out;
6395
6396 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6397 if (ret < 0) {
6398 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6399 &tr->array_buffer, cpu);
6400 if (r < 0) {
6401 /*
6402 * AARGH! We are left with different
6403 * size max buffer!!!!
6404 * The max buffer is our "snapshot" buffer.
6405 * When a tracer needs a snapshot (one of the
6406 * latency tracers), it swaps the max buffer
6407 * with the saved snapshot. We succeeded in
6408 * updating the size of the main buffer, but failed to
6409 * update the size of the max buffer. But when we tried
6410 * to reset the main buffer to the original size, we
6411 * failed there too. This is very unlikely to
6412 * happen, but if it does, warn and kill all
6413 * tracing.
6414 */
6415 WARN_ON(1);
6416 tracing_disabled = 1;
6417 }
6418 goto out_start;
6419 }
6420
6421 update_buffer_entries(&tr->max_buffer, cpu);
6422
6423 out:
6424 #endif /* CONFIG_TRACER_MAX_TRACE */
6425
6426 update_buffer_entries(&tr->array_buffer, cpu);
6427 out_start:
6428 tracing_start_tr(tr);
6429 return ret;
6430 }
6431
6432 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6433 unsigned long size, int cpu_id)
6434 {
6435 int ret;
6436
6437 mutex_lock(&trace_types_lock);
6438
6439 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6440 /* make sure, this cpu is enabled in the mask */
6441 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6442 ret = -EINVAL;
6443 goto out;
6444 }
6445 }
6446
6447 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6448 if (ret < 0)
6449 ret = -ENOMEM;
6450
6451 out:
6452 mutex_unlock(&trace_types_lock);
6453
6454 return ret;
6455 }
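/*
 * A rough usage sketch, assuming buffer_size_kb and its per-CPU variant
 * end up calling this function (values are in KiB, converted in
 * tracing_entries_write() below):
 *
 *	# echo 1408 > buffer_size_kb			(all CPUs)
 *	# echo 1408 > per_cpu/cpu0/buffer_size_kb	(one CPU)
 */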
6456
6457
6458 /**
6459 * tracing_update_buffers - used by tracing facility to expand ring buffers
6460 *
6461 * To save memory when tracing is never used on a system that has it
6462 * configured in, the ring buffers are set to a minimum size. Once
6463 * a user starts to use the tracing facility, they need to grow
6464 * to their default size.
6465 *
6466 * This function is to be called when a tracer is about to be used.
6467 */
6468 int tracing_update_buffers(void)
6469 {
6470 int ret = 0;
6471
6472 mutex_lock(&trace_types_lock);
6473 if (!ring_buffer_expanded)
6474 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6475 RING_BUFFER_ALL_CPUS);
6476 mutex_unlock(&trace_types_lock);
6477
6478 return ret;
6479 }
6480
6481 struct trace_option_dentry;
6482
6483 static void
6484 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6485
6486 /*
6487 * Used to clear out the tracer before deletion of an instance.
6488 * Must have trace_types_lock held.
6489 */
6490 static void tracing_set_nop(struct trace_array *tr)
6491 {
6492 if (tr->current_trace == &nop_trace)
6493 return;
6494
6495 tr->current_trace->enabled--;
6496
6497 if (tr->current_trace->reset)
6498 tr->current_trace->reset(tr);
6499
6500 tr->current_trace = &nop_trace;
6501 }
6502
6503 static bool tracer_options_updated;
6504
6505 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6506 {
6507 /* Only enable if the directory has been created already. */
6508 if (!tr->dir)
6509 return;
6510
6511 /* Only create trace option files after update_tracer_options finish */
6512 if (!tracer_options_updated)
6513 return;
6514
6515 create_trace_option_files(tr, t);
6516 }
6517
6518 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6519 {
6520 struct tracer *t;
6521 #ifdef CONFIG_TRACER_MAX_TRACE
6522 bool had_max_tr;
6523 #endif
6524 int ret = 0;
6525
6526 mutex_lock(&trace_types_lock);
6527
6528 if (!ring_buffer_expanded) {
6529 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6530 RING_BUFFER_ALL_CPUS);
6531 if (ret < 0)
6532 goto out;
6533 ret = 0;
6534 }
6535
6536 for (t = trace_types; t; t = t->next) {
6537 if (strcmp(t->name, buf) == 0)
6538 break;
6539 }
6540 if (!t) {
6541 ret = -EINVAL;
6542 goto out;
6543 }
6544 if (t == tr->current_trace)
6545 goto out;
6546
6547 #ifdef CONFIG_TRACER_SNAPSHOT
6548 if (t->use_max_tr) {
6549 local_irq_disable();
6550 arch_spin_lock(&tr->max_lock);
6551 if (tr->cond_snapshot)
6552 ret = -EBUSY;
6553 arch_spin_unlock(&tr->max_lock);
6554 local_irq_enable();
6555 if (ret)
6556 goto out;
6557 }
6558 #endif
6559 /* Some tracers won't work on kernel command line */
6560 if (system_state < SYSTEM_RUNNING && t->noboot) {
6561 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6562 t->name);
6563 goto out;
6564 }
6565
6566 /* Some tracers are only allowed for the top level buffer */
6567 if (!trace_ok_for_array(t, tr)) {
6568 ret = -EINVAL;
6569 goto out;
6570 }
6571
6572 /* If trace pipe files are being read, we can't change the tracer */
6573 if (tr->trace_ref) {
6574 ret = -EBUSY;
6575 goto out;
6576 }
6577
6578 trace_branch_disable();
6579
6580 tr->current_trace->enabled--;
6581
6582 if (tr->current_trace->reset)
6583 tr->current_trace->reset(tr);
6584
6585 #ifdef CONFIG_TRACER_MAX_TRACE
6586 had_max_tr = tr->current_trace->use_max_tr;
6587
6588 /* Current trace needs to be nop_trace before synchronize_rcu */
6589 tr->current_trace = &nop_trace;
6590
6591 if (had_max_tr && !t->use_max_tr) {
6592 /*
6593 * We need to make sure that the update_max_tr sees that
6594 * current_trace changed to nop_trace to keep it from
6595 * swapping the buffers after we resize it.
6596 * The update_max_tr is called with interrupts disabled,
6597 * so a synchronize_rcu() is sufficient.
6598 */
6599 synchronize_rcu();
6600 free_snapshot(tr);
6601 }
6602
6603 if (t->use_max_tr && !tr->allocated_snapshot) {
6604 ret = tracing_alloc_snapshot_instance(tr);
6605 if (ret < 0)
6606 goto out;
6607 }
6608 #else
6609 tr->current_trace = &nop_trace;
6610 #endif
6611
6612 if (t->init) {
6613 ret = tracer_init(t, tr);
6614 if (ret)
6615 goto out;
6616 }
6617
6618 tr->current_trace = t;
6619 tr->current_trace->enabled++;
6620 trace_branch_enable(tr);
6621 out:
6622 mutex_unlock(&trace_types_lock);
6623
6624 return ret;
6625 }
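/*
 * A quick usage sketch, assuming current_tracer and available_tracers
 * behave as described in readme_msg above:
 *
 *	# cat available_tracers
 *	# echo function > current_tracer	(if the function tracer is built in)
 *	# echo nop > current_tracer		(back to the default)
 *
 * Writing a name that is not on the trace_types list fails with -EINVAL
 * in the lookup above.
 */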
6626
6627 static ssize_t
6628 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6629 size_t cnt, loff_t *ppos)
6630 {
6631 struct trace_array *tr = filp->private_data;
6632 char buf[MAX_TRACER_SIZE+1];
6633 char *name;
6634 size_t ret;
6635 int err;
6636
6637 ret = cnt;
6638
6639 if (cnt > MAX_TRACER_SIZE)
6640 cnt = MAX_TRACER_SIZE;
6641
6642 if (copy_from_user(buf, ubuf, cnt))
6643 return -EFAULT;
6644
6645 buf[cnt] = 0;
6646
6647 name = strim(buf);
6648
6649 err = tracing_set_tracer(tr, name);
6650 if (err)
6651 return err;
6652
6653 *ppos += ret;
6654
6655 return ret;
6656 }
6657
6658 static ssize_t
6659 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6660 size_t cnt, loff_t *ppos)
6661 {
6662 char buf[64];
6663 int r;
6664
6665 r = snprintf(buf, sizeof(buf), "%ld\n",
6666 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6667 if (r > sizeof(buf))
6668 r = sizeof(buf);
6669 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6670 }
6671
6672 static ssize_t
6673 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6674 size_t cnt, loff_t *ppos)
6675 {
6676 unsigned long val;
6677 int ret;
6678
6679 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6680 if (ret)
6681 return ret;
6682
6683 *ptr = val * 1000;
6684
6685 return cnt;
6686 }
6687
6688 static ssize_t
6689 tracing_thresh_read(struct file *filp, char __user *ubuf,
6690 size_t cnt, loff_t *ppos)
6691 {
6692 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6693 }
6694
6695 static ssize_t
6696 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6697 size_t cnt, loff_t *ppos)
6698 {
6699 struct trace_array *tr = filp->private_data;
6700 int ret;
6701
6702 mutex_lock(&trace_types_lock);
6703 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6704 if (ret < 0)
6705 goto out;
6706
6707 if (tr->current_trace->update_thresh) {
6708 ret = tr->current_trace->update_thresh(tr);
6709 if (ret < 0)
6710 goto out;
6711 }
6712
6713 ret = cnt;
6714 out:
6715 mutex_unlock(&trace_types_lock);
6716
6717 return ret;
6718 }
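/*
 * tracing_thresh is exposed in microseconds but stored in nanoseconds:
 * tracing_nsecs_write() multiplies the written value by 1000 and
 * tracing_nsecs_read() converts back with nsecs_to_usecs(). A rough
 * usage sketch, assuming the file is named tracing_thresh:
 *
 *	# echo 100 > tracing_thresh		(100 usecs)
 *	# cat tracing_thresh
 */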
6719
6720 #ifdef CONFIG_TRACER_MAX_TRACE
6721
6722 static ssize_t
6723 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6724 size_t cnt, loff_t *ppos)
6725 {
6726 struct trace_array *tr = filp->private_data;
6727
6728 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6729 }
6730
6731 static ssize_t
6732 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6733 size_t cnt, loff_t *ppos)
6734 {
6735 struct trace_array *tr = filp->private_data;
6736
6737 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6738 }
6739
6740 #endif
6741
6742 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6743 {
6744 if (cpu == RING_BUFFER_ALL_CPUS) {
6745 if (cpumask_empty(tr->pipe_cpumask)) {
6746 cpumask_setall(tr->pipe_cpumask);
6747 return 0;
6748 }
6749 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6750 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6751 return 0;
6752 }
6753 return -EBUSY;
6754 }
6755
6756 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6757 {
6758 if (cpu == RING_BUFFER_ALL_CPUS) {
6759 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6760 cpumask_clear(tr->pipe_cpumask);
6761 } else {
6762 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6763 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6764 }
6765 }
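/*
 * open_pipe_on_cpu()/close_pipe_on_cpu() give trace_pipe readers mutual
 * exclusion per CPU: a reader of the global trace_pipe claims every bit
 * in pipe_cpumask, while a per-CPU trace_pipe reader claims only its own
 * bit, so the two kinds of readers cannot be open at the same time.
 */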
6766
6767 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6768 {
6769 struct trace_array *tr = inode->i_private;
6770 struct trace_iterator *iter;
6771 int cpu;
6772 int ret;
6773
6774 ret = tracing_check_open_get_tr(tr);
6775 if (ret)
6776 return ret;
6777
6778 mutex_lock(&trace_types_lock);
6779 cpu = tracing_get_cpu(inode);
6780 ret = open_pipe_on_cpu(tr, cpu);
6781 if (ret)
6782 goto fail_pipe_on_cpu;
6783
6784 /* create a buffer to store the information to pass to userspace */
6785 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6786 if (!iter) {
6787 ret = -ENOMEM;
6788 goto fail_alloc_iter;
6789 }
6790
6791 trace_seq_init(&iter->seq);
6792 iter->trace = tr->current_trace;
6793
6794 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6795 ret = -ENOMEM;
6796 goto fail;
6797 }
6798
6799 /* trace pipe does not show start of buffer */
6800 cpumask_setall(iter->started);
6801
6802 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6803 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6804
6805 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6806 if (trace_clocks[tr->clock_id].in_ns)
6807 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6808
6809 iter->tr = tr;
6810 iter->array_buffer = &tr->array_buffer;
6811 iter->cpu_file = cpu;
6812 mutex_init(&iter->mutex);
6813 filp->private_data = iter;
6814
6815 if (iter->trace->pipe_open)
6816 iter->trace->pipe_open(iter);
6817
6818 nonseekable_open(inode, filp);
6819
6820 tr->trace_ref++;
6821
6822 mutex_unlock(&trace_types_lock);
6823 return ret;
6824
6825 fail:
6826 kfree(iter);
6827 fail_alloc_iter:
6828 close_pipe_on_cpu(tr, cpu);
6829 fail_pipe_on_cpu:
6830 __trace_array_put(tr);
6831 mutex_unlock(&trace_types_lock);
6832 return ret;
6833 }
6834
6835 static int tracing_release_pipe(struct inode *inode, struct file *file)
6836 {
6837 struct trace_iterator *iter = file->private_data;
6838 struct trace_array *tr = inode->i_private;
6839
6840 mutex_lock(&trace_types_lock);
6841
6842 tr->trace_ref--;
6843
6844 if (iter->trace->pipe_close)
6845 iter->trace->pipe_close(iter);
6846 close_pipe_on_cpu(tr, iter->cpu_file);
6847 mutex_unlock(&trace_types_lock);
6848
6849 free_trace_iter_content(iter);
6850 kfree(iter);
6851
6852 trace_array_put(tr);
6853
6854 return 0;
6855 }
6856
6857 static __poll_t
6858 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6859 {
6860 struct trace_array *tr = iter->tr;
6861
6862 /* Iterators are static, they should be filled or empty */
6863 if (trace_buffer_iter(iter, iter->cpu_file))
6864 return EPOLLIN | EPOLLRDNORM;
6865
6866 if (tr->trace_flags & TRACE_ITER_BLOCK)
6867 /*
6868 * Always select as readable when in blocking mode
6869 */
6870 return EPOLLIN | EPOLLRDNORM;
6871 else
6872 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6873 filp, poll_table, iter->tr->buffer_percent);
6874 }
6875
6876 static __poll_t
6877 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6878 {
6879 struct trace_iterator *iter = filp->private_data;
6880
6881 return trace_poll(iter, filp, poll_table);
6882 }
6883
6884 /* Must be called with iter->mutex held. */
6885 static int tracing_wait_pipe(struct file *filp)
6886 {
6887 struct trace_iterator *iter = filp->private_data;
6888 int ret;
6889
6890 while (trace_empty(iter)) {
6891
6892 if ((filp->f_flags & O_NONBLOCK)) {
6893 return -EAGAIN;
6894 }
6895
6896 /*
6897 * We block until we read something and tracing is disabled.
6898 * We still block if tracing is disabled, but we have never
6899 * read anything. This allows a user to cat this file, and
6900 * then enable tracing. But after we have read something,
6901 * we give an EOF when tracing is again disabled.
6902 *
6903 * iter->pos will be 0 if we haven't read anything.
6904 */
6905 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6906 break;
6907
6908 mutex_unlock(&iter->mutex);
6909
6910 ret = wait_on_pipe(iter, 0);
6911
6912 mutex_lock(&iter->mutex);
6913
6914 if (ret)
6915 return ret;
6916 }
6917
6918 return 1;
6919 }
6920
6921 /*
6922 * Consumer reader.
6923 */
6924 static ssize_t
6925 tracing_read_pipe(struct file *filp, char __user *ubuf,
6926 size_t cnt, loff_t *ppos)
6927 {
6928 struct trace_iterator *iter = filp->private_data;
6929 ssize_t sret;
6930
6931 /*
6932 * Avoid more than one consumer on a single file descriptor
6933 * This is just a matter of trace coherency; the ring buffer itself
6934 * is protected.
6935 */
6936 mutex_lock(&iter->mutex);
6937
6938 /* return any leftover data */
6939 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6940 if (sret != -EBUSY)
6941 goto out;
6942
6943 trace_seq_init(&iter->seq);
6944
6945 if (iter->trace->read) {
6946 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6947 if (sret)
6948 goto out;
6949 }
6950
6951 waitagain:
6952 sret = tracing_wait_pipe(filp);
6953 if (sret <= 0)
6954 goto out;
6955
6956 /* stop when tracing is finished */
6957 if (trace_empty(iter)) {
6958 sret = 0;
6959 goto out;
6960 }
6961
6962 if (cnt >= PAGE_SIZE)
6963 cnt = PAGE_SIZE - 1;
6964
6965 /* reset all but tr, trace, and overruns */
6966 trace_iterator_reset(iter);
6967 cpumask_clear(iter->started);
6968 trace_seq_init(&iter->seq);
6969
6970 trace_event_read_lock();
6971 trace_access_lock(iter->cpu_file);
6972 while (trace_find_next_entry_inc(iter) != NULL) {
6973 enum print_line_t ret;
6974 int save_len = iter->seq.seq.len;
6975
6976 ret = print_trace_line(iter);
6977 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6978 /*
6979 * If one print_trace_line() fills the entire trace_seq in one shot,
6980 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6981 * In this case, we need to consume it; otherwise, the loop will peek
6982 * at this event again next time, resulting in an infinite loop.
6983 */
6984 if (save_len == 0) {
6985 iter->seq.full = 0;
6986 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6987 trace_consume(iter);
6988 break;
6989 }
6990
6991 /* In other cases, don't print partial lines */
6992 iter->seq.seq.len = save_len;
6993 break;
6994 }
6995 if (ret != TRACE_TYPE_NO_CONSUME)
6996 trace_consume(iter);
6997
6998 if (trace_seq_used(&iter->seq) >= cnt)
6999 break;
7000
7001 /*
7002 * Setting the full flag means we reached the trace_seq buffer
7003 * size and we should leave by partial output condition above.
7004 * One of the trace_seq_* functions is not used properly.
7005 */
7006 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7007 iter->ent->type);
7008 }
7009 trace_access_unlock(iter->cpu_file);
7010 trace_event_read_unlock();
7011
7012 /* Now copy what we have to the user */
7013 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7014 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
7015 trace_seq_init(&iter->seq);
7016
7017 /*
7018 * If there was nothing to send to user, in spite of consuming trace
7019 * entries, go back to wait for more entries.
7020 */
7021 if (sret == -EBUSY)
7022 goto waitagain;
7023
7024 out:
7025 mutex_unlock(&iter->mutex);
7026
7027 return sret;
7028 }
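/*
 * A quick usage sketch, assuming this is the read handler behind the
 * trace_pipe file described in readme_msg above:
 *
 *	# cat trace_pipe
 *
 * The read consumes events as it goes and, unless O_NONBLOCK is set,
 * blocks in tracing_wait_pipe() until data is available.
 */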
7029
7030 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7031 unsigned int idx)
7032 {
7033 __free_page(spd->pages[idx]);
7034 }
7035
7036 static size_t
7037 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7038 {
7039 size_t count;
7040 int save_len;
7041 int ret;
7042
7043 /* Seq buffer is page-sized, exactly what we need. */
7044 for (;;) {
7045 save_len = iter->seq.seq.len;
7046 ret = print_trace_line(iter);
7047
7048 if (trace_seq_has_overflowed(&iter->seq)) {
7049 iter->seq.seq.len = save_len;
7050 break;
7051 }
7052
7053 /*
7054 * This should not be hit, because it should only
7055 * be set if the iter->seq overflowed. But check it
7056 * anyway to be safe.
7057 */
7058 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7059 iter->seq.seq.len = save_len;
7060 break;
7061 }
7062
7063 count = trace_seq_used(&iter->seq) - save_len;
7064 if (rem < count) {
7065 rem = 0;
7066 iter->seq.seq.len = save_len;
7067 break;
7068 }
7069
7070 if (ret != TRACE_TYPE_NO_CONSUME)
7071 trace_consume(iter);
7072 rem -= count;
7073 if (!trace_find_next_entry_inc(iter)) {
7074 rem = 0;
7075 iter->ent = NULL;
7076 break;
7077 }
7078 }
7079
7080 return rem;
7081 }
7082
7083 static ssize_t tracing_splice_read_pipe(struct file *filp,
7084 loff_t *ppos,
7085 struct pipe_inode_info *pipe,
7086 size_t len,
7087 unsigned int flags)
7088 {
7089 struct page *pages_def[PIPE_DEF_BUFFERS];
7090 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7091 struct trace_iterator *iter = filp->private_data;
7092 struct splice_pipe_desc spd = {
7093 .pages = pages_def,
7094 .partial = partial_def,
7095 .nr_pages = 0, /* This gets updated below. */
7096 .nr_pages_max = PIPE_DEF_BUFFERS,
7097 .ops = &default_pipe_buf_ops,
7098 .spd_release = tracing_spd_release_pipe,
7099 };
7100 ssize_t ret;
7101 size_t rem;
7102 unsigned int i;
7103
7104 if (splice_grow_spd(pipe, &spd))
7105 return -ENOMEM;
7106
7107 mutex_lock(&iter->mutex);
7108
7109 if (iter->trace->splice_read) {
7110 ret = iter->trace->splice_read(iter, filp,
7111 ppos, pipe, len, flags);
7112 if (ret)
7113 goto out_err;
7114 }
7115
7116 ret = tracing_wait_pipe(filp);
7117 if (ret <= 0)
7118 goto out_err;
7119
7120 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7121 ret = -EFAULT;
7122 goto out_err;
7123 }
7124
7125 trace_event_read_lock();
7126 trace_access_lock(iter->cpu_file);
7127
7128 /* Fill as many pages as possible. */
7129 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7130 spd.pages[i] = alloc_page(GFP_KERNEL);
7131 if (!spd.pages[i])
7132 break;
7133
7134 rem = tracing_fill_pipe_page(rem, iter);
7135
7136 /* Copy the data into the page, so we can start over. */
7137 ret = trace_seq_to_buffer(&iter->seq,
7138 page_address(spd.pages[i]),
7139 trace_seq_used(&iter->seq));
7140 if (ret < 0) {
7141 __free_page(spd.pages[i]);
7142 break;
7143 }
7144 spd.partial[i].offset = 0;
7145 spd.partial[i].len = trace_seq_used(&iter->seq);
7146
7147 trace_seq_init(&iter->seq);
7148 }
7149
7150 trace_access_unlock(iter->cpu_file);
7151 trace_event_read_unlock();
7152 mutex_unlock(&iter->mutex);
7153
7154 spd.nr_pages = i;
7155
7156 if (i)
7157 ret = splice_to_pipe(pipe, &spd);
7158 else
7159 ret = 0;
7160 out:
7161 splice_shrink_spd(&spd);
7162 return ret;
7163
7164 out_err:
7165 mutex_unlock(&iter->mutex);
7166 goto out;
7167 }
7168
7169 static ssize_t
7170 tracing_entries_read(struct file *filp, char __user *ubuf,
7171 size_t cnt, loff_t *ppos)
7172 {
7173 struct inode *inode = file_inode(filp);
7174 struct trace_array *tr = inode->i_private;
7175 int cpu = tracing_get_cpu(inode);
7176 char buf[64];
7177 int r = 0;
7178 ssize_t ret;
7179
7180 mutex_lock(&trace_types_lock);
7181
7182 if (cpu == RING_BUFFER_ALL_CPUS) {
7183 int cpu, buf_size_same;
7184 unsigned long size;
7185
7186 size = 0;
7187 buf_size_same = 1;
7188 /* check if all cpu sizes are same */
7189 for_each_tracing_cpu(cpu) {
7190 /* fill in the size from first enabled cpu */
7191 if (size == 0)
7192 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7193 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7194 buf_size_same = 0;
7195 break;
7196 }
7197 }
7198
7199 if (buf_size_same) {
7200 if (!ring_buffer_expanded)
7201 r = sprintf(buf, "%lu (expanded: %lu)\n",
7202 size >> 10,
7203 trace_buf_size >> 10);
7204 else
7205 r = sprintf(buf, "%lu\n", size >> 10);
7206 } else
7207 r = sprintf(buf, "X\n");
7208 } else
7209 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7210
7211 mutex_unlock(&trace_types_lock);
7212
7213 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7214 return ret;
7215 }
7216
7217 static ssize_t
7218 tracing_entries_write(struct file *filp, const char __user *ubuf,
7219 size_t cnt, loff_t *ppos)
7220 {
7221 struct inode *inode = file_inode(filp);
7222 struct trace_array *tr = inode->i_private;
7223 unsigned long val;
7224 int ret;
7225
7226 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7227 if (ret)
7228 return ret;
7229
7230 /* must have at least 1 entry */
7231 if (!val)
7232 return -EINVAL;
7233
7234 /* value is in KB */
7235 val <<= 10;
7236 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7237 if (ret < 0)
7238 return ret;
7239
7240 *ppos += cnt;
7241
7242 return cnt;
7243 }
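/*
 * A rough usage sketch, assuming these handlers back buffer_size_kb and
 * buffer_total_size_kb as described in readme_msg above:
 *
 *	# cat buffer_size_kb
 *	# echo 2048 > buffer_size_kb		(value in KiB)
 *	# cat buffer_total_size_kb
 *
 * A read of "X" from buffer_size_kb means the per-CPU sizes currently
 * differ.
 */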
7244
7245 static ssize_t
7246 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7247 size_t cnt, loff_t *ppos)
7248 {
7249 struct trace_array *tr = filp->private_data;
7250 char buf[64];
7251 int r, cpu;
7252 unsigned long size = 0, expanded_size = 0;
7253
7254 mutex_lock(&trace_types_lock);
7255 for_each_tracing_cpu(cpu) {
7256 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7257 if (!ring_buffer_expanded)
7258 expanded_size += trace_buf_size >> 10;
7259 }
7260 if (ring_buffer_expanded)
7261 r = sprintf(buf, "%lu\n", size);
7262 else
7263 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7264 mutex_unlock(&trace_types_lock);
7265
7266 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7267 }
7268
7269 static ssize_t
7270 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7271 size_t cnt, loff_t *ppos)
7272 {
7273 /*
7274 * There is no need to read what the user has written; this function
7275 * exists only so that writing to the file (e.g. with "echo") does not fail.
7276 */
7277
7278 *ppos += cnt;
7279
7280 return cnt;
7281 }
7282
7283 static int
7284 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7285 {
7286 struct trace_array *tr = inode->i_private;
7287
7288 /* disable tracing ? */
7289 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7290 tracer_tracing_off(tr);
7291 /* resize the ring buffer to 0 */
7292 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7293
7294 trace_array_put(tr);
7295
7296 return 0;
7297 }
7298
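/*
 * Write handler for the trace_marker interface: reserve a TRACE_PRINT event,
 * copy the user string into it (substituting "<faulted>" if the copy fails),
 * run any triggers attached to the trace_marker event and make sure the
 * stored string is newline and NUL terminated. Typical usage from user
 * space, assuming the usual tracefs mount point:
 *
 *	echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */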
7299 static ssize_t
7300 tracing_mark_write(struct file *filp, const char __user *ubuf,
7301 size_t cnt, loff_t *fpos)
7302 {
7303 struct trace_array *tr = filp->private_data;
7304 struct ring_buffer_event *event;
7305 enum event_trigger_type tt = ETT_NONE;
7306 struct trace_buffer *buffer;
7307 struct print_entry *entry;
7308 ssize_t written;
7309 int size;
7310 int len;
7311
7312 /* Used in tracing_mark_raw_write() as well */
7313 #define FAULTED_STR "<faulted>"
7314 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7315
7316 if (tracing_disabled)
7317 return -EINVAL;
7318
7319 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7320 return -EINVAL;
7321
7322 if (cnt > TRACE_BUF_SIZE)
7323 cnt = TRACE_BUF_SIZE;
7324
7325 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7326
7327 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7328
7329 /* If less than "<faulted>", then make sure we can still add that */
7330 if (cnt < FAULTED_SIZE)
7331 size += FAULTED_SIZE - cnt;
7332
7333 buffer = tr->array_buffer.buffer;
7334 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7335 tracing_gen_ctx());
7336 if (unlikely(!event))
7337 /* Ring buffer disabled, return as if not open for write */
7338 return -EBADF;
7339
7340 entry = ring_buffer_event_data(event);
7341 entry->ip = _THIS_IP_;
7342
7343 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7344 if (len) {
7345 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7346 cnt = FAULTED_SIZE;
7347 written = -EFAULT;
7348 } else
7349 written = cnt;
7350
7351 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7352 /* do not add \n before testing triggers, but add \0 */
7353 entry->buf[cnt] = '\0';
7354 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7355 }
7356
7357 if (entry->buf[cnt - 1] != '\n') {
7358 entry->buf[cnt] = '\n';
7359 entry->buf[cnt + 1] = '\0';
7360 } else
7361 entry->buf[cnt] = '\0';
7362
7363 if (static_branch_unlikely(&trace_marker_exports_enabled))
7364 ftrace_exports(event, TRACE_EXPORT_MARKER);
7365 __buffer_unlock_commit(buffer, event);
7366
7367 if (tt)
7368 event_triggers_post_call(tr->trace_marker_file, tt);
7369
7370 return written;
7371 }
7372
7373 /* Limit it for now to 3K (including tag) */
7374 #define RAW_DATA_MAX_SIZE (1024*3)
7375
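/*
 * Write handler for the binary trace_marker_raw interface: the payload must
 * start with an unsigned int tag id and the whole write is limited to
 * RAW_DATA_MAX_SIZE bytes; the data is stored verbatim in a TRACE_RAW_DATA
 * event, with "<faulted>" substituted if the copy from user space fails.
 */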
7376 static ssize_t
7377 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7378 size_t cnt, loff_t *fpos)
7379 {
7380 struct trace_array *tr = filp->private_data;
7381 struct ring_buffer_event *event;
7382 struct trace_buffer *buffer;
7383 struct raw_data_entry *entry;
7384 ssize_t written;
7385 int size;
7386 int len;
7387
7388 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7389
7390 if (tracing_disabled)
7391 return -EINVAL;
7392
7393 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7394 return -EINVAL;
7395
7396 /* The marker must at least have a tag id */
7397 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7398 return -EINVAL;
7399
7400 if (cnt > TRACE_BUF_SIZE)
7401 cnt = TRACE_BUF_SIZE;
7402
7403 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7404
7405 size = sizeof(*entry) + cnt;
7406 if (cnt < FAULT_SIZE_ID)
7407 size += FAULT_SIZE_ID - cnt;
7408
7409 buffer = tr->array_buffer.buffer;
7410 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7411 tracing_gen_ctx());
7412 if (!event)
7413 /* Ring buffer disabled, return as if not open for write */
7414 return -EBADF;
7415
7416 entry = ring_buffer_event_data(event);
7417
7418 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7419 if (len) {
7420 entry->id = -1;
7421 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7422 written = -EFAULT;
7423 } else
7424 written = cnt;
7425
7426 __buffer_unlock_commit(buffer, event);
7427
7428 return written;
7429 }
7430
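/*
 * Show the trace clocks available to this instance, with the currently
 * selected clock in brackets, e.g. "[local] global counter ...".
 */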
7431 static int tracing_clock_show(struct seq_file *m, void *v)
7432 {
7433 struct trace_array *tr = m->private;
7434 int i;
7435
7436 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7437 seq_printf(m,
7438 "%s%s%s%s", i ? " " : "",
7439 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7440 i == tr->clock_id ? "]" : "");
7441 seq_putc(m, '\n');
7442
7443 return 0;
7444 }
7445
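/*
 * Switch the instance to the trace clock named by @clockstr. Both the main
 * buffer and the max (snapshot) buffer get the new clock and are then reset,
 * since timestamps taken with different clocks are not comparable.
 */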
7446 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7447 {
7448 int i;
7449
7450 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7451 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7452 break;
7453 }
7454 if (i == ARRAY_SIZE(trace_clocks))
7455 return -EINVAL;
7456
7457 mutex_lock(&trace_types_lock);
7458
7459 tr->clock_id = i;
7460
7461 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7462
7463 /*
7464 * New clock may not be consistent with the previous clock.
7465 * Reset the buffer so that it doesn't have incomparable timestamps.
7466 */
7467 tracing_reset_online_cpus(&tr->array_buffer);
7468
7469 #ifdef CONFIG_TRACER_MAX_TRACE
7470 if (tr->max_buffer.buffer)
7471 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7472 tracing_reset_online_cpus(&tr->max_buffer);
7473 #endif
7474
7475 mutex_unlock(&trace_types_lock);
7476
7477 return 0;
7478 }
7479
7480 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7481 size_t cnt, loff_t *fpos)
7482 {
7483 struct seq_file *m = filp->private_data;
7484 struct trace_array *tr = m->private;
7485 char buf[64];
7486 const char *clockstr;
7487 int ret;
7488
7489 if (cnt >= sizeof(buf))
7490 return -EINVAL;
7491
7492 if (copy_from_user(buf, ubuf, cnt))
7493 return -EFAULT;
7494
7495 buf[cnt] = 0;
7496
7497 clockstr = strstrip(buf);
7498
7499 ret = tracing_set_clock(tr, clockstr);
7500 if (ret)
7501 return ret;
7502
7503 *fpos += cnt;
7504
7505 return cnt;
7506 }
7507
7508 static int tracing_clock_open(struct inode *inode, struct file *file)
7509 {
7510 struct trace_array *tr = inode->i_private;
7511 int ret;
7512
7513 ret = tracing_check_open_get_tr(tr);
7514 if (ret)
7515 return ret;
7516
7517 ret = single_open(file, tracing_clock_show, inode->i_private);
7518 if (ret < 0)
7519 trace_array_put(tr);
7520
7521 return ret;
7522 }
7523
7524 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7525 {
7526 struct trace_array *tr = m->private;
7527
7528 mutex_lock(&trace_types_lock);
7529
7530 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7531 seq_puts(m, "delta [absolute]\n");
7532 else
7533 seq_puts(m, "[delta] absolute\n");
7534
7535 mutex_unlock(&trace_types_lock);
7536
7537 return 0;
7538 }
7539
7540 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7541 {
7542 struct trace_array *tr = inode->i_private;
7543 int ret;
7544
7545 ret = tracing_check_open_get_tr(tr);
7546 if (ret)
7547 return ret;
7548
7549 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7550 if (ret < 0)
7551 trace_array_put(tr);
7552
7553 return ret;
7554 }
7555
7556 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7557 {
7558 if (rbe == this_cpu_read(trace_buffered_event))
7559 return ring_buffer_time_stamp(buffer);
7560
7561 return ring_buffer_event_time_stamp(buffer, rbe);
7562 }
7563
7564 /*
7565 * Set or disable using the per CPU trace_buffered_event when possible.
7566 */
7567 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7568 {
7569 int ret = 0;
7570
7571 mutex_lock(&trace_types_lock);
7572
7573 if (set && tr->no_filter_buffering_ref++)
7574 goto out;
7575
7576 if (!set) {
7577 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7578 ret = -EINVAL;
7579 goto out;
7580 }
7581
7582 --tr->no_filter_buffering_ref;
7583 }
7584 out:
7585 mutex_unlock(&trace_types_lock);
7586
7587 return ret;
7588 }
7589
7590 struct ftrace_buffer_info {
7591 struct trace_iterator iter;
7592 void *spare;
7593 unsigned int spare_cpu;
7594 unsigned int read;
7595 };
7596
7597 #ifdef CONFIG_TRACER_SNAPSHOT
7598 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7599 {
7600 struct trace_array *tr = inode->i_private;
7601 struct trace_iterator *iter;
7602 struct seq_file *m;
7603 int ret;
7604
7605 ret = tracing_check_open_get_tr(tr);
7606 if (ret)
7607 return ret;
7608
7609 if (file->f_mode & FMODE_READ) {
7610 iter = __tracing_open(inode, file, true);
7611 if (IS_ERR(iter))
7612 ret = PTR_ERR(iter);
7613 } else {
7614 /* Writes still need the seq_file to hold the private data */
7615 ret = -ENOMEM;
7616 m = kzalloc(sizeof(*m), GFP_KERNEL);
7617 if (!m)
7618 goto out;
7619 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7620 if (!iter) {
7621 kfree(m);
7622 goto out;
7623 }
7624 ret = 0;
7625
7626 iter->tr = tr;
7627 iter->array_buffer = &tr->max_buffer;
7628 iter->cpu_file = tracing_get_cpu(inode);
7629 m->private = iter;
7630 file->private_data = m;
7631 }
7632 out:
7633 if (ret < 0)
7634 trace_array_put(tr);
7635
7636 return ret;
7637 }
7638
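/*
 * Called on the target CPU via smp_call_function_single() to swap that
 * CPU's ring buffer into the snapshot (max) buffer.
 */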
7639 static void tracing_swap_cpu_buffer(void *tr)
7640 {
7641 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7642 }
7643
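/*
 * Write handler for the "snapshot" file:
 *   0     - free the snapshot buffer (only valid for the whole instance)
 *   1     - allocate the snapshot buffer if necessary and swap it with the
 *           live buffer (per-CPU swap only where the ring buffer allows it)
 *   other - clear the contents of the snapshot buffer
 */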
7644 static ssize_t
7645 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7646 loff_t *ppos)
7647 {
7648 struct seq_file *m = filp->private_data;
7649 struct trace_iterator *iter = m->private;
7650 struct trace_array *tr = iter->tr;
7651 unsigned long val;
7652 int ret;
7653
7654 ret = tracing_update_buffers();
7655 if (ret < 0)
7656 return ret;
7657
7658 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7659 if (ret)
7660 return ret;
7661
7662 mutex_lock(&trace_types_lock);
7663
7664 if (tr->current_trace->use_max_tr) {
7665 ret = -EBUSY;
7666 goto out;
7667 }
7668
7669 local_irq_disable();
7670 arch_spin_lock(&tr->max_lock);
7671 if (tr->cond_snapshot)
7672 ret = -EBUSY;
7673 arch_spin_unlock(&tr->max_lock);
7674 local_irq_enable();
7675 if (ret)
7676 goto out;
7677
7678 switch (val) {
7679 case 0:
7680 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7681 ret = -EINVAL;
7682 break;
7683 }
7684 if (tr->allocated_snapshot)
7685 free_snapshot(tr);
7686 break;
7687 case 1:
7688 /* Only allow per-cpu swap if the ring buffer supports it */
7689 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7690 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7691 ret = -EINVAL;
7692 break;
7693 }
7694 #endif
7695 if (tr->allocated_snapshot)
7696 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7697 &tr->array_buffer, iter->cpu_file);
7698 else
7699 ret = tracing_alloc_snapshot_instance(tr);
7700 if (ret < 0)
7701 break;
7702 /* Now, we're going to swap */
7703 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7704 local_irq_disable();
7705 update_max_tr(tr, current, smp_processor_id(), NULL);
7706 local_irq_enable();
7707 } else {
7708 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7709 (void *)tr, 1);
7710 }
7711 break;
7712 default:
7713 if (tr->allocated_snapshot) {
7714 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7715 tracing_reset_online_cpus(&tr->max_buffer);
7716 else
7717 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7718 }
7719 break;
7720 }
7721
7722 if (ret >= 0) {
7723 *ppos += cnt;
7724 ret = cnt;
7725 }
7726 out:
7727 mutex_unlock(&trace_types_lock);
7728 return ret;
7729 }
7730
7731 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7732 {
7733 struct seq_file *m = file->private_data;
7734 int ret;
7735
7736 ret = tracing_release(inode, file);
7737
7738 if (file->f_mode & FMODE_READ)
7739 return ret;
7740
7741 /* If write only, the seq_file is just a stub */
7742 if (m)
7743 kfree(m->private);
7744 kfree(m);
7745
7746 return 0;
7747 }
7748
7749 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7750 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7751 size_t count, loff_t *ppos);
7752 static int tracing_buffers_release(struct inode *inode, struct file *file);
7753 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7754 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7755
7756 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7757 {
7758 struct ftrace_buffer_info *info;
7759 int ret;
7760
7761 /* The following checks for tracefs lockdown */
7762 ret = tracing_buffers_open(inode, filp);
7763 if (ret < 0)
7764 return ret;
7765
7766 info = filp->private_data;
7767
7768 if (info->iter.trace->use_max_tr) {
7769 tracing_buffers_release(inode, filp);
7770 return -EBUSY;
7771 }
7772
7773 info->iter.snapshot = true;
7774 info->iter.array_buffer = &info->iter.tr->max_buffer;
7775
7776 return ret;
7777 }
7778
7779 #endif /* CONFIG_TRACER_SNAPSHOT */
7780
7781
7782 static const struct file_operations tracing_thresh_fops = {
7783 .open = tracing_open_generic,
7784 .read = tracing_thresh_read,
7785 .write = tracing_thresh_write,
7786 .llseek = generic_file_llseek,
7787 };
7788
7789 #ifdef CONFIG_TRACER_MAX_TRACE
7790 static const struct file_operations tracing_max_lat_fops = {
7791 .open = tracing_open_generic_tr,
7792 .read = tracing_max_lat_read,
7793 .write = tracing_max_lat_write,
7794 .llseek = generic_file_llseek,
7795 .release = tracing_release_generic_tr,
7796 };
7797 #endif
7798
7799 static const struct file_operations set_tracer_fops = {
7800 .open = tracing_open_generic_tr,
7801 .read = tracing_set_trace_read,
7802 .write = tracing_set_trace_write,
7803 .llseek = generic_file_llseek,
7804 .release = tracing_release_generic_tr,
7805 };
7806
7807 static const struct file_operations tracing_pipe_fops = {
7808 .open = tracing_open_pipe,
7809 .poll = tracing_poll_pipe,
7810 .read = tracing_read_pipe,
7811 .splice_read = tracing_splice_read_pipe,
7812 .release = tracing_release_pipe,
7813 .llseek = no_llseek,
7814 };
7815
7816 static const struct file_operations tracing_entries_fops = {
7817 .open = tracing_open_generic_tr,
7818 .read = tracing_entries_read,
7819 .write = tracing_entries_write,
7820 .llseek = generic_file_llseek,
7821 .release = tracing_release_generic_tr,
7822 };
7823
7824 static const struct file_operations tracing_total_entries_fops = {
7825 .open = tracing_open_generic_tr,
7826 .read = tracing_total_entries_read,
7827 .llseek = generic_file_llseek,
7828 .release = tracing_release_generic_tr,
7829 };
7830
7831 static const struct file_operations tracing_free_buffer_fops = {
7832 .open = tracing_open_generic_tr,
7833 .write = tracing_free_buffer_write,
7834 .release = tracing_free_buffer_release,
7835 };
7836
7837 static const struct file_operations tracing_mark_fops = {
7838 .open = tracing_mark_open,
7839 .write = tracing_mark_write,
7840 .release = tracing_release_generic_tr,
7841 };
7842
7843 static const struct file_operations tracing_mark_raw_fops = {
7844 .open = tracing_mark_open,
7845 .write = tracing_mark_raw_write,
7846 .release = tracing_release_generic_tr,
7847 };
7848
7849 static const struct file_operations trace_clock_fops = {
7850 .open = tracing_clock_open,
7851 .read = seq_read,
7852 .llseek = seq_lseek,
7853 .release = tracing_single_release_tr,
7854 .write = tracing_clock_write,
7855 };
7856
7857 static const struct file_operations trace_time_stamp_mode_fops = {
7858 .open = tracing_time_stamp_mode_open,
7859 .read = seq_read,
7860 .llseek = seq_lseek,
7861 .release = tracing_single_release_tr,
7862 };
7863
7864 #ifdef CONFIG_TRACER_SNAPSHOT
7865 static const struct file_operations snapshot_fops = {
7866 .open = tracing_snapshot_open,
7867 .read = seq_read,
7868 .write = tracing_snapshot_write,
7869 .llseek = tracing_lseek,
7870 .release = tracing_snapshot_release,
7871 };
7872
7873 static const struct file_operations snapshot_raw_fops = {
7874 .open = snapshot_raw_open,
7875 .read = tracing_buffers_read,
7876 .release = tracing_buffers_release,
7877 .splice_read = tracing_buffers_splice_read,
7878 .llseek = no_llseek,
7879 };
7880
7881 #endif /* CONFIG_TRACER_SNAPSHOT */
7882
7883 /*
7884 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7885 * @filp: The active open file structure
7886 * @ubuf: The userspace provided buffer containing the value to write
7887 * @cnt: The number of bytes available to read from @ubuf
7888 * @ppos: The current "file" position
7889 *
7890 * This function implements the write interface for a struct trace_min_max_param.
7891 * The filp->private_data must point to a trace_min_max_param structure that
7892 * defines where to write the value, the min and the max acceptable values,
7893 * and a lock to protect the write.
7894 */
7895 static ssize_t
7896 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7897 {
7898 struct trace_min_max_param *param = filp->private_data;
7899 u64 val;
7900 int err;
7901
7902 if (!param)
7903 return -EFAULT;
7904
7905 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7906 if (err)
7907 return err;
7908
7909 if (param->lock)
7910 mutex_lock(param->lock);
7911
7912 if (param->min && val < *param->min)
7913 err = -EINVAL;
7914
7915 if (param->max && val > *param->max)
7916 err = -EINVAL;
7917
7918 if (!err)
7919 *param->val = val;
7920
7921 if (param->lock)
7922 mutex_unlock(param->lock);
7923
7924 if (err)
7925 return err;
7926
7927 return cnt;
7928 }
7929
7930 /*
7931 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7932 * @filp: The active open file structure
7933 * @ubuf: The userspace provided buffer to read value into
7934 * @cnt: The maximum number of bytes to read
7935 * @ppos: The current "file" position
7936 *
7937 * This function implements the read interface for a struct trace_min_max_param.
7938 * The filp->private_data must point to a trace_min_max_param struct with valid
7939 * data.
7940 */
7941 static ssize_t
7942 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7943 {
7944 struct trace_min_max_param *param = filp->private_data;
7945 char buf[U64_STR_SIZE];
7946 int len;
7947 u64 val;
7948
7949 if (!param)
7950 return -EFAULT;
7951
7952 val = *param->val;
7953
7954 if (cnt > sizeof(buf))
7955 cnt = sizeof(buf);
7956
7957 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7958
7959 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7960 }
7961
7962 const struct file_operations trace_min_max_fops = {
7963 .open = tracing_open_generic,
7964 .read = trace_min_max_read,
7965 .write = trace_min_max_write,
7966 };
7967
7968 #define TRACING_LOG_ERRS_MAX 8
7969 #define TRACING_LOG_LOC_MAX 128
7970
7971 #define CMD_PREFIX " Command: "
7972
7973 struct err_info {
7974 const char **errs; /* ptr to loc-specific array of err strings */
7975 u8 type; /* index into errs -> specific err string */
7976 u16 pos; /* caret position */
7977 u64 ts;
7978 };
7979
7980 struct tracing_log_err {
7981 struct list_head list;
7982 struct err_info info;
7983 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7984 char *cmd; /* what caused err */
7985 };
7986
7987 static DEFINE_MUTEX(tracing_err_log_lock);
7988
7989 static struct tracing_log_err *alloc_tracing_log_err(int len)
7990 {
7991 struct tracing_log_err *err;
7992
7993 err = kzalloc(sizeof(*err), GFP_KERNEL);
7994 if (!err)
7995 return ERR_PTR(-ENOMEM);
7996
7997 err->cmd = kzalloc(len, GFP_KERNEL);
7998 if (!err->cmd) {
7999 kfree(err);
8000 return ERR_PTR(-ENOMEM);
8001 }
8002
8003 return err;
8004 }
8005
8006 static void free_tracing_log_err(struct tracing_log_err *err)
8007 {
8008 kfree(err->cmd);
8009 kfree(err);
8010 }
8011
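/*
 * Get a tracing_log_err to fill in: allocate a new entry until
 * TRACING_LOG_ERRS_MAX entries exist, then recycle the oldest entry on the
 * list, replacing its command buffer with one large enough for @len bytes.
 */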
8012 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8013 int len)
8014 {
8015 struct tracing_log_err *err;
8016 char *cmd;
8017
8018 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8019 err = alloc_tracing_log_err(len);
8020 if (PTR_ERR(err) != -ENOMEM)
8021 tr->n_err_log_entries++;
8022
8023 return err;
8024 }
8025 cmd = kzalloc(len, GFP_KERNEL);
8026 if (!cmd)
8027 return ERR_PTR(-ENOMEM);
8028 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8029 kfree(err->cmd);
8030 err->cmd = cmd;
8031 list_del(&err->list);
8032
8033 return err;
8034 }
8035
8036 /**
8037 * err_pos - find the position of a string within a command for error careting
8038 * @cmd: The tracing command that caused the error
8039 * @str: The string to position the caret at within @cmd
8040 *
8041 * Finds the position of the first occurrence of @str within @cmd. The
8042 * return value can be passed to tracing_log_err() for caret placement
8043 * within @cmd.
8044 *
8045 * Returns the index within @cmd of the first occurrence of @str or 0
8046 * if @str was not found.
8047 */
8048 unsigned int err_pos(char *cmd, const char *str)
8049 {
8050 char *found;
8051
8052 if (WARN_ON(!strlen(cmd)))
8053 return 0;
8054
8055 found = strstr(cmd, str);
8056 if (found)
8057 return found - cmd;
8058
8059 return 0;
8060 }
8061
8062 /**
8063 * tracing_log_err - write an error to the tracing error log
8064 * @tr: The associated trace array for the error (NULL for top level array)
8065 * @loc: A string describing where the error occurred
8066 * @cmd: The tracing command that caused the error
8067 * @errs: The array of loc-specific static error strings
8068 * @type: The index into errs[], which produces the specific static err string
8069 * @pos: The position the caret should be placed in the cmd
8070 *
8071 * Writes an error into tracing/error_log of the form:
8072 *
8073 * <loc>: error: <text>
8074 * Command: <cmd>
8075 * ^
8076 *
8077 * tracing/error_log is a small log file containing the last
8078 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8079 * unless there has been a tracing error, and the error log can be
8080 * cleared and have its memory freed by writing the empty string in
8081 * truncation mode to it i.e. echo > tracing/error_log.
8082 *
8083 * NOTE: the @errs array along with the @type param are used to
8084 * produce a static error string - this string is not copied and saved
8085 * when the error is logged - only a pointer to it is saved. See
8086 * existing callers for examples of how static strings are typically
8087 * defined for use with tracing_log_err().
8088 */
8089 void tracing_log_err(struct trace_array *tr,
8090 const char *loc, const char *cmd,
8091 const char **errs, u8 type, u16 pos)
8092 {
8093 struct tracing_log_err *err;
8094 int len = 0;
8095
8096 if (!tr)
8097 tr = &global_trace;
8098
8099 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8100
8101 mutex_lock(&tracing_err_log_lock);
8102 err = get_tracing_log_err(tr, len);
8103 if (PTR_ERR(err) == -ENOMEM) {
8104 mutex_unlock(&tracing_err_log_lock);
8105 return;
8106 }
8107
8108 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8109 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8110
8111 err->info.errs = errs;
8112 err->info.type = type;
8113 err->info.pos = pos;
8114 err->info.ts = local_clock();
8115
8116 list_add_tail(&err->list, &tr->err_log);
8117 mutex_unlock(&tracing_err_log_lock);
8118 }
8119
8120 static void clear_tracing_err_log(struct trace_array *tr)
8121 {
8122 struct tracing_log_err *err, *next;
8123
8124 mutex_lock(&tracing_err_log_lock);
8125 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8126 list_del(&err->list);
8127 free_tracing_log_err(err);
8128 }
8129
8130 tr->n_err_log_entries = 0;
8131 mutex_unlock(&tracing_err_log_lock);
8132 }
8133
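/*
 * seq_file iteration over tr->err_log for the "error_log" file. The list is
 * walked under tracing_err_log_lock, taken in start() and dropped in stop().
 */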
8134 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8135 {
8136 struct trace_array *tr = m->private;
8137
8138 mutex_lock(&tracing_err_log_lock);
8139
8140 return seq_list_start(&tr->err_log, *pos);
8141 }
8142
8143 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8144 {
8145 struct trace_array *tr = m->private;
8146
8147 return seq_list_next(v, &tr->err_log, pos);
8148 }
8149
8150 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8151 {
8152 mutex_unlock(&tracing_err_log_lock);
8153 }
8154
8155 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8156 {
8157 u16 i;
8158
8159 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8160 seq_putc(m, ' ');
8161 for (i = 0; i < pos; i++)
8162 seq_putc(m, ' ');
8163 seq_puts(m, "^\n");
8164 }
8165
8166 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8167 {
8168 struct tracing_log_err *err = v;
8169
8170 if (err) {
8171 const char *err_text = err->info.errs[err->info.type];
8172 u64 sec = err->info.ts;
8173 u32 nsec;
8174
8175 nsec = do_div(sec, NSEC_PER_SEC);
8176 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8177 err->loc, err_text);
8178 seq_printf(m, "%s", err->cmd);
8179 tracing_err_log_show_pos(m, err->info.pos);
8180 }
8181
8182 return 0;
8183 }
8184
8185 static const struct seq_operations tracing_err_log_seq_ops = {
8186 .start = tracing_err_log_seq_start,
8187 .next = tracing_err_log_seq_next,
8188 .stop = tracing_err_log_seq_stop,
8189 .show = tracing_err_log_seq_show
8190 };
8191
8192 static int tracing_err_log_open(struct inode *inode, struct file *file)
8193 {
8194 struct trace_array *tr = inode->i_private;
8195 int ret = 0;
8196
8197 ret = tracing_check_open_get_tr(tr);
8198 if (ret)
8199 return ret;
8200
8201 /* If this file was opened for write, then erase contents */
8202 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8203 clear_tracing_err_log(tr);
8204
8205 if (file->f_mode & FMODE_READ) {
8206 ret = seq_open(file, &tracing_err_log_seq_ops);
8207 if (!ret) {
8208 struct seq_file *m = file->private_data;
8209 m->private = tr;
8210 } else {
8211 trace_array_put(tr);
8212 }
8213 }
8214 return ret;
8215 }
8216
8217 static ssize_t tracing_err_log_write(struct file *file,
8218 const char __user *buffer,
8219 size_t count, loff_t *ppos)
8220 {
8221 return count;
8222 }
8223
8224 static int tracing_err_log_release(struct inode *inode, struct file *file)
8225 {
8226 struct trace_array *tr = inode->i_private;
8227
8228 trace_array_put(tr);
8229
8230 if (file->f_mode & FMODE_READ)
8231 seq_release(inode, file);
8232
8233 return 0;
8234 }
8235
8236 static const struct file_operations tracing_err_log_fops = {
8237 .open = tracing_err_log_open,
8238 .write = tracing_err_log_write,
8239 .read = seq_read,
8240 .llseek = tracing_lseek,
8241 .release = tracing_err_log_release,
8242 };
8243
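/*
 * Open handler for the per-CPU "trace_pipe_raw" files: allocate an
 * ftrace_buffer_info carrying the iterator state and hold a reference on
 * the trace array for the lifetime of the file.
 */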
8244 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8245 {
8246 struct trace_array *tr = inode->i_private;
8247 struct ftrace_buffer_info *info;
8248 int ret;
8249
8250 ret = tracing_check_open_get_tr(tr);
8251 if (ret)
8252 return ret;
8253
8254 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8255 if (!info) {
8256 trace_array_put(tr);
8257 return -ENOMEM;
8258 }
8259
8260 mutex_lock(&trace_types_lock);
8261
8262 info->iter.tr = tr;
8263 info->iter.cpu_file = tracing_get_cpu(inode);
8264 info->iter.trace = tr->current_trace;
8265 info->iter.array_buffer = &tr->array_buffer;
8266 info->spare = NULL;
8267 /* Force reading ring buffer for first read */
8268 info->read = (unsigned int)-1;
8269
8270 filp->private_data = info;
8271
8272 tr->trace_ref++;
8273
8274 mutex_unlock(&trace_types_lock);
8275
8276 ret = nonseekable_open(inode, filp);
8277 if (ret < 0)
8278 trace_array_put(tr);
8279
8280 return ret;
8281 }
8282
8283 static __poll_t
8284 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8285 {
8286 struct ftrace_buffer_info *info = filp->private_data;
8287 struct trace_iterator *iter = &info->iter;
8288
8289 return trace_poll(iter, filp, poll_table);
8290 }
8291
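/*
 * Read handler for "trace_pipe_raw": copy whole ring buffer sub-pages to
 * user space. A spare page is allocated on first use and refilled with
 * ring_buffer_read_page(); the read blocks while the buffer is empty unless
 * O_NONBLOCK was given.
 */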
8292 static ssize_t
8293 tracing_buffers_read(struct file *filp, char __user *ubuf,
8294 size_t count, loff_t *ppos)
8295 {
8296 struct ftrace_buffer_info *info = filp->private_data;
8297 struct trace_iterator *iter = &info->iter;
8298 ssize_t ret = 0;
8299 ssize_t size;
8300
8301 if (!count)
8302 return 0;
8303
8304 #ifdef CONFIG_TRACER_MAX_TRACE
8305 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8306 return -EBUSY;
8307 #endif
8308
8309 if (!info->spare) {
8310 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8311 iter->cpu_file);
8312 if (IS_ERR(info->spare)) {
8313 ret = PTR_ERR(info->spare);
8314 info->spare = NULL;
8315 } else {
8316 info->spare_cpu = iter->cpu_file;
8317 }
8318 }
8319 if (!info->spare)
8320 return ret;
8321
8322 /* Do we have previous read data to read? */
8323 if (info->read < PAGE_SIZE)
8324 goto read;
8325
8326 again:
8327 trace_access_lock(iter->cpu_file);
8328 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8329 &info->spare,
8330 count,
8331 iter->cpu_file, 0);
8332 trace_access_unlock(iter->cpu_file);
8333
8334 if (ret < 0) {
8335 if (trace_empty(iter)) {
8336 if ((filp->f_flags & O_NONBLOCK))
8337 return -EAGAIN;
8338
8339 ret = wait_on_pipe(iter, 0);
8340 if (ret)
8341 return ret;
8342
8343 goto again;
8344 }
8345 return 0;
8346 }
8347
8348 info->read = 0;
8349 read:
8350 size = PAGE_SIZE - info->read;
8351 if (size > count)
8352 size = count;
8353
8354 ret = copy_to_user(ubuf, info->spare + info->read, size);
8355 if (ret == size)
8356 return -EFAULT;
8357
8358 size -= ret;
8359
8360 *ppos += size;
8361 info->read += size;
8362
8363 return size;
8364 }
8365
8366 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8367 {
8368 struct ftrace_buffer_info *info = file->private_data;
8369 struct trace_iterator *iter = &info->iter;
8370
8371 iter->wait_index++;
8372 /* Make sure the waiters see the new wait_index */
8373 smp_wmb();
8374
8375 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8376
8377 return 0;
8378 }
8379
8380 static int tracing_buffers_release(struct inode *inode, struct file *file)
8381 {
8382 struct ftrace_buffer_info *info = file->private_data;
8383 struct trace_iterator *iter = &info->iter;
8384
8385 mutex_lock(&trace_types_lock);
8386
8387 iter->tr->trace_ref--;
8388
8389 __trace_array_put(iter->tr);
8390
8391 if (info->spare)
8392 ring_buffer_free_read_page(iter->array_buffer->buffer,
8393 info->spare_cpu, info->spare);
8394 kvfree(info);
8395
8396 mutex_unlock(&trace_types_lock);
8397
8398 return 0;
8399 }
8400
8401 struct buffer_ref {
8402 struct trace_buffer *buffer;
8403 void *page;
8404 int cpu;
8405 refcount_t refcount;
8406 };
8407
8408 static void buffer_ref_release(struct buffer_ref *ref)
8409 {
8410 if (!refcount_dec_and_test(&ref->refcount))
8411 return;
8412 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8413 kfree(ref);
8414 }
8415
8416 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8417 struct pipe_buffer *buf)
8418 {
8419 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8420
8421 buffer_ref_release(ref);
8422 buf->private = 0;
8423 }
8424
8425 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8426 struct pipe_buffer *buf)
8427 {
8428 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8429
8430 if (refcount_read(&ref->refcount) > INT_MAX/2)
8431 return false;
8432
8433 refcount_inc(&ref->refcount);
8434 return true;
8435 }
8436
8437 /* Pipe buffer operations for a buffer. */
8438 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8439 .release = buffer_pipe_buf_release,
8440 .get = buffer_pipe_buf_get,
8441 };
8442
8443 /*
8444 * Callback from splice_to_pipe(): release pages remaining in the spd
8445 * if we errored out while filling the pipe.
8446 */
8447 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8448 {
8449 struct buffer_ref *ref =
8450 (struct buffer_ref *)spd->partial[i].private;
8451
8452 buffer_ref_release(ref);
8453 spd->partial[i].private = 0;
8454 }
8455
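/*
 * splice() handler for "trace_pipe_raw": hand ring buffer pages to the pipe
 * without copying. Each page is wrapped in a refcounted buffer_ref so that
 * it is returned to the ring buffer only after the last pipe reference to
 * it is dropped.
 */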
8456 static ssize_t
8457 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8458 struct pipe_inode_info *pipe, size_t len,
8459 unsigned int flags)
8460 {
8461 struct ftrace_buffer_info *info = file->private_data;
8462 struct trace_iterator *iter = &info->iter;
8463 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8464 struct page *pages_def[PIPE_DEF_BUFFERS];
8465 struct splice_pipe_desc spd = {
8466 .pages = pages_def,
8467 .partial = partial_def,
8468 .nr_pages_max = PIPE_DEF_BUFFERS,
8469 .ops = &buffer_pipe_buf_ops,
8470 .spd_release = buffer_spd_release,
8471 };
8472 struct buffer_ref *ref;
8473 int entries, i;
8474 ssize_t ret = 0;
8475
8476 #ifdef CONFIG_TRACER_MAX_TRACE
8477 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8478 return -EBUSY;
8479 #endif
8480
8481 if (*ppos & (PAGE_SIZE - 1))
8482 return -EINVAL;
8483
8484 if (len & (PAGE_SIZE - 1)) {
8485 if (len < PAGE_SIZE)
8486 return -EINVAL;
8487 len &= PAGE_MASK;
8488 }
8489
8490 if (splice_grow_spd(pipe, &spd))
8491 return -ENOMEM;
8492
8493 again:
8494 trace_access_lock(iter->cpu_file);
8495 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8496
8497 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8498 struct page *page;
8499 int r;
8500
8501 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8502 if (!ref) {
8503 ret = -ENOMEM;
8504 break;
8505 }
8506
8507 refcount_set(&ref->refcount, 1);
8508 ref->buffer = iter->array_buffer->buffer;
8509 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8510 if (IS_ERR(ref->page)) {
8511 ret = PTR_ERR(ref->page);
8512 ref->page = NULL;
8513 kfree(ref);
8514 break;
8515 }
8516 ref->cpu = iter->cpu_file;
8517
8518 r = ring_buffer_read_page(ref->buffer, &ref->page,
8519 len, iter->cpu_file, 1);
8520 if (r < 0) {
8521 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8522 ref->page);
8523 kfree(ref);
8524 break;
8525 }
8526
8527 page = virt_to_page(ref->page);
8528
8529 spd.pages[i] = page;
8530 spd.partial[i].len = PAGE_SIZE;
8531 spd.partial[i].offset = 0;
8532 spd.partial[i].private = (unsigned long)ref;
8533 spd.nr_pages++;
8534 *ppos += PAGE_SIZE;
8535
8536 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8537 }
8538
8539 trace_access_unlock(iter->cpu_file);
8540 spd.nr_pages = i;
8541
8542 /* did we read anything? */
8543 if (!spd.nr_pages) {
8544 long wait_index;
8545
8546 if (ret)
8547 goto out;
8548
8549 ret = -EAGAIN;
8550 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8551 goto out;
8552
8553 wait_index = READ_ONCE(iter->wait_index);
8554
8555 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8556 if (ret)
8557 goto out;
8558
8559 /* No need to wait after waking up when tracing is off */
8560 if (!tracer_tracing_is_on(iter->tr))
8561 goto out;
8562
8563 /* Make sure we see the new wait_index */
8564 smp_rmb();
8565 if (wait_index != iter->wait_index)
8566 goto out;
8567
8568 goto again;
8569 }
8570
8571 ret = splice_to_pipe(pipe, &spd);
8572 out:
8573 splice_shrink_spd(&spd);
8574
8575 return ret;
8576 }
8577
8578 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8579 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8580 {
8581 struct ftrace_buffer_info *info = file->private_data;
8582 struct trace_iterator *iter = &info->iter;
8583
8584 if (cmd)
8585 return -ENOIOCTLCMD;
8586
8587 mutex_lock(&trace_types_lock);
8588
8589 iter->wait_index++;
8590 /* Make sure the waiters see the new wait_index */
8591 smp_wmb();
8592
8593 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8594
8595 mutex_unlock(&trace_types_lock);
8596 return 0;
8597 }
8598
8599 static const struct file_operations tracing_buffers_fops = {
8600 .open = tracing_buffers_open,
8601 .read = tracing_buffers_read,
8602 .poll = tracing_buffers_poll,
8603 .release = tracing_buffers_release,
8604 .flush = tracing_buffers_flush,
8605 .splice_read = tracing_buffers_splice_read,
8606 .unlocked_ioctl = tracing_buffers_ioctl,
8607 .llseek = no_llseek,
8608 };
8609
8610 static ssize_t
8611 tracing_stats_read(struct file *filp, char __user *ubuf,
8612 size_t count, loff_t *ppos)
8613 {
8614 struct inode *inode = file_inode(filp);
8615 struct trace_array *tr = inode->i_private;
8616 struct array_buffer *trace_buf = &tr->array_buffer;
8617 int cpu = tracing_get_cpu(inode);
8618 struct trace_seq *s;
8619 unsigned long cnt;
8620 unsigned long long t;
8621 unsigned long usec_rem;
8622
8623 s = kmalloc(sizeof(*s), GFP_KERNEL);
8624 if (!s)
8625 return -ENOMEM;
8626
8627 trace_seq_init(s);
8628
8629 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8630 trace_seq_printf(s, "entries: %ld\n", cnt);
8631
8632 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8633 trace_seq_printf(s, "overrun: %ld\n", cnt);
8634
8635 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8636 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8637
8638 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8639 trace_seq_printf(s, "bytes: %ld\n", cnt);
8640
8641 if (trace_clocks[tr->clock_id].in_ns) {
8642 /* local or global for trace_clock */
8643 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8644 usec_rem = do_div(t, USEC_PER_SEC);
8645 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8646 t, usec_rem);
8647
8648 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8649 usec_rem = do_div(t, USEC_PER_SEC);
8650 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8651 } else {
8652 /* counter or tsc mode for trace_clock */
8653 trace_seq_printf(s, "oldest event ts: %llu\n",
8654 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8655
8656 trace_seq_printf(s, "now ts: %llu\n",
8657 ring_buffer_time_stamp(trace_buf->buffer));
8658 }
8659
8660 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8661 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8662
8663 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8664 trace_seq_printf(s, "read events: %ld\n", cnt);
8665
8666 count = simple_read_from_buffer(ubuf, count, ppos,
8667 s->buffer, trace_seq_used(s));
8668
8669 kfree(s);
8670
8671 return count;
8672 }
8673
8674 static const struct file_operations tracing_stats_fops = {
8675 .open = tracing_open_generic_tr,
8676 .read = tracing_stats_read,
8677 .llseek = generic_file_llseek,
8678 .release = tracing_release_generic_tr,
8679 };
8680
8681 #ifdef CONFIG_DYNAMIC_FTRACE
8682
8683 static ssize_t
8684 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8685 size_t cnt, loff_t *ppos)
8686 {
8687 ssize_t ret;
8688 char *buf;
8689 int r;
8690
8691 /* 256 should be plenty to hold the amount needed */
8692 buf = kmalloc(256, GFP_KERNEL);
8693 if (!buf)
8694 return -ENOMEM;
8695
8696 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8697 ftrace_update_tot_cnt,
8698 ftrace_number_of_pages,
8699 ftrace_number_of_groups);
8700
8701 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8702 kfree(buf);
8703 return ret;
8704 }
8705
8706 static const struct file_operations tracing_dyn_info_fops = {
8707 .open = tracing_open_generic,
8708 .read = tracing_read_dyn_info,
8709 .llseek = generic_file_llseek,
8710 };
8711 #endif /* CONFIG_DYNAMIC_FTRACE */
8712
8713 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8714 static void
8715 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8716 struct trace_array *tr, struct ftrace_probe_ops *ops,
8717 void *data)
8718 {
8719 tracing_snapshot_instance(tr);
8720 }
8721
8722 static void
8723 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8724 struct trace_array *tr, struct ftrace_probe_ops *ops,
8725 void *data)
8726 {
8727 struct ftrace_func_mapper *mapper = data;
8728 long *count = NULL;
8729
8730 if (mapper)
8731 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8732
8733 if (count) {
8734
8735 if (*count <= 0)
8736 return;
8737
8738 (*count)--;
8739 }
8740
8741 tracing_snapshot_instance(tr);
8742 }
8743
8744 static int
8745 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8746 struct ftrace_probe_ops *ops, void *data)
8747 {
8748 struct ftrace_func_mapper *mapper = data;
8749 long *count = NULL;
8750
8751 seq_printf(m, "%ps:", (void *)ip);
8752
8753 seq_puts(m, "snapshot");
8754
8755 if (mapper)
8756 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8757
8758 if (count)
8759 seq_printf(m, ":count=%ld\n", *count);
8760 else
8761 seq_puts(m, ":unlimited\n");
8762
8763 return 0;
8764 }
8765
8766 static int
8767 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8768 unsigned long ip, void *init_data, void **data)
8769 {
8770 struct ftrace_func_mapper *mapper = *data;
8771
8772 if (!mapper) {
8773 mapper = allocate_ftrace_func_mapper();
8774 if (!mapper)
8775 return -ENOMEM;
8776 *data = mapper;
8777 }
8778
8779 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8780 }
8781
8782 static void
8783 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8784 unsigned long ip, void *data)
8785 {
8786 struct ftrace_func_mapper *mapper = data;
8787
8788 if (!ip) {
8789 if (!mapper)
8790 return;
8791 free_ftrace_func_mapper(mapper, NULL);
8792 return;
8793 }
8794
8795 ftrace_func_mapper_remove_ip(mapper, ip);
8796 }
8797
8798 static struct ftrace_probe_ops snapshot_probe_ops = {
8799 .func = ftrace_snapshot,
8800 .print = ftrace_snapshot_print,
8801 };
8802
8803 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8804 .func = ftrace_count_snapshot,
8805 .print = ftrace_snapshot_print,
8806 .init = ftrace_snapshot_init,
8807 .free = ftrace_snapshot_free,
8808 };
8809
8810 static int
8811 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8812 char *glob, char *cmd, char *param, int enable)
8813 {
8814 struct ftrace_probe_ops *ops;
8815 void *count = (void *)-1;
8816 char *number;
8817 int ret;
8818
8819 if (!tr)
8820 return -ENODEV;
8821
8822 /* hash funcs only work with set_ftrace_filter */
8823 if (!enable)
8824 return -EINVAL;
8825
8826 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8827
8828 if (glob[0] == '!')
8829 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8830
8831 if (!param)
8832 goto out_reg;
8833
8834 number = strsep(&param, ":");
8835
8836 if (!strlen(number))
8837 goto out_reg;
8838
8839 /*
8840 * We use the callback data field (which is a pointer)
8841 * as our counter.
8842 */
8843 ret = kstrtoul(number, 0, (unsigned long *)&count);
8844 if (ret)
8845 return ret;
8846
8847 out_reg:
8848 ret = tracing_alloc_snapshot_instance(tr);
8849 if (ret < 0)
8850 goto out;
8851
8852 ret = register_ftrace_function_probe(glob, tr, ops, count);
8853
8854 out:
8855 return ret < 0 ? ret : 0;
8856 }
8857
8858 static struct ftrace_func_command ftrace_snapshot_cmd = {
8859 .name = "snapshot",
8860 .func = ftrace_trace_snapshot_callback,
8861 };
8862
8863 static __init int register_snapshot_cmd(void)
8864 {
8865 return register_ftrace_command(&ftrace_snapshot_cmd);
8866 }
8867 #else
8868 static inline __init int register_snapshot_cmd(void) { return 0; }
8869 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8870
8871 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8872 {
8873 if (WARN_ON(!tr->dir))
8874 return ERR_PTR(-ENODEV);
8875
8876 /* Top directory uses NULL as the parent */
8877 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8878 return NULL;
8879
8880 /* All sub buffers have a descriptor */
8881 return tr->dir;
8882 }
8883
8884 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8885 {
8886 struct dentry *d_tracer;
8887
8888 if (tr->percpu_dir)
8889 return tr->percpu_dir;
8890
8891 d_tracer = tracing_get_dentry(tr);
8892 if (IS_ERR(d_tracer))
8893 return NULL;
8894
8895 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8896
8897 MEM_FAIL(!tr->percpu_dir,
8898 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8899
8900 return tr->percpu_dir;
8901 }
8902
8903 static struct dentry *
8904 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8905 void *data, long cpu, const struct file_operations *fops)
8906 {
8907 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8908
8909 if (ret) /* See tracing_get_cpu() */
8910 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8911 return ret;
8912 }
8913
8914 static void
8915 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8916 {
8917 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8918 struct dentry *d_cpu;
8919 char cpu_dir[30]; /* 30 characters should be more than enough */
8920
8921 if (!d_percpu)
8922 return;
8923
8924 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8925 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8926 if (!d_cpu) {
8927 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8928 return;
8929 }
8930
8931 /* per cpu trace_pipe */
8932 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8933 tr, cpu, &tracing_pipe_fops);
8934
8935 /* per cpu trace */
8936 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8937 tr, cpu, &tracing_fops);
8938
8939 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8940 tr, cpu, &tracing_buffers_fops);
8941
8942 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8943 tr, cpu, &tracing_stats_fops);
8944
8945 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8946 tr, cpu, &tracing_entries_fops);
8947
8948 #ifdef CONFIG_TRACER_SNAPSHOT
8949 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8950 tr, cpu, &snapshot_fops);
8951
8952 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8953 tr, cpu, &snapshot_raw_fops);
8954 #endif
8955 }
8956
8957 #ifdef CONFIG_FTRACE_SELFTEST
8958 /* Let selftest have access to static functions in this file */
8959 #include "trace_selftest.c"
8960 #endif
8961
8962 static ssize_t
8963 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8964 loff_t *ppos)
8965 {
8966 struct trace_option_dentry *topt = filp->private_data;
8967 char *buf;
8968
8969 if (topt->flags->val & topt->opt->bit)
8970 buf = "1\n";
8971 else
8972 buf = "0\n";
8973
8974 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8975 }
8976
8977 static ssize_t
8978 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8979 loff_t *ppos)
8980 {
8981 struct trace_option_dentry *topt = filp->private_data;
8982 unsigned long val;
8983 int ret;
8984
8985 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8986 if (ret)
8987 return ret;
8988
8989 if (val != 0 && val != 1)
8990 return -EINVAL;
8991
8992 if (!!(topt->flags->val & topt->opt->bit) != val) {
8993 mutex_lock(&trace_types_lock);
8994 ret = __set_tracer_option(topt->tr, topt->flags,
8995 topt->opt, !val);
8996 mutex_unlock(&trace_types_lock);
8997 if (ret)
8998 return ret;
8999 }
9000
9001 *ppos += cnt;
9002
9003 return cnt;
9004 }
9005
9006 static int tracing_open_options(struct inode *inode, struct file *filp)
9007 {
9008 struct trace_option_dentry *topt = inode->i_private;
9009 int ret;
9010
9011 ret = tracing_check_open_get_tr(topt->tr);
9012 if (ret)
9013 return ret;
9014
9015 filp->private_data = inode->i_private;
9016 return 0;
9017 }
9018
9019 static int tracing_release_options(struct inode *inode, struct file *file)
9020 {
9021 struct trace_option_dentry *topt = file->private_data;
9022
9023 trace_array_put(topt->tr);
9024 return 0;
9025 }
9026
9027 static const struct file_operations trace_options_fops = {
9028 .open = tracing_open_options,
9029 .read = trace_options_read,
9030 .write = trace_options_write,
9031 .llseek = generic_file_llseek,
9032 .release = tracing_release_options,
9033 };
9034
9035 /*
9036 * In order to pass in both the trace_array descriptor as well as the index
9037 * to the flag that the trace option file represents, the trace_array
9038 * has a character array of trace_flags_index[], which holds the index
9039 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9040 * The address of this character array is passed to the flag option file
9041 * read/write callbacks.
9042 *
9043 * In order to extract both the index and the trace_array descriptor,
9044 * get_tr_index() uses the following algorithm.
9045 *
9046 * idx = *ptr;
9047 *
9048 * The dereference gives the index directly, since each element of the
9049 * array holds its own index (remember index[1] == 1).
9050 *
9051 * Then to get the trace_array descriptor, by subtracting that index
9052 * from the ptr, we get to the start of the index itself.
9053 *
9054 * ptr - idx == &index[0]
9055 *
9056 * Then a simple container_of() from that pointer gets us to the
9057 * trace_array descriptor.
9058 */
9059 static void get_tr_index(void *data, struct trace_array **ptr,
9060 unsigned int *pindex)
9061 {
9062 *pindex = *(unsigned char *)data;
9063
9064 *ptr = container_of(data - *pindex, struct trace_array,
9065 trace_flags_index);
9066 }
9067
9068 static ssize_t
9069 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9070 loff_t *ppos)
9071 {
9072 void *tr_index = filp->private_data;
9073 struct trace_array *tr;
9074 unsigned int index;
9075 char *buf;
9076
9077 get_tr_index(tr_index, &tr, &index);
9078
9079 if (tr->trace_flags & (1 << index))
9080 buf = "1\n";
9081 else
9082 buf = "0\n";
9083
9084 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9085 }
9086
9087 static ssize_t
9088 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9089 loff_t *ppos)
9090 {
9091 void *tr_index = filp->private_data;
9092 struct trace_array *tr;
9093 unsigned int index;
9094 unsigned long val;
9095 int ret;
9096
9097 get_tr_index(tr_index, &tr, &index);
9098
9099 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9100 if (ret)
9101 return ret;
9102
9103 if (val != 0 && val != 1)
9104 return -EINVAL;
9105
9106 mutex_lock(&event_mutex);
9107 mutex_lock(&trace_types_lock);
9108 ret = set_tracer_flag(tr, 1 << index, val);
9109 mutex_unlock(&trace_types_lock);
9110 mutex_unlock(&event_mutex);
9111
9112 if (ret < 0)
9113 return ret;
9114
9115 *ppos += cnt;
9116
9117 return cnt;
9118 }
9119
9120 static const struct file_operations trace_options_core_fops = {
9121 .open = tracing_open_generic,
9122 .read = trace_options_core_read,
9123 .write = trace_options_core_write,
9124 .llseek = generic_file_llseek,
9125 };
9126
9127 struct dentry *trace_create_file(const char *name,
9128 umode_t mode,
9129 struct dentry *parent,
9130 void *data,
9131 const struct file_operations *fops)
9132 {
9133 struct dentry *ret;
9134
9135 ret = tracefs_create_file(name, mode, parent, data, fops);
9136 if (!ret)
9137 pr_warn("Could not create tracefs '%s' entry\n", name);
9138
9139 return ret;
9140 }
9141
9142
9143 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9144 {
9145 struct dentry *d_tracer;
9146
9147 if (tr->options)
9148 return tr->options;
9149
9150 d_tracer = tracing_get_dentry(tr);
9151 if (IS_ERR(d_tracer))
9152 return NULL;
9153
9154 tr->options = tracefs_create_dir("options", d_tracer);
9155 if (!tr->options) {
9156 pr_warn("Could not create tracefs directory 'options'\n");
9157 return NULL;
9158 }
9159
9160 return tr->options;
9161 }
9162
9163 static void
9164 create_trace_option_file(struct trace_array *tr,
9165 struct trace_option_dentry *topt,
9166 struct tracer_flags *flags,
9167 struct tracer_opt *opt)
9168 {
9169 struct dentry *t_options;
9170
9171 t_options = trace_options_init_dentry(tr);
9172 if (!t_options)
9173 return;
9174
9175 topt->flags = flags;
9176 topt->opt = opt;
9177 topt->tr = tr;
9178
9179 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9180 t_options, topt, &trace_options_fops);
9181
9182 }
9183
9184 static void
9185 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9186 {
9187 struct trace_option_dentry *topts;
9188 struct trace_options *tr_topts;
9189 struct tracer_flags *flags;
9190 struct tracer_opt *opts;
9191 int cnt;
9192 int i;
9193
9194 if (!tracer)
9195 return;
9196
9197 flags = tracer->flags;
9198
9199 if (!flags || !flags->opts)
9200 return;
9201
9202 /*
9203 * If this is an instance, only create flags for tracers
9204 * the instance may have.
9205 */
9206 if (!trace_ok_for_array(tracer, tr))
9207 return;
9208
9209 for (i = 0; i < tr->nr_topts; i++) {
9210 /* Make sure there are no duplicate flags. */
9211 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9212 return;
9213 }
9214
9215 opts = flags->opts;
9216
9217 for (cnt = 0; opts[cnt].name; cnt++)
9218 ;
9219
9220 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9221 if (!topts)
9222 return;
9223
9224 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9225 GFP_KERNEL);
9226 if (!tr_topts) {
9227 kfree(topts);
9228 return;
9229 }
9230
9231 tr->topts = tr_topts;
9232 tr->topts[tr->nr_topts].tracer = tracer;
9233 tr->topts[tr->nr_topts].topts = topts;
9234 tr->nr_topts++;
9235
9236 for (cnt = 0; opts[cnt].name; cnt++) {
9237 create_trace_option_file(tr, &topts[cnt], flags,
9238 &opts[cnt]);
9239 MEM_FAIL(topts[cnt].entry == NULL,
9240 "Failed to create trace option: %s",
9241 opts[cnt].name);
9242 }
9243 }
9244
9245 static struct dentry *
9246 create_trace_option_core_file(struct trace_array *tr,
9247 const char *option, long index)
9248 {
9249 struct dentry *t_options;
9250
9251 t_options = trace_options_init_dentry(tr);
9252 if (!t_options)
9253 return NULL;
9254
9255 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9256 (void *)&tr->trace_flags_index[index],
9257 &trace_options_core_fops);
9258 }
9259
9260 static void create_trace_options_dir(struct trace_array *tr)
9261 {
9262 struct dentry *t_options;
9263 bool top_level = tr == &global_trace;
9264 int i;
9265
9266 t_options = trace_options_init_dentry(tr);
9267 if (!t_options)
9268 return;
9269
9270 for (i = 0; trace_options[i]; i++) {
9271 if (top_level ||
9272 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9273 create_trace_option_core_file(tr, trace_options[i], i);
9274 }
9275 }
9276
9277 static ssize_t
9278 rb_simple_read(struct file *filp, char __user *ubuf,
9279 size_t cnt, loff_t *ppos)
9280 {
9281 struct trace_array *tr = filp->private_data;
9282 char buf[64];
9283 int r;
9284
9285 r = tracer_tracing_is_on(tr);
9286 r = sprintf(buf, "%d\n", r);
9287
9288 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9289 }
9290
9291 static ssize_t
9292 rb_simple_write(struct file *filp, const char __user *ubuf,
9293 size_t cnt, loff_t *ppos)
9294 {
9295 struct trace_array *tr = filp->private_data;
9296 struct trace_buffer *buffer = tr->array_buffer.buffer;
9297 unsigned long val;
9298 int ret;
9299
9300 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9301 if (ret)
9302 return ret;
9303
9304 if (buffer) {
9305 mutex_lock(&trace_types_lock);
9306 if (!!val == tracer_tracing_is_on(tr)) {
9307 val = 0; /* do nothing */
9308 } else if (val) {
9309 tracer_tracing_on(tr);
9310 if (tr->current_trace->start)
9311 tr->current_trace->start(tr);
9312 } else {
9313 tracer_tracing_off(tr);
9314 if (tr->current_trace->stop)
9315 tr->current_trace->stop(tr);
9316 /* Wake up any waiters */
9317 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9318 }
9319 mutex_unlock(&trace_types_lock);
9320 }
9321
9322 (*ppos)++;
9323
9324 return cnt;
9325 }
9326
9327 static const struct file_operations rb_simple_fops = {
9328 .open = tracing_open_generic_tr,
9329 .read = rb_simple_read,
9330 .write = rb_simple_write,
9331 .release = tracing_release_generic_tr,
9332 .llseek = default_llseek,
9333 };
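
/*
 * Usage sketch: rb_simple_fops backs the per-instance "tracing_on" file
 * created in init_tracer_tracefs() below. Assuming the default tracefs
 * mount point:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on	# stop recording
 *	# echo 1 > /sys/kernel/tracing/tracing_on	# resume recording
 *
 * Writing the value that is already set is a no-op, and turning the
 * ring buffer off also wakes up any tasks blocked waiting on it.
 */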
9334
9335 static ssize_t
9336 buffer_percent_read(struct file *filp, char __user *ubuf,
9337 size_t cnt, loff_t *ppos)
9338 {
9339 struct trace_array *tr = filp->private_data;
9340 char buf[64];
9341 int r;
9342
9343 r = tr->buffer_percent;
9344 r = sprintf(buf, "%d\n", r);
9345
9346 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9347 }
9348
9349 static ssize_t
9350 buffer_percent_write(struct file *filp, const char __user *ubuf,
9351 size_t cnt, loff_t *ppos)
9352 {
9353 struct trace_array *tr = filp->private_data;
9354 unsigned long val;
9355 int ret;
9356
9357 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9358 if (ret)
9359 return ret;
9360
9361 if (val > 100)
9362 return -EINVAL;
9363
9364 tr->buffer_percent = val;
9365
9366 (*ppos)++;
9367
9368 return cnt;
9369 }
9370
9371 static const struct file_operations buffer_percent_fops = {
9372 .open = tracing_open_generic_tr,
9373 .read = buffer_percent_read,
9374 .write = buffer_percent_write,
9375 .release = tracing_release_generic_tr,
9376 .llseek = default_llseek,
9377 };
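
/*
 * Usage sketch: "buffer_percent" sets how full the ring buffer must be
 * before a blocked reader (e.g. on trace_pipe_raw) is woken: 0 wakes on
 * any data, 100 waits until the buffer is full, and the default set in
 * init_tracer_tracefs() below is 50. Values above 100 are rejected:
 *
 *	# echo 0 > /sys/kernel/tracing/buffer_percent
 */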
9378
9379 static struct dentry *trace_instance_dir;
9380
9381 static void
9382 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9383
9384 static int
9385 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9386 {
9387 enum ring_buffer_flags rb_flags;
9388
9389 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9390
9391 buf->tr = tr;
9392
9393 buf->buffer = ring_buffer_alloc(size, rb_flags);
9394 if (!buf->buffer)
9395 return -ENOMEM;
9396
9397 buf->data = alloc_percpu(struct trace_array_cpu);
9398 if (!buf->data) {
9399 ring_buffer_free(buf->buffer);
9400 buf->buffer = NULL;
9401 return -ENOMEM;
9402 }
9403
9404 /* Allocate the first page for all buffers */
9405 set_buffer_entries(&tr->array_buffer,
9406 ring_buffer_size(tr->array_buffer.buffer, 0));
9407
9408 return 0;
9409 }
9410
9411 static void free_trace_buffer(struct array_buffer *buf)
9412 {
9413 if (buf->buffer) {
9414 ring_buffer_free(buf->buffer);
9415 buf->buffer = NULL;
9416 free_percpu(buf->data);
9417 buf->data = NULL;
9418 }
9419 }
9420
9421 static int allocate_trace_buffers(struct trace_array *tr, int size)
9422 {
9423 int ret;
9424
9425 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9426 if (ret)
9427 return ret;
9428
9429 #ifdef CONFIG_TRACER_MAX_TRACE
9430 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9431 allocate_snapshot ? size : 1);
9432 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9433 free_trace_buffer(&tr->array_buffer);
9434 return -ENOMEM;
9435 }
9436 tr->allocated_snapshot = allocate_snapshot;
9437
9438 allocate_snapshot = false;
9439 #endif
9440
9441 return 0;
9442 }
9443
9444 static void free_trace_buffers(struct trace_array *tr)
9445 {
9446 if (!tr)
9447 return;
9448
9449 free_trace_buffer(&tr->array_buffer);
9450
9451 #ifdef CONFIG_TRACER_MAX_TRACE
9452 free_trace_buffer(&tr->max_buffer);
9453 #endif
9454 }
9455
9456 static void init_trace_flags_index(struct trace_array *tr)
9457 {
9458 int i;
9459
9460 /* Used by the trace options files */
9461 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9462 tr->trace_flags_index[i] = i;
9463 }
9464
9465 static void __update_tracer_options(struct trace_array *tr)
9466 {
9467 struct tracer *t;
9468
9469 for (t = trace_types; t; t = t->next)
9470 add_tracer_options(tr, t);
9471 }
9472
9473 static void update_tracer_options(struct trace_array *tr)
9474 {
9475 mutex_lock(&trace_types_lock);
9476 tracer_options_updated = true;
9477 __update_tracer_options(tr);
9478 mutex_unlock(&trace_types_lock);
9479 }
9480
9481 /* Must have trace_types_lock held */
9482 struct trace_array *trace_array_find(const char *instance)
9483 {
9484 struct trace_array *tr, *found = NULL;
9485
9486 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9487 if (tr->name && strcmp(tr->name, instance) == 0) {
9488 found = tr;
9489 break;
9490 }
9491 }
9492
9493 return found;
9494 }
9495
9496 struct trace_array *trace_array_find_get(const char *instance)
9497 {
9498 struct trace_array *tr;
9499
9500 mutex_lock(&trace_types_lock);
9501 tr = trace_array_find(instance);
9502 if (tr)
9503 tr->ref++;
9504 mutex_unlock(&trace_types_lock);
9505
9506 return tr;
9507 }
9508
9509 static int trace_array_create_dir(struct trace_array *tr)
9510 {
9511 int ret;
9512
9513 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9514 if (!tr->dir)
9515 return -EINVAL;
9516
9517 ret = event_trace_add_tracer(tr->dir, tr);
9518 if (ret) {
9519 tracefs_remove(tr->dir);
9520 return ret;
9521 }
9522
9523 init_tracer_tracefs(tr, tr->dir);
9524 __update_tracer_options(tr);
9525
9526 return ret;
9527 }
9528
9529 static struct trace_array *trace_array_create(const char *name)
9530 {
9531 struct trace_array *tr;
9532 int ret;
9533
9534 ret = -ENOMEM;
9535 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9536 if (!tr)
9537 return ERR_PTR(ret);
9538
9539 tr->name = kstrdup(name, GFP_KERNEL);
9540 if (!tr->name)
9541 goto out_free_tr;
9542
9543 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9544 goto out_free_tr;
9545
9546 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9547 goto out_free_tr;
9548
9549 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9550
9551 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9552
9553 raw_spin_lock_init(&tr->start_lock);
9554
9555 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9556
9557 tr->current_trace = &nop_trace;
9558
9559 INIT_LIST_HEAD(&tr->systems);
9560 INIT_LIST_HEAD(&tr->events);
9561 INIT_LIST_HEAD(&tr->hist_vars);
9562 INIT_LIST_HEAD(&tr->err_log);
9563
9564 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9565 goto out_free_tr;
9566
9567 if (ftrace_allocate_ftrace_ops(tr) < 0)
9568 goto out_free_tr;
9569
9570 ftrace_init_trace_array(tr);
9571
9572 init_trace_flags_index(tr);
9573
9574 if (trace_instance_dir) {
9575 ret = trace_array_create_dir(tr);
9576 if (ret)
9577 goto out_free_tr;
9578 } else
9579 __trace_early_add_events(tr);
9580
9581 list_add(&tr->list, &ftrace_trace_arrays);
9582
9583 tr->ref++;
9584
9585 return tr;
9586
9587 out_free_tr:
9588 ftrace_free_ftrace_ops(tr);
9589 free_trace_buffers(tr);
9590 free_cpumask_var(tr->pipe_cpumask);
9591 free_cpumask_var(tr->tracing_cpumask);
9592 kfree(tr->name);
9593 kfree(tr);
9594
9595 return ERR_PTR(ret);
9596 }
9597
9598 static int instance_mkdir(const char *name)
9599 {
9600 struct trace_array *tr;
9601 int ret;
9602
9603 mutex_lock(&event_mutex);
9604 mutex_lock(&trace_types_lock);
9605
9606 ret = -EEXIST;
9607 if (trace_array_find(name))
9608 goto out_unlock;
9609
9610 tr = trace_array_create(name);
9611
9612 ret = PTR_ERR_OR_ZERO(tr);
9613
9614 out_unlock:
9615 mutex_unlock(&trace_types_lock);
9616 mutex_unlock(&event_mutex);
9617 return ret;
9618 }
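
/*
 * Usage sketch: instance_mkdir() above and instance_rmdir() below are
 * wired up as the mkdir/rmdir callbacks of the "instances" directory,
 * so a separate trace array can be created and torn down entirely from
 * user space (assuming the default tracefs mount point):
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo 1 > /sys/kernel/tracing/instances/foo/events/sched/enable
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still holds extra
 * references.
 */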
9619
9620 /**
9621 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9622 * @name: The name of the trace array to be looked up/created.
9623 *
9624 * Returns a pointer to the trace array with the given name, or
9625 * NULL if it cannot be created.
9626 *
9627 * NOTE: This function increments the reference counter associated with the
9628 * trace array returned. This makes sure it cannot be freed while in use.
9629 * Use trace_array_put() once the trace array is no longer needed.
9630 * If the trace_array is to be freed, trace_array_destroy() needs to
9631 * be called after the trace_array_put(), or simply let user space delete
9632 * it from the tracefs instances directory. But until the
9633 * trace_array_put() is called, user space can not delete it.
9634 *
9635 */
9636 struct trace_array *trace_array_get_by_name(const char *name)
9637 {
9638 struct trace_array *tr;
9639
9640 mutex_lock(&event_mutex);
9641 mutex_lock(&trace_types_lock);
9642
9643 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9644 if (tr->name && strcmp(tr->name, name) == 0)
9645 goto out_unlock;
9646 }
9647
9648 tr = trace_array_create(name);
9649
9650 if (IS_ERR(tr))
9651 tr = NULL;
9652 out_unlock:
9653 if (tr)
9654 tr->ref++;
9655
9656 mutex_unlock(&trace_types_lock);
9657 mutex_unlock(&event_mutex);
9658 return tr;
9659 }
9660 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
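
/*
 * In-kernel usage sketch (a minimal, hypothetical caller; the instance
 * name "my_inst" is made up for illustration):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_inst"); // create or look up, +ref
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);			 // drop the reference
 *	trace_array_destroy(tr);		 // then remove the instance
 *
 * As the comment above notes, trace_array_destroy() must come after
 * trace_array_put(); __remove_instance() returns -EBUSY otherwise.
 */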
9661
9662 static int __remove_instance(struct trace_array *tr)
9663 {
9664 int i;
9665
9666 /* Reference counter for a newly created trace array = 1. */
9667 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9668 return -EBUSY;
9669
9670 list_del(&tr->list);
9671
9672 /* Disable all the flags that were enabled coming in */
9673 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9674 if ((1 << i) & ZEROED_TRACE_FLAGS)
9675 set_tracer_flag(tr, 1 << i, 0);
9676 }
9677
9678 tracing_set_nop(tr);
9679 clear_ftrace_function_probes(tr);
9680 event_trace_del_tracer(tr);
9681 ftrace_clear_pids(tr);
9682 ftrace_destroy_function_files(tr);
9683 tracefs_remove(tr->dir);
9684 free_percpu(tr->last_func_repeats);
9685 free_trace_buffers(tr);
9686 clear_tracing_err_log(tr);
9687
9688 for (i = 0; i < tr->nr_topts; i++) {
9689 kfree(tr->topts[i].topts);
9690 }
9691 kfree(tr->topts);
9692
9693 free_cpumask_var(tr->pipe_cpumask);
9694 free_cpumask_var(tr->tracing_cpumask);
9695 kfree(tr->name);
9696 kfree(tr);
9697
9698 return 0;
9699 }
9700
9701 int trace_array_destroy(struct trace_array *this_tr)
9702 {
9703 struct trace_array *tr;
9704 int ret;
9705
9706 if (!this_tr)
9707 return -EINVAL;
9708
9709 mutex_lock(&event_mutex);
9710 mutex_lock(&trace_types_lock);
9711
9712 ret = -ENODEV;
9713
9714 /* Making sure trace array exists before destroying it. */
9715 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9716 if (tr == this_tr) {
9717 ret = __remove_instance(tr);
9718 break;
9719 }
9720 }
9721
9722 mutex_unlock(&trace_types_lock);
9723 mutex_unlock(&event_mutex);
9724
9725 return ret;
9726 }
9727 EXPORT_SYMBOL_GPL(trace_array_destroy);
9728
9729 static int instance_rmdir(const char *name)
9730 {
9731 struct trace_array *tr;
9732 int ret;
9733
9734 mutex_lock(&event_mutex);
9735 mutex_lock(&trace_types_lock);
9736
9737 ret = -ENODEV;
9738 tr = trace_array_find(name);
9739 if (tr)
9740 ret = __remove_instance(tr);
9741
9742 mutex_unlock(&trace_types_lock);
9743 mutex_unlock(&event_mutex);
9744
9745 return ret;
9746 }
9747
9748 static __init void create_trace_instances(struct dentry *d_tracer)
9749 {
9750 struct trace_array *tr;
9751
9752 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9753 instance_mkdir,
9754 instance_rmdir);
9755 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9756 return;
9757
9758 mutex_lock(&event_mutex);
9759 mutex_lock(&trace_types_lock);
9760
9761 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9762 if (!tr->name)
9763 continue;
9764 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9765 "Failed to create instance directory\n"))
9766 break;
9767 }
9768
9769 mutex_unlock(&trace_types_lock);
9770 mutex_unlock(&event_mutex);
9771 }
9772
9773 static void
9774 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9775 {
9776 int cpu;
9777
9778 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9779 tr, &show_traces_fops);
9780
9781 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9782 tr, &set_tracer_fops);
9783
9784 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9785 tr, &tracing_cpumask_fops);
9786
9787 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9788 tr, &tracing_iter_fops);
9789
9790 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9791 tr, &tracing_fops);
9792
9793 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9794 tr, &tracing_pipe_fops);
9795
9796 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9797 tr, &tracing_entries_fops);
9798
9799 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9800 tr, &tracing_total_entries_fops);
9801
9802 trace_create_file("free_buffer", 0200, d_tracer,
9803 tr, &tracing_free_buffer_fops);
9804
9805 trace_create_file("trace_marker", 0220, d_tracer,
9806 tr, &tracing_mark_fops);
9807
9808 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9809
9810 trace_create_file("trace_marker_raw", 0220, d_tracer,
9811 tr, &tracing_mark_raw_fops);
9812
9813 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9814 &trace_clock_fops);
9815
9816 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9817 tr, &rb_simple_fops);
9818
9819 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9820 &trace_time_stamp_mode_fops);
9821
9822 tr->buffer_percent = 50;
9823
9824 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9825 tr, &buffer_percent_fops);
9826
9827 create_trace_options_dir(tr);
9828
9829 #ifdef CONFIG_TRACER_MAX_TRACE
9830 trace_create_maxlat_file(tr, d_tracer);
9831 #endif
9832
9833 if (ftrace_create_function_files(tr, d_tracer))
9834 MEM_FAIL(1, "Could not allocate function filter files");
9835
9836 #ifdef CONFIG_TRACER_SNAPSHOT
9837 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9838 tr, &snapshot_fops);
9839 #endif
9840
9841 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9842 tr, &tracing_err_log_fops);
9843
9844 for_each_tracing_cpu(cpu)
9845 tracing_init_tracefs_percpu(tr, cpu);
9846
9847 ftrace_init_tracefs(tr, d_tracer);
9848 }
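
/*
 * The files created above give every trace array, the top level as well
 * as each entry under "instances/", the same control surface, e.g.
 * (non-exhaustive):
 *
 *	trace, trace_pipe		- read the buffer
 *	current_tracer			- select a tracer
 *	tracing_on, trace_options	- runtime switches
 *	buffer_size_kb			- per-CPU buffer size
 *	trace_marker, trace_marker_raw	- write markers from user space
 */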
9849
9850 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9851 {
9852 struct vfsmount *mnt;
9853 struct file_system_type *type;
9854
9855 /*
9856 * To maintain backward compatibility for tools that mount
9857 * debugfs to get to the tracing facility, tracefs is automatically
9858 * mounted to the debugfs/tracing directory.
9859 */
9860 type = get_fs_type("tracefs");
9861 if (!type)
9862 return NULL;
9863 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9864 put_filesystem(type);
9865 if (IS_ERR(mnt))
9866 return NULL;
9867 mntget(mnt);
9868
9869 return mnt;
9870 }
9871
9872 /**
9873 * tracing_init_dentry - initialize top level trace array
9874 *
9875 * This is called when creating files or directories in the tracing
9876 * directory. It is called via fs_initcall() by the boot up code
9877 * and returns 0 once the top level tracing directory has been set up.
9878 */
9879 int tracing_init_dentry(void)
9880 {
9881 struct trace_array *tr = &global_trace;
9882
9883 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9884 pr_warn("Tracing disabled due to lockdown\n");
9885 return -EPERM;
9886 }
9887
9888 /* The top level trace array uses NULL as parent */
9889 if (tr->dir)
9890 return 0;
9891
9892 if (WARN_ON(!tracefs_initialized()))
9893 return -ENODEV;
9894
9895 /*
9896 * As there may still be users that expect the tracing
9897 * files to exist in debugfs/tracing, we must automount
9898 * the tracefs file system there, so older tools still
9899 * work with the newer kernel.
9900 */
9901 tr->dir = debugfs_create_automount("tracing", NULL,
9902 trace_automount, NULL);
9903
9904 return 0;
9905 }
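
/*
 * Usage sketch of the automount set up above: with debugfs mounted at
 * /sys/kernel/debug (the common default), merely accessing the legacy
 * path triggers trace_automount() and transparently mounts tracefs:
 *
 *	# ls /sys/kernel/debug/tracing/
 *
 * New tooling should use the native tracefs mount point, typically
 * /sys/kernel/tracing, instead.
 */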
9906
9907 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9908 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9909
9910 static struct workqueue_struct *eval_map_wq __initdata;
9911 static struct work_struct eval_map_work __initdata;
9912 static struct work_struct tracerfs_init_work __initdata;
9913
9914 static void __init eval_map_work_func(struct work_struct *work)
9915 {
9916 int len;
9917
9918 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9919 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9920 }
9921
9922 static int __init trace_eval_init(void)
9923 {
9924 INIT_WORK(&eval_map_work, eval_map_work_func);
9925
9926 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9927 if (!eval_map_wq) {
9928 pr_err("Unable to allocate eval_map_wq\n");
9929 /* Do work here */
9930 eval_map_work_func(&eval_map_work);
9931 return -ENOMEM;
9932 }
9933
9934 queue_work(eval_map_wq, &eval_map_work);
9935 return 0;
9936 }
9937
9938 subsys_initcall(trace_eval_init);
9939
9940 static int __init trace_eval_sync(void)
9941 {
9942 /* Make sure the eval map updates are finished */
9943 if (eval_map_wq)
9944 destroy_workqueue(eval_map_wq);
9945 return 0;
9946 }
9947
9948 late_initcall_sync(trace_eval_sync);
9949
9950
9951 #ifdef CONFIG_MODULES
9952 static void trace_module_add_evals(struct module *mod)
9953 {
9954 if (!mod->num_trace_evals)
9955 return;
9956
9957 /*
9958 * Modules with bad taint do not have events created, do
9959 * not bother with enums either.
9960 */
9961 if (trace_module_has_bad_taint(mod))
9962 return;
9963
9964 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9965 }
9966
9967 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9968 static void trace_module_remove_evals(struct module *mod)
9969 {
9970 union trace_eval_map_item *map;
9971 union trace_eval_map_item **last = &trace_eval_maps;
9972
9973 if (!mod->num_trace_evals)
9974 return;
9975
9976 mutex_lock(&trace_eval_mutex);
9977
9978 map = trace_eval_maps;
9979
9980 while (map) {
9981 if (map->head.mod == mod)
9982 break;
9983 map = trace_eval_jmp_to_tail(map);
9984 last = &map->tail.next;
9985 map = map->tail.next;
9986 }
9987 if (!map)
9988 goto out;
9989
9990 *last = trace_eval_jmp_to_tail(map)->tail.next;
9991 kfree(map);
9992 out:
9993 mutex_unlock(&trace_eval_mutex);
9994 }
9995 #else
9996 static inline void trace_module_remove_evals(struct module *mod) { }
9997 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9998
9999 static int trace_module_notify(struct notifier_block *self,
10000 unsigned long val, void *data)
10001 {
10002 struct module *mod = data;
10003
10004 switch (val) {
10005 case MODULE_STATE_COMING:
10006 trace_module_add_evals(mod);
10007 break;
10008 case MODULE_STATE_GOING:
10009 trace_module_remove_evals(mod);
10010 break;
10011 }
10012
10013 return NOTIFY_OK;
10014 }
10015
10016 static struct notifier_block trace_module_nb = {
10017 .notifier_call = trace_module_notify,
10018 .priority = 0,
10019 };
10020 #endif /* CONFIG_MODULES */
10021
10022 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10023 {
10024
10025 event_trace_init();
10026
10027 init_tracer_tracefs(&global_trace, NULL);
10028 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10029
10030 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10031 &global_trace, &tracing_thresh_fops);
10032
10033 trace_create_file("README", TRACE_MODE_READ, NULL,
10034 NULL, &tracing_readme_fops);
10035
10036 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10037 NULL, &tracing_saved_cmdlines_fops);
10038
10039 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10040 NULL, &tracing_saved_cmdlines_size_fops);
10041
10042 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10043 NULL, &tracing_saved_tgids_fops);
10044
10045 trace_create_eval_file(NULL);
10046
10047 #ifdef CONFIG_MODULES
10048 register_module_notifier(&trace_module_nb);
10049 #endif
10050
10051 #ifdef CONFIG_DYNAMIC_FTRACE
10052 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10053 NULL, &tracing_dyn_info_fops);
10054 #endif
10055
10056 create_trace_instances(NULL);
10057
10058 update_tracer_options(&global_trace);
10059 }
10060
10061 static __init int tracer_init_tracefs(void)
10062 {
10063 int ret;
10064
10065 trace_access_lock_init();
10066
10067 ret = tracing_init_dentry();
10068 if (ret)
10069 return 0;
10070
10071 if (eval_map_wq) {
10072 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10073 queue_work(eval_map_wq, &tracerfs_init_work);
10074 } else {
10075 tracer_init_tracefs_work_func(NULL);
10076 }
10077
10078 rv_init_interface();
10079
10080 return 0;
10081 }
10082
10083 fs_initcall(tracer_init_tracefs);
10084
10085 static int trace_die_panic_handler(struct notifier_block *self,
10086 unsigned long ev, void *unused);
10087
10088 static struct notifier_block trace_panic_notifier = {
10089 .notifier_call = trace_die_panic_handler,
10090 .priority = INT_MAX - 1,
10091 };
10092
10093 static struct notifier_block trace_die_notifier = {
10094 .notifier_call = trace_die_panic_handler,
10095 .priority = INT_MAX - 1,
10096 };
10097
10098 /*
10099 * The idea is to execute the following die/panic callback early, in order
10100 * to avoid showing irrelevant information in the trace (like other panic
10101 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10102 * warnings get disabled (to prevent potential log flooding).
10103 */
10104 static int trace_die_panic_handler(struct notifier_block *self,
10105 unsigned long ev, void *unused)
10106 {
10107 if (!ftrace_dump_on_oops)
10108 return NOTIFY_DONE;
10109
10110 /* The die notifier requires DIE_OOPS to trigger */
10111 if (self == &trace_die_notifier && ev != DIE_OOPS)
10112 return NOTIFY_DONE;
10113
10114 ftrace_dump(ftrace_dump_on_oops);
10115
10116 return NOTIFY_DONE;
10117 }
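
/*
 * Usage sketch: the handler above only fires when ftrace_dump_on_oops
 * is set, e.g. via the kernel command line:
 *
 *	ftrace_dump_on_oops		dump all CPU buffers on oops/panic
 *	ftrace_dump_on_oops=orig_cpu	dump only the CPU that oopsed
 *
 * or at run time through /proc/sys/kernel/ftrace_dump_on_oops.
 */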
10118
10119 /*
10120 * printk is limited to a max of 1024 characters; we really don't need
10121 * it that big, as nothing should be printing 1000 characters anyway.
10122 */
10123 #define TRACE_MAX_PRINT 1000
10124
10125 /*
10126 * Define here KERN_TRACE so that we have one place to modify
10127 * it if we decide to change what log level the ftrace dump
10128 * should be at.
10129 */
10130 #define KERN_TRACE KERN_EMERG
10131
10132 void
10133 trace_printk_seq(struct trace_seq *s)
10134 {
10135 /* Probably should print a warning here. */
10136 if (s->seq.len >= TRACE_MAX_PRINT)
10137 s->seq.len = TRACE_MAX_PRINT;
10138
10139 /*
10140 * More paranoid code. Although the buffer size is set to
10141 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10142 * an extra layer of protection.
10143 */
10144 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10145 s->seq.len = s->seq.size - 1;
10146
10147 /* Should already be nul-terminated, but we are paranoid. */
10148 s->buffer[s->seq.len] = 0;
10149
10150 printk(KERN_TRACE "%s", s->buffer);
10151
10152 trace_seq_init(s);
10153 }
10154
10155 void trace_init_global_iter(struct trace_iterator *iter)
10156 {
10157 iter->tr = &global_trace;
10158 iter->trace = iter->tr->current_trace;
10159 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10160 iter->array_buffer = &global_trace.array_buffer;
10161
10162 if (iter->trace && iter->trace->open)
10163 iter->trace->open(iter);
10164
10165 /* Annotate start of buffers if we had overruns */
10166 if (ring_buffer_overruns(iter->array_buffer->buffer))
10167 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10168
10169 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10170 if (trace_clocks[iter->tr->clock_id].in_ns)
10171 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10172
10173 /* Can not use kmalloc for iter.temp and iter.fmt */
10174 iter->temp = static_temp_buf;
10175 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10176 iter->fmt = static_fmt_buf;
10177 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10178 }
10179
10180 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10181 {
10182 /* use static because iter can be a bit big for the stack */
10183 static struct trace_iterator iter;
10184 static atomic_t dump_running;
10185 struct trace_array *tr = &global_trace;
10186 unsigned int old_userobj;
10187 unsigned long flags;
10188 int cnt = 0, cpu;
10189
10190 /* Only allow one dump user at a time. */
10191 if (atomic_inc_return(&dump_running) != 1) {
10192 atomic_dec(&dump_running);
10193 return;
10194 }
10195
10196 /*
10197 * Always turn off tracing when we dump.
10198 * We don't need to show trace output of what happens
10199 * between multiple crashes.
10200 *
10201 * If the user does a sysrq-z, then they can re-enable
10202 * tracing with echo 1 > tracing_on.
10203 */
10204 tracing_off();
10205
10206 local_irq_save(flags);
10207
10208 /* Simulate the iterator */
10209 trace_init_global_iter(&iter);
10210
10211 for_each_tracing_cpu(cpu) {
10212 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10213 }
10214
10215 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10216
10217 /* don't look at user memory in panic mode */
10218 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10219
10220 switch (oops_dump_mode) {
10221 case DUMP_ALL:
10222 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10223 break;
10224 case DUMP_ORIG:
10225 iter.cpu_file = raw_smp_processor_id();
10226 break;
10227 case DUMP_NONE:
10228 goto out_enable;
10229 default:
10230 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10231 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10232 }
10233
10234 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10235
10236 /* Did function tracer already get disabled? */
10237 if (ftrace_is_dead()) {
10238 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10239 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10240 }
10241
10242 /*
10243 * We need to stop all tracing on all CPUs to read
10244 * the next buffer. This is a bit expensive, but is
10245 * not done often. We fill in all that we can read,
10246 * and then release the locks again.
10247 */
10248
10249 while (!trace_empty(&iter)) {
10250
10251 if (!cnt)
10252 printk(KERN_TRACE "---------------------------------\n");
10253
10254 cnt++;
10255
10256 trace_iterator_reset(&iter);
10257 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10258
10259 if (trace_find_next_entry_inc(&iter) != NULL) {
10260 int ret;
10261
10262 ret = print_trace_line(&iter);
10263 if (ret != TRACE_TYPE_NO_CONSUME)
10264 trace_consume(&iter);
10265 }
10266 touch_nmi_watchdog();
10267
10268 trace_printk_seq(&iter.seq);
10269 }
10270
10271 if (!cnt)
10272 printk(KERN_TRACE " (ftrace buffer empty)\n");
10273 else
10274 printk(KERN_TRACE "---------------------------------\n");
10275
10276 out_enable:
10277 tr->trace_flags |= old_userobj;
10278
10279 for_each_tracing_cpu(cpu) {
10280 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10281 }
10282 atomic_dec(&dump_running);
10283 local_irq_restore(flags);
10284 }
10285 EXPORT_SYMBOL_GPL(ftrace_dump);
10286
10287 #define WRITE_BUFSIZE 4096
10288
10289 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10290 size_t count, loff_t *ppos,
10291 int (*createfn)(const char *))
10292 {
10293 char *kbuf, *buf, *tmp;
10294 int ret = 0;
10295 size_t done = 0;
10296 size_t size;
10297
10298 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10299 if (!kbuf)
10300 return -ENOMEM;
10301
10302 while (done < count) {
10303 size = count - done;
10304
10305 if (size >= WRITE_BUFSIZE)
10306 size = WRITE_BUFSIZE - 1;
10307
10308 if (copy_from_user(kbuf, buffer + done, size)) {
10309 ret = -EFAULT;
10310 goto out;
10311 }
10312 kbuf[size] = '\0';
10313 buf = kbuf;
10314 do {
10315 tmp = strchr(buf, '\n');
10316 if (tmp) {
10317 *tmp = '\0';
10318 size = tmp - buf + 1;
10319 } else {
10320 size = strlen(buf);
10321 if (done + size < count) {
10322 if (buf != kbuf)
10323 break;
10324 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10325 pr_warn("Line length is too long: Should be less than %d\n",
10326 WRITE_BUFSIZE - 2);
10327 ret = -EINVAL;
10328 goto out;
10329 }
10330 }
10331 done += size;
10332
10333 /* Remove comments */
10334 tmp = strchr(buf, '#');
10335
10336 if (tmp)
10337 *tmp = '\0';
10338
10339 ret = createfn(buf);
10340 if (ret)
10341 goto out;
10342 buf += size;
10343
10344 } while (done < count);
10345 }
10346 ret = done;
10347
10348 out:
10349 kfree(kbuf);
10350
10351 return ret;
10352 }
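
/*
 * Usage sketch: callers pass a createfn() that handles one complete,
 * comment-stripped line. For example, the kprobe event interface wires
 * its write handler to this helper roughly as follows (simplified; the
 * actual callback lives in trace_kprobe.c):
 *
 *	static ssize_t probes_write(struct file *file,
 *				    const char __user *buffer,
 *				    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       create_or_delete_trace_kprobe);
 *	}
 */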
10353
10354 #ifdef CONFIG_TRACER_MAX_TRACE
10355 __init static bool tr_needs_alloc_snapshot(const char *name)
10356 {
10357 char *test;
10358 int len = strlen(name);
10359 bool ret;
10360
10361 if (!boot_snapshot_index)
10362 return false;
10363
10364 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10365 boot_snapshot_info[len] == '\t')
10366 return true;
10367
10368 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10369 if (!test)
10370 return false;
10371
10372 sprintf(test, "\t%s\t", name);
10373 ret = strstr(boot_snapshot_info, test) != NULL;
10374 kfree(test);
10375 return ret;
10376 }
10377
10378 __init static void do_allocate_snapshot(const char *name)
10379 {
10380 if (!tr_needs_alloc_snapshot(name))
10381 return;
10382
10383 /*
10384 * When allocate_snapshot is set, the next call to
10385 * allocate_trace_buffers() (called by trace_array_get_by_name())
10386 * will allocate the snapshot buffer. That will also clear
10387 * this flag.
10388 */
10389 allocate_snapshot = true;
10390 }
10391 #else
10392 static inline void do_allocate_snapshot(const char *name) { }
10393 #endif
10394
10395 __init static void enable_instances(void)
10396 {
10397 struct trace_array *tr;
10398 char *curr_str;
10399 char *str;
10400 char *tok;
10401
10402 /* A tab is always appended */
10403 boot_instance_info[boot_instance_index - 1] = '\0';
10404 str = boot_instance_info;
10405
10406 while ((curr_str = strsep(&str, "\t"))) {
10407
10408 tok = strsep(&curr_str, ",");
10409
10410 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10411 do_allocate_snapshot(tok);
10412
10413 tr = trace_array_get_by_name(tok);
10414 if (!tr) {
10415 pr_warn("Failed to create instance buffer %s\n", curr_str);
10416 continue;
10417 }
10418 /* Allow user space to delete it */
10419 trace_array_put(tr);
10420
10421 while ((tok = strsep(&curr_str, ","))) {
10422 early_enable_events(tr, tok, true);
10423 }
10424 }
10425 }
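
/*
 * Usage sketch of the boot-time syntax parsed above (instance name and
 * events are examples only): each "trace_instance=" parameter is
 * appended to boot_instance_info with a trailing tab, and each entry is
 * "name[,event]...", e.g.
 *
 *	trace_instance=foo,sched:sched_switch,irq_handler_entry
 *
 * which creates instance "foo" and enables the listed events in it
 * before user space comes up.
 */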
10426
10427 __init static int tracer_alloc_buffers(void)
10428 {
10429 int ring_buf_size;
10430 int ret = -ENOMEM;
10431
10432
10433 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10434 pr_warn("Tracing disabled due to lockdown\n");
10435 return -EPERM;
10436 }
10437
10438 /*
10439 * Make sure we don't accidentally add more trace options
10440 * than we have bits for.
10441 */
10442 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10443
10444 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10445 goto out;
10446
10447 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10448 goto out_free_buffer_mask;
10449
10450 /* Only allocate trace_printk buffers if a trace_printk exists */
10451 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10452 /* Must be called before global_trace.buffer is allocated */
10453 trace_printk_init_buffers();
10454
10455 /* To save memory, keep the ring buffer size to its minimum */
10456 if (ring_buffer_expanded)
10457 ring_buf_size = trace_buf_size;
10458 else
10459 ring_buf_size = 1;
10460
10461 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10462 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10463
10464 raw_spin_lock_init(&global_trace.start_lock);
10465
10466 /*
10467 * The prepare callback allocates some memory for the ring buffer. We
10468 * don't free the buffer if the CPU goes down. If we were to free
10469 * the buffer, then the user would lose any trace that was in the
10470 * buffer. The memory will be removed once the "instance" is removed.
10471 */
10472 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10473 "trace/RB:prepare", trace_rb_cpu_prepare,
10474 NULL);
10475 if (ret < 0)
10476 goto out_free_cpumask;
10477 /* Used for event triggers */
10478 ret = -ENOMEM;
10479 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10480 if (!temp_buffer)
10481 goto out_rm_hp_state;
10482
10483 if (trace_create_savedcmd() < 0)
10484 goto out_free_temp_buffer;
10485
10486 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10487 goto out_free_savedcmd;
10488
10489 /* TODO: make the number of buffers hot pluggable with CPUS */
10490 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10491 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10492 goto out_free_pipe_cpumask;
10493 }
10494 if (global_trace.buffer_disabled)
10495 tracing_off();
10496
10497 if (trace_boot_clock) {
10498 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10499 if (ret < 0)
10500 pr_warn("Trace clock %s not defined, going back to default\n",
10501 trace_boot_clock);
10502 }
10503
10504 /*
10505 * register_tracer() might reference current_trace, so it
10506 * needs to be set before we register anything. This is
10507 * just a bootstrap of current_trace anyway.
10508 */
10509 global_trace.current_trace = &nop_trace;
10510
10511 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10512
10513 ftrace_init_global_array_ops(&global_trace);
10514
10515 init_trace_flags_index(&global_trace);
10516
10517 register_tracer(&nop_trace);
10518
10519 /* Function tracing may start here (via kernel command line) */
10520 init_function_trace();
10521
10522 /* All seems OK, enable tracing */
10523 tracing_disabled = 0;
10524
10525 atomic_notifier_chain_register(&panic_notifier_list,
10526 &trace_panic_notifier);
10527
10528 register_die_notifier(&trace_die_notifier);
10529
10530 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10531
10532 INIT_LIST_HEAD(&global_trace.systems);
10533 INIT_LIST_HEAD(&global_trace.events);
10534 INIT_LIST_HEAD(&global_trace.hist_vars);
10535 INIT_LIST_HEAD(&global_trace.err_log);
10536 list_add(&global_trace.list, &ftrace_trace_arrays);
10537
10538 apply_trace_boot_options();
10539
10540 register_snapshot_cmd();
10541
10542 test_can_verify();
10543
10544 return 0;
10545
10546 out_free_pipe_cpumask:
10547 free_cpumask_var(global_trace.pipe_cpumask);
10548 out_free_savedcmd:
10549 free_saved_cmdlines_buffer(savedcmd);
10550 out_free_temp_buffer:
10551 ring_buffer_free(temp_buffer);
10552 out_rm_hp_state:
10553 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10554 out_free_cpumask:
10555 free_cpumask_var(global_trace.tracing_cpumask);
10556 out_free_buffer_mask:
10557 free_cpumask_var(tracing_buffer_mask);
10558 out:
10559 return ret;
10560 }
10561
10562 void __init ftrace_boot_snapshot(void)
10563 {
10564 #ifdef CONFIG_TRACER_MAX_TRACE
10565 struct trace_array *tr;
10566
10567 if (!snapshot_at_boot)
10568 return;
10569
10570 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10571 if (!tr->allocated_snapshot)
10572 continue;
10573
10574 tracing_snapshot_instance(tr);
10575 trace_array_puts(tr, "** Boot snapshot taken **\n");
10576 }
10577 #endif
10578 }
10579
10580 void __init early_trace_init(void)
10581 {
10582 if (tracepoint_printk) {
10583 tracepoint_print_iter =
10584 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10585 if (MEM_FAIL(!tracepoint_print_iter,
10586 "Failed to allocate trace iterator\n"))
10587 tracepoint_printk = 0;
10588 else
10589 static_key_enable(&tracepoint_printk_key.key);
10590 }
10591 tracer_alloc_buffers();
10592
10593 init_events();
10594 }
10595
10596 void __init trace_init(void)
10597 {
10598 trace_event_init();
10599
10600 if (boot_instance_index)
10601 enable_instances();
10602 }
10603
10604 __init static void clear_boot_tracer(void)
10605 {
10606 /*
10607 * The default bootup tracer string lives in an init section.
10608 * This function is called at late_initcall time. If we did not
10609 * find the boot tracer by then, clear it out, to prevent
10610 * later registration from accessing the init memory that is
10611 * about to be freed.
10612 */
10613 if (!default_bootup_tracer)
10614 return;
10615
10616 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10617 default_bootup_tracer);
10618 default_bootup_tracer = NULL;
10619 }
10620
10621 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10622 __init static void tracing_set_default_clock(void)
10623 {
10624 /* sched_clock_stable() is determined in late_initcall */
10625 if (!trace_boot_clock && !sched_clock_stable()) {
10626 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10627 pr_warn("Can not set tracing clock due to lockdown\n");
10628 return;
10629 }
10630
10631 printk(KERN_WARNING
10632 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10633 "If you want to keep using the local clock, then add:\n"
10634 " \"trace_clock=local\"\n"
10635 "on the kernel command line\n");
10636 tracing_set_clock(&global_trace, "global");
10637 }
10638 }
10639 #else
10640 static inline void tracing_set_default_clock(void) { }
10641 #endif
10642
10643 __init static int late_trace_init(void)
10644 {
10645 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10646 static_key_disable(&tracepoint_printk_key.key);
10647 tracepoint_printk = 0;
10648 }
10649
10650 tracing_set_default_clock();
10651 clear_boot_tracer();
10652 return 0;
10653 }
10654
10655 late_initcall_sync(late_trace_init);
10656