1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 /*
59 * On boot up, the ring buffer is set to the minimum size, so that
60 * we do not waste memory on systems that are not using tracing.
61 */
62 bool ring_buffer_expanded;
63
64 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer, such as trace_printk(), could occur
 * at the same time, giving false positive or negative results.
 */
72 static bool __read_mostly tracing_selftest_running;
73
74 /*
75 * If boot-time tracing including tracers/events via kernel cmdline
76 * is running, we do not want to run SELFTEST.
77 */
78 bool __read_mostly tracing_selftest_disabled;
79
disable_tracing_selftest(const char * reason)80 void __init disable_tracing_selftest(const char *reason)
81 {
82 if (!tracing_selftest_disabled) {
83 tracing_selftest_disabled = true;
84 pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 }
86 }
87 #else
88 #define tracing_selftest_running 0
89 #define tracing_selftest_disabled 0
90 #endif
91
92 /* Pipe tracepoints to printk */
93 static struct trace_iterator *tracepoint_print_iter;
94 int tracepoint_printk;
95 static bool tracepoint_printk_stop_on_boot __initdata;
96 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97
98 /* For tracers that don't implement custom flags */
99 static struct tracer_opt dummy_tracer_opt[] = {
100 { }
101 };
102
103 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)104 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
105 {
106 return 0;
107 }
108
109 /*
110 * To prevent the comm cache from being overwritten when no
111 * tracing is active, only save the comm when a trace event
112 * occurred.
113 */
114 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
115
116 /*
117 * Kill all tracing for good (never come back).
118 * It is initialized to 1 but will turn to zero if the initialization
119 * of the tracer is successful. But that is the only place that sets
120 * this back to zero.
121 */
122 static int tracing_disabled = 1;
123
124 cpumask_var_t __read_mostly tracing_buffer_mask;
125
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 */
141
142 enum ftrace_dump_mode ftrace_dump_on_oops;
143
144 /* When set, tracing will stop when a WARN*() is hit */
145 int __disable_trace_on_warning;
146
147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
148 /* Map of enums to their values, for "eval_map" file */
149 struct trace_eval_map_head {
150 struct module *mod;
151 unsigned long length;
152 };
153
154 union trace_eval_map_item;
155
156 struct trace_eval_map_tail {
157 /*
158 * "end" is first and points to NULL as it must be different
159 * than "mod" or "eval_string"
160 */
161 union trace_eval_map_item *next;
162 const char *end; /* points to NULL */
163 };
164
165 static DEFINE_MUTEX(trace_eval_mutex);
166
167 /*
168 * The trace_eval_maps are saved in an array with two extra elements,
169 * one at the beginning, and one at the end. The beginning item contains
170 * the count of the saved maps (head.length), and the module they
171 * belong to if not built in (head.mod). The ending item contains a
172 * pointer to the next array of saved eval_map items.
173 */
174 union trace_eval_map_item {
175 struct trace_eval_map map;
176 struct trace_eval_map_head head;
177 struct trace_eval_map_tail tail;
178 };
179
180 static union trace_eval_map_item *trace_eval_maps;
181 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182
183 int tracing_set_tracer(struct trace_array *tr, const char *buf);
184 static void ftrace_trace_userstack(struct trace_array *tr,
185 struct trace_buffer *buffer,
186 unsigned int trace_ctx);
187
188 #define MAX_TRACER_SIZE 100
189 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
190 static char *default_bootup_tracer;
191
192 static bool allocate_snapshot;
193 static bool snapshot_at_boot;
194
195 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_instance_index;
197
198 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
199 static int boot_snapshot_index;
200
set_cmdline_ftrace(char * str)201 static int __init set_cmdline_ftrace(char *str)
202 {
203 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
204 default_bootup_tracer = bootup_tracer_buf;
205 /* We are using ftrace early, expand it */
206 ring_buffer_expanded = true;
207 return 1;
208 }
209 __setup("ftrace=", set_cmdline_ftrace);
210
set_ftrace_dump_on_oops(char * str)211 static int __init set_ftrace_dump_on_oops(char *str)
212 {
213 if (*str++ != '=' || !*str || !strcmp("1", str)) {
214 ftrace_dump_on_oops = DUMP_ALL;
215 return 1;
216 }
217
218 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
219 ftrace_dump_on_oops = DUMP_ORIG;
220 return 1;
221 }
222
223 return 0;
224 }
225 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
226
stop_trace_on_warning(char * str)227 static int __init stop_trace_on_warning(char *str)
228 {
229 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
230 __disable_trace_on_warning = 1;
231 return 1;
232 }
233 __setup("traceoff_on_warning", stop_trace_on_warning);
234
boot_alloc_snapshot(char * str)235 static int __init boot_alloc_snapshot(char *str)
236 {
237 char *slot = boot_snapshot_info + boot_snapshot_index;
238 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
239 int ret;
240
241 if (str[0] == '=') {
242 str++;
243 if (strlen(str) >= left)
244 return -1;
245
246 ret = snprintf(slot, left, "%s\t", str);
247 boot_snapshot_index += ret;
248 } else {
249 allocate_snapshot = true;
250 /* We also need the main ring buffer expanded */
251 ring_buffer_expanded = true;
252 }
253 return 1;
254 }
255 __setup("alloc_snapshot", boot_alloc_snapshot);
256
257
boot_snapshot(char * str)258 static int __init boot_snapshot(char *str)
259 {
260 snapshot_at_boot = true;
261 boot_alloc_snapshot(str);
262 return 1;
263 }
264 __setup("ftrace_boot_snapshot", boot_snapshot);
265
266
boot_instance(char * str)267 static int __init boot_instance(char *str)
268 {
269 char *slot = boot_instance_info + boot_instance_index;
270 int left = sizeof(boot_instance_info) - boot_instance_index;
271 int ret;
272
273 if (strlen(str) >= left)
274 return -1;
275
276 ret = snprintf(slot, left, "%s\t", str);
277 boot_instance_index += ret;
278
279 return 1;
280 }
281 __setup("trace_instance=", boot_instance);
282
283
284 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285
set_trace_boot_options(char * str)286 static int __init set_trace_boot_options(char *str)
287 {
288 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
289 return 1;
290 }
291 __setup("trace_options=", set_trace_boot_options);
292
293 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
294 static char *trace_boot_clock __initdata;
295
set_trace_boot_clock(char * str)296 static int __init set_trace_boot_clock(char *str)
297 {
298 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
299 trace_boot_clock = trace_boot_clock_buf;
300 return 1;
301 }
302 __setup("trace_clock=", set_trace_boot_clock);
303
set_tracepoint_printk(char * str)304 static int __init set_tracepoint_printk(char *str)
305 {
306 /* Ignore the "tp_printk_stop_on_boot" param */
307 if (*str == '_')
308 return 0;
309
310 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
311 tracepoint_printk = 1;
312 return 1;
313 }
314 __setup("tp_printk", set_tracepoint_printk);
315
set_tracepoint_printk_stop(char * str)316 static int __init set_tracepoint_printk_stop(char *str)
317 {
318 tracepoint_printk_stop_on_boot = true;
319 return 1;
320 }
321 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322
/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */
unsigned long long ns2usecs(u64 nsec)
{
	u64 usecs = nsec + 500;

	/* do_div() leaves the quotient in its first argument */
	do_div(usecs, 1000);

	return usecs;
}
329
330 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)331 trace_process_export(struct trace_export *export,
332 struct ring_buffer_event *event, int flag)
333 {
334 struct trace_entry *entry;
335 unsigned int size = 0;
336
337 if (export->flags & flag) {
338 entry = ring_buffer_event_data(event);
339 size = ring_buffer_event_length(event);
340 export->write(export, entry, size);
341 }
342 }
343
344 static DEFINE_MUTEX(ftrace_export_lock);
345
346 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
347
348 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
350 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351
ftrace_exports_enable(struct trace_export * export)352 static inline void ftrace_exports_enable(struct trace_export *export)
353 {
354 if (export->flags & TRACE_EXPORT_FUNCTION)
355 static_branch_inc(&trace_function_exports_enabled);
356
357 if (export->flags & TRACE_EXPORT_EVENT)
358 static_branch_inc(&trace_event_exports_enabled);
359
360 if (export->flags & TRACE_EXPORT_MARKER)
361 static_branch_inc(&trace_marker_exports_enabled);
362 }
363
ftrace_exports_disable(struct trace_export * export)364 static inline void ftrace_exports_disable(struct trace_export *export)
365 {
366 if (export->flags & TRACE_EXPORT_FUNCTION)
367 static_branch_dec(&trace_function_exports_enabled);
368
369 if (export->flags & TRACE_EXPORT_EVENT)
370 static_branch_dec(&trace_event_exports_enabled);
371
372 if (export->flags & TRACE_EXPORT_MARKER)
373 static_branch_dec(&trace_marker_exports_enabled);
374 }
375
ftrace_exports(struct ring_buffer_event * event,int flag)376 static void ftrace_exports(struct ring_buffer_event *event, int flag)
377 {
378 struct trace_export *export;
379
380 preempt_disable_notrace();
381
382 export = rcu_dereference_raw_check(ftrace_exports_list);
383 while (export) {
384 trace_process_export(export, event, flag);
385 export = rcu_dereference_raw_check(export->next);
386 }
387
388 preempt_enable_notrace();
389 }
390
391 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)392 add_trace_export(struct trace_export **list, struct trace_export *export)
393 {
394 rcu_assign_pointer(export->next, *list);
395 /*
396 * We are entering export into the list but another
397 * CPU might be walking that list. We need to make sure
398 * the export->next pointer is valid before another CPU sees
399 * the export pointer included into the list.
400 */
401 rcu_assign_pointer(*list, export);
402 }
403
404 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)405 rm_trace_export(struct trace_export **list, struct trace_export *export)
406 {
407 struct trace_export **p;
408
409 for (p = list; *p != NULL; p = &(*p)->next)
410 if (*p == export)
411 break;
412
413 if (*p != export)
414 return -1;
415
416 rcu_assign_pointer(*p, (*p)->next);
417
418 return 0;
419 }
420
/*
 * Bump the static keys for @export's flags, then add @export to @list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
428
/*
 * Remove @export from @list and drop the static key counts that were
 * taken for it when it was added.
 *
 * The static keys are only decremented when @export was really found on
 * the list. Decrementing them for an export that was never added would
 * unbalance the counts raised by ftrace_exports_enable().
 *
 * Returns 0 on success, or -1 when @export is not on the list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
439
register_ftrace_export(struct trace_export * export)440 int register_ftrace_export(struct trace_export *export)
441 {
442 if (WARN_ON_ONCE(!export->write))
443 return -1;
444
445 mutex_lock(&ftrace_export_lock);
446
447 add_ftrace_export(&ftrace_exports_list, export);
448
449 mutex_unlock(&ftrace_export_lock);
450
451 return 0;
452 }
453 EXPORT_SYMBOL_GPL(register_ftrace_export);
454
unregister_ftrace_export(struct trace_export * export)455 int unregister_ftrace_export(struct trace_export *export)
456 {
457 int ret;
458
459 mutex_lock(&ftrace_export_lock);
460
461 ret = rm_ftrace_export(&ftrace_exports_list, export);
462
463 mutex_unlock(&ftrace_export_lock);
464
465 return ret;
466 }
467 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
468
469 /* trace_flags holds trace_options default values */
470 #define TRACE_DEFAULT_FLAGS \
471 (FUNCTION_DEFAULT_FLAGS | \
472 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
473 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
474 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
475 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
476 TRACE_ITER_HASH_PTR)
477
478 /* trace_options that are only supported by global_trace */
479 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
480 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481
482 /* trace_flags that are default zero for instances */
483 #define ZEROED_TRACE_FLAGS \
484 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
485
486 /*
487 * The global_trace is the descriptor that holds the top-level tracing
488 * buffers for the live tracing.
489 */
490 static struct trace_array global_trace = {
491 .trace_flags = TRACE_DEFAULT_FLAGS,
492 };
493
494 LIST_HEAD(ftrace_trace_arrays);
495
trace_array_get(struct trace_array * this_tr)496 int trace_array_get(struct trace_array *this_tr)
497 {
498 struct trace_array *tr;
499 int ret = -ENODEV;
500
501 mutex_lock(&trace_types_lock);
502 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 if (tr == this_tr) {
504 tr->ref++;
505 ret = 0;
506 break;
507 }
508 }
509 mutex_unlock(&trace_types_lock);
510
511 return ret;
512 }
513
__trace_array_put(struct trace_array * this_tr)514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 WARN_ON(!this_tr->ref);
517 this_tr->ref--;
518 }
519
520 /**
521 * trace_array_put - Decrement the reference counter for this trace array.
522 * @this_tr : pointer to the trace array
523 *
524 * NOTE: Use this when we no longer need the trace array returned by
525 * trace_array_get_by_name(). This ensures the trace array can be later
526 * destroyed.
527 *
528 */
trace_array_put(struct trace_array * this_tr)529 void trace_array_put(struct trace_array *this_tr)
530 {
531 if (!this_tr)
532 return;
533
534 mutex_lock(&trace_types_lock);
535 __trace_array_put(this_tr);
536 mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539
tracing_check_open_get_tr(struct trace_array * tr)540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 int ret;
543
544 ret = security_locked_down(LOCKDOWN_TRACEFS);
545 if (ret)
546 return ret;
547
548 if (tracing_disabled)
549 return -ENODEV;
550
551 if (tr && trace_array_get(tr) < 0)
552 return -ENODEV;
553
554 return 0;
555 }
556
call_filter_check_discard(struct trace_event_call * call,void * rec,struct trace_buffer * buffer,struct ring_buffer_event * event)557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 struct trace_buffer *buffer,
559 struct ring_buffer_event *event)
560 {
561 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 !filter_match_preds(call->filter, rec)) {
563 __trace_event_discard_commit(buffer, event);
564 return 1;
565 }
566
567 return 0;
568 }
569
570 /**
571 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572 * @filtered_pids: The list of pids to check
573 * @search_pid: The PID to find in @filtered_pids
574 *
575 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576 */
577 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582
583 /**
584 * trace_ignore_this_task - should a task be ignored for tracing
585 * @filtered_pids: The list of pids to check
586 * @filtered_no_pids: The list of pids not to be traced
587 * @task: The task that should be ignored if not filtered
588 *
589 * Checks if @task should be traced or not from @filtered_pids.
590 * Returns true if @task should *NOT* be traced.
591 * Returns false if @task should be traced.
592 */
593 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 struct trace_pid_list *filtered_no_pids,
596 struct task_struct *task)
597 {
598 /*
599 * If filtered_no_pids is not empty, and the task's pid is listed
600 * in filtered_no_pids, then return true.
601 * Otherwise, if filtered_pids is empty, that means we can
602 * trace all tasks. If it has content, then only trace pids
603 * within filtered_pids.
604 */
605
606 return (filtered_pids &&
607 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 (filtered_no_pids &&
609 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611
612 /**
613 * trace_filter_add_remove_task - Add or remove a task from a pid_list
614 * @pid_list: The list to modify
615 * @self: The current task for fork or NULL for exit
616 * @task: The task to add or remove
617 *
618 * If adding a task, if @self is defined, the task is only added if @self
619 * is also included in @pid_list. This happens on fork and tasks should
620 * only be added when the parent is listed. If @self is NULL, then the
621 * @task pid will be removed from the list, which would happen on exit
622 * of a task.
623 */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 struct task_struct *self,
626 struct task_struct *task)
627 {
628 if (!pid_list)
629 return;
630
631 /* For forks, we only add if the forking task is listed */
632 if (self) {
633 if (!trace_find_filtered_pid(pid_list, self->pid))
634 return;
635 }
636
637 /* "self" is set for forks, and NULL for exits */
638 if (self)
639 trace_pid_list_set(pid_list, task->pid);
640 else
641 trace_pid_list_clear(pid_list, task->pid);
642 }
643
644 /**
645 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646 * @pid_list: The pid list to show
647 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648 * @pos: The position of the file
649 *
650 * This is used by the seq_file "next" operation to iterate the pids
651 * listed in a trace_pid_list structure.
652 *
653 * Returns the pid+1 as we want to display pid of zero, but NULL would
654 * stop the iteration.
655 */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 long pid = (unsigned long)v;
659 unsigned int next;
660
661 (*pos)++;
662
663 /* pid already is +1 of the actual previous bit */
664 if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 return NULL;
666
667 pid = next;
668
669 /* Return pid + 1 to allow zero to be represented */
670 return (void *)(pid + 1);
671 }
672
673 /**
674 * trace_pid_start - Used for seq_file to start reading pid lists
675 * @pid_list: The pid list to show
676 * @pos: The position of the file
677 *
678 * This is used by seq_file "start" operation to start the iteration
679 * of listing pids.
680 *
681 * Returns the pid+1 as we want to display pid of zero, but NULL would
682 * stop the iteration.
683 */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 unsigned long pid;
687 unsigned int first;
688 loff_t l = 0;
689
690 if (trace_pid_list_first(pid_list, &first) < 0)
691 return NULL;
692
693 pid = first;
694
695 /* Return pid + 1 so that zero can be the exit value */
696 for (pid++; pid && l < *pos;
697 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 ;
699 return (void *)pid;
700 }
701
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* Undo the +1 encoding used by the start/next iterators */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
717
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE 127
720
/*
 * Build a new pid list from the pids in @filtered_pids (may be NULL)
 * plus the pids parsed from the user buffer @ubuf of @cnt bytes.
 *
 * The write is all or nothing: a fresh list is always allocated, and on
 * any failure it is freed and neither @filtered_pids nor *@new_pid_list
 * is modified. On success *@new_pid_list is set to the new list, or to
 * NULL when the resulting list holds no pids.
 *
 * Returns the number of bytes consumed from @ubuf, or a negative value
 * on error.
 */
int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *new_list;
	struct trace_parser parser;
	unsigned long parsed;
	ssize_t consumed = 0;
	ssize_t err = 0;
	ssize_t ret;
	int nr_pids = 0;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	new_list = trace_pid_list_alloc();
	if (!new_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* Carry every pid of the current list over to the new one */
		for (ret = trace_pid_list_first(filtered_pids, &pid); !ret;
		     ret = trace_pid_list_next(filtered_pids, pid + 1, &pid)) {
			trace_pid_list_set(new_list, pid);
			nr_pids++;
		}
	}

	while (cnt > 0) {
		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0) {
			err = ret;
			break;
		}

		consumed += ret;
		ubuf += ret;
		cnt -= ret;

		/* Nothing was parsed: stop, this is not an error */
		if (!trace_parser_loaded(&parser))
			break;

		if (kstrtoul(parser.buffer, 0, &parsed)) {
			err = -EINVAL;
			break;
		}

		pid = (pid_t)parsed;

		if (trace_pid_list_set(new_list, pid) < 0) {
			err = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
	}
	trace_parser_put(&parser);

	if (err < 0) {
		trace_pid_list_free(new_list);
		return err;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(new_list);
		new_list = NULL;
	}

	*new_pid_list = new_list;

	return consumed;
}
807
buffer_ftrace_now(struct array_buffer * buf,int cpu)808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 u64 ts;
811
812 /* Early boot up does not have a buffer yet */
813 if (!buf->buffer)
814 return trace_clock_local();
815
816 ts = ring_buffer_time_stamp(buf->buffer);
817 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819 return ts;
820 }
821
ftrace_now(int cpu)822 u64 ftrace_now(int cpu)
823 {
824 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826
827 /**
828 * tracing_is_enabled - Show if global_trace has been enabled
829 *
830 * Shows if the global trace has been enabled or not. It uses the
831 * mirror flag "buffer_disabled" to be used in fast paths such as for
832 * the irqsoff tracer. But it may be inaccurate due to races. If you
833 * need to know the accurate state, use tracing_is_on() which is a little
834 * slower, but accurate.
835 */
tracing_is_enabled(void)836 int tracing_is_enabled(void)
837 {
838 /*
839 * For quick access (irqsoff uses this in fast path), just
840 * return the mirror variable of the state of the ring buffer.
841 * It's a little racy, but we don't really care.
842 */
843 smp_rmb();
844 return !global_trace.buffer_disabled;
845 }
846
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384 entries.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway, this can be
 * configured at both boot time and run time.
 */
857 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer *trace_types __read_mostly;
863
864 /*
865 * trace_types_lock is used to protect the trace_types list.
866 */
867 DEFINE_MUTEX(trace_types_lock);
868
/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different per-CPU
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
890
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
trace_access_lock(int cpu)895 static inline void trace_access_lock(int cpu)
896 {
897 if (cpu == RING_BUFFER_ALL_CPUS) {
898 /* gain it for accessing the whole ring buffer. */
899 down_write(&all_cpu_access_lock);
900 } else {
901 /* gain it for accessing a cpu ring buffer. */
902
903 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 down_read(&all_cpu_access_lock);
905
906 /* Secondly block other access to this @cpu ring buffer. */
907 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 }
909 }
910
trace_access_unlock(int cpu)911 static inline void trace_access_unlock(int cpu)
912 {
913 if (cpu == RING_BUFFER_ALL_CPUS) {
914 up_write(&all_cpu_access_lock);
915 } else {
916 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 up_read(&all_cpu_access_lock);
918 }
919 }
920
trace_access_lock_init(void)921 static inline void trace_access_lock_init(void)
922 {
923 int cpu;
924
925 for_each_possible_cpu(cpu)
926 mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928
929 #else
930
931 static DEFINE_MUTEX(access_lock);
932
trace_access_lock(int cpu)933 static inline void trace_access_lock(int cpu)
934 {
935 (void)cpu;
936 mutex_lock(&access_lock);
937 }
938
trace_access_unlock(int cpu)939 static inline void trace_access_unlock(int cpu)
940 {
941 (void)cpu;
942 mutex_unlock(&access_lock);
943 }
944
/* !CONFIG_SMP: access_lock is statically defined, nothing to set up. */
static inline void trace_access_lock_init(void)
{
	/* Intentionally empty */
}
948
949 #endif
950
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 unsigned int trace_ctx,
954 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 struct trace_buffer *buffer,
957 unsigned int trace_ctx,
958 int skip, struct pt_regs *regs);
959
960 #else
/* Without CONFIG_STACKTRACE, recording a stack trace is a no-op. */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}

/*
 * No-op version of ftrace_trace_stack() for !CONFIG_STACKTRACE.
 *
 * @trace_ctx is an unsigned int here as well, so that the stub has the
 * same signature as the CONFIG_STACKTRACE prototype and both
 * configurations type-check callers identically.
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
972
973 #endif
974
975 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)976 trace_event_setup(struct ring_buffer_event *event,
977 int type, unsigned int trace_ctx)
978 {
979 struct trace_entry *ent = ring_buffer_event_data(event);
980
981 tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983
984 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 int type,
987 unsigned long len,
988 unsigned int trace_ctx)
989 {
990 struct ring_buffer_event *event;
991
992 event = ring_buffer_lock_reserve(buffer, len);
993 if (event != NULL)
994 trace_event_setup(event, type, trace_ctx);
995
996 return event;
997 }
998
tracer_tracing_on(struct trace_array * tr)999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 if (tr->array_buffer.buffer)
1002 ring_buffer_record_on(tr->array_buffer.buffer);
1003 /*
1004 * This flag is looked at when buffers haven't been allocated
1005 * yet, or by some tracers (like irqsoff), that just want to
1006 * know if the ring buffer has been disabled, but it can handle
1007 * races of where it gets disabled but we still do a record.
1008 * As the check is in the fast path of the tracers, it is more
1009 * important to be fast than accurate.
1010 */
1011 tr->buffer_disabled = 0;
1012 /* Make the flag seen by readers */
1013 smp_wmb();
1014 }
1015
1016 /**
1017 * tracing_on - enable tracing buffers
1018 *
1019 * This function enables tracing buffers that may have been
1020 * disabled with tracing_off.
1021 */
tracing_on(void)1022 void tracing_on(void)
1023 {
1024 tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
1029 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 __this_cpu_write(trace_taskinfo_save, true);
1033
1034 /* If this is the temp buffer, we need to commit fully */
1035 if (this_cpu_read(trace_buffered_event) == event) {
1036 /* Length is in event->array[0] */
1037 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 /* Release the temp buffer */
1039 this_cpu_dec(trace_buffered_event_cnt);
1040 /* ring_buffer_unlock_commit() enables preemption */
1041 preempt_enable_notrace();
1042 } else
1043 ring_buffer_unlock_commit(buffer);
1044 }
1045
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 const char *str, int size)
1048 {
1049 struct ring_buffer_event *event;
1050 struct trace_buffer *buffer;
1051 struct print_entry *entry;
1052 unsigned int trace_ctx;
1053 int alloc;
1054
1055 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 return 0;
1057
1058 if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 return 0;
1060
1061 if (unlikely(tracing_disabled))
1062 return 0;
1063
1064 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066 trace_ctx = tracing_gen_ctx();
1067 buffer = tr->array_buffer.buffer;
1068 ring_buffer_nest_start(buffer);
1069 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 trace_ctx);
1071 if (!event) {
1072 size = 0;
1073 goto out;
1074 }
1075
1076 entry = ring_buffer_event_data(event);
1077 entry->ip = ip;
1078
1079 memcpy(&entry->buf, str, size);
1080
1081 /* Add a newline if necessary */
1082 if (entry->buf[size - 1] != '\n') {
1083 entry->buf[size] = '\n';
1084 entry->buf[size + 1] = '\0';
1085 } else
1086 entry->buf[size] = '\0';
1087
1088 __buffer_unlock_commit(buffer, event);
1089 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090 out:
1091 ring_buffer_nest_end(buffer);
1092 return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096 /**
1097 * __trace_puts - write a constant string into the trace buffer.
1098 * @ip: The address of the caller
1099 * @str: The constant string to write
1100 * @size: The size of the string.
1101 */
__trace_puts(unsigned long ip,const char * str,int size)1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107
1108 /**
1109 * __trace_bputs - write the pointer to a constant string into trace buffer
1110 * @ip: The address of the caller
1111 * @str: The constant string to write to the buffer to
1112 */
__trace_bputs(unsigned long ip,const char * str)1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 struct ring_buffer_event *event;
1116 struct trace_buffer *buffer;
1117 struct bputs_entry *entry;
1118 unsigned int trace_ctx;
1119 int size = sizeof(struct bputs_entry);
1120 int ret = 0;
1121
1122 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 return 0;
1124
1125 if (unlikely(tracing_selftest_running || tracing_disabled))
1126 return 0;
1127
1128 trace_ctx = tracing_gen_ctx();
1129 buffer = global_trace.array_buffer.buffer;
1130
1131 ring_buffer_nest_start(buffer);
1132 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 trace_ctx);
1134 if (!event)
1135 goto out;
1136
1137 entry = ring_buffer_event_data(event);
1138 entry->ip = ip;
1139 entry->str = str;
1140
1141 __buffer_unlock_commit(buffer, event);
1142 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144 ret = 1;
1145 out:
1146 ring_buffer_nest_end(buffer);
1147 return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 void *cond_data)
1154 {
1155 struct tracer *tracer = tr->current_trace;
1156 unsigned long flags;
1157
1158 if (in_nmi()) {
1159 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1161 return;
1162 }
1163
1164 if (!tr->allocated_snapshot) {
1165 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 trace_array_puts(tr, "*** stopping trace here! ***\n");
1167 tracer_tracing_off(tr);
1168 return;
1169 }
1170
1171 /* Note, snapshot can not be used when the tracer uses it */
1172 if (tracer->use_max_tr) {
1173 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 return;
1176 }
1177
1178 local_irq_save(flags);
1179 update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 local_irq_restore(flags);
1181 }
1182
tracing_snapshot_instance(struct trace_array * tr)1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187
1188 /**
1189 * tracing_snapshot - take a snapshot of the current buffer.
1190 *
1191 * This causes a swap between the snapshot buffer and the current live
1192 * tracing buffer. You can use this to take snapshots of the live
1193 * trace when some condition is triggered, but continue to trace.
1194 *
1195 * Note, make sure to allocate the snapshot with either
1196 * a tracing_snapshot_alloc(), or by doing it manually
1197 * with: echo 1 > /sys/kernel/tracing/snapshot
1198 *
1199 * If the snapshot buffer is not allocated, it will stop tracing.
1200 * Basically making a permanent snapshot.
1201 */
tracing_snapshot(void)1202 void tracing_snapshot(void)
1203 {
1204 struct trace_array *tr = &global_trace;
1205
1206 tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Works like tracing_snapshot(), except that the snapshot only happens
 * if the cond_snapshot.update() implementation that receives @cond_data
 * returns true. In other words, the trace array's cond_snapshot update()
 * operation uses @cond_data to decide whether the snapshot should be
 * taken and, if so, presumably saves it along with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229 /**
1230 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231 * @tr: The tracing instance
1232 *
1233 * When the user enables a conditional snapshot using
1234 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235 * with the snapshot. This accessor is used to retrieve it.
1236 *
1237 * Should not be called from cond_snapshot.update(), since it takes
1238 * the tr->max_lock lock, which the code calling
1239 * cond_snapshot.update() has already done.
1240 *
1241 * Returns the cond_data associated with the trace array's snapshot.
1242 */
tracing_cond_snapshot_data(struct trace_array * tr)1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 void *cond_data = NULL;
1246
1247 local_irq_disable();
1248 arch_spin_lock(&tr->max_lock);
1249
1250 if (tr->cond_snapshot)
1251 cond_data = tr->cond_snapshot->cond_data;
1252
1253 arch_spin_unlock(&tr->max_lock);
1254 local_irq_enable();
1255
1256 return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
tracing_alloc_snapshot_instance(struct trace_array * tr)1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 int ret;
1267
1268 if (!tr->allocated_snapshot) {
1269
1270 /* allocate spare buffer */
1271 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 if (ret < 0)
1274 return ret;
1275
1276 tr->allocated_snapshot = true;
1277 }
1278
1279 return 0;
1280 }
1281
free_snapshot(struct trace_array * tr)1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284 /*
1285 * We don't free the ring buffer. instead, resize it because
1286 * The max_tr ring buffer has some state (e.g. ring->clock) and
1287 * we want preserve it.
1288 */
1289 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290 set_buffer_entries(&tr->max_buffer, 1);
1291 tracing_reset_online_cpus(&tr->max_buffer);
1292 tr->allocated_snapshot = false;
1293 }
1294
1295 /**
1296 * tracing_alloc_snapshot - allocate snapshot buffer.
1297 *
1298 * This only allocates the snapshot buffer if it isn't already
1299 * allocated - it doesn't also take a snapshot.
1300 *
1301 * This is meant to be used in cases where the snapshot buffer needs
1302 * to be set up for events that can't sleep but need to be able to
1303 * trigger a snapshot.
1304 */
tracing_alloc_snapshot(void)1305 int tracing_alloc_snapshot(void)
1306 {
1307 struct trace_array *tr = &global_trace;
1308 int ret;
1309
1310 ret = tracing_alloc_snapshot_instance(tr);
1311 WARN_ON(ret < 0);
1312
1313 return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first if
 * that has not happened yet. Only use this where sleeping is safe, as
 * the allocation may sleep.
 *
 * Swaps the snapshot buffer with the current live tracing buffer, so
 * the live trace can be captured when some condition triggers while
 * tracing carries on. No snapshot is taken if the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() >= 0)
		tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339
1340 /**
1341 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342 * @tr: The tracing instance
1343 * @cond_data: User data to associate with the snapshot
1344 * @update: Implementation of the cond_snapshot update function
1345 *
1346 * Check whether the conditional snapshot for the given instance has
1347 * already been enabled, or if the current tracer is already using a
1348 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349 * save the cond_data and update function inside.
1350 *
1351 * Returns 0 if successful, error otherwise.
1352 */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 cond_update_fn_t update)
1355 {
1356 struct cond_snapshot *cond_snapshot;
1357 int ret = 0;
1358
1359 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 if (!cond_snapshot)
1361 return -ENOMEM;
1362
1363 cond_snapshot->cond_data = cond_data;
1364 cond_snapshot->update = update;
1365
1366 mutex_lock(&trace_types_lock);
1367
1368 ret = tracing_alloc_snapshot_instance(tr);
1369 if (ret)
1370 goto fail_unlock;
1371
1372 if (tr->current_trace->use_max_tr) {
1373 ret = -EBUSY;
1374 goto fail_unlock;
1375 }
1376
1377 /*
1378 * The cond_snapshot can only change to NULL without the
1379 * trace_types_lock. We don't care if we race with it going
1380 * to NULL, but we want to make sure that it's not set to
1381 * something other than NULL when we get here, which we can
1382 * do safely with only holding the trace_types_lock and not
1383 * having to take the max_lock.
1384 */
1385 if (tr->cond_snapshot) {
1386 ret = -EBUSY;
1387 goto fail_unlock;
1388 }
1389
1390 local_irq_disable();
1391 arch_spin_lock(&tr->max_lock);
1392 tr->cond_snapshot = cond_snapshot;
1393 arch_spin_unlock(&tr->max_lock);
1394 local_irq_enable();
1395
1396 mutex_unlock(&trace_types_lock);
1397
1398 return ret;
1399
1400 fail_unlock:
1401 mutex_unlock(&trace_types_lock);
1402 kfree(cond_snapshot);
1403 return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406
1407 /**
1408 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409 * @tr: The tracing instance
1410 *
1411 * Check whether the conditional snapshot for the given instance is
1412 * enabled; if so, free the cond_snapshot associated with it,
1413 * otherwise return -EINVAL.
1414 *
1415 * Returns 0 if successful, error otherwise.
1416 */
tracing_snapshot_cond_disable(struct trace_array * tr)1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419 int ret = 0;
1420
1421 local_irq_disable();
1422 arch_spin_lock(&tr->max_lock);
1423
1424 if (!tr->cond_snapshot)
1425 ret = -EINVAL;
1426 else {
1427 kfree(tr->cond_snapshot);
1428 tr->cond_snapshot = NULL;
1429 }
1430
1431 arch_spin_unlock(&tr->max_lock);
1432 local_irq_enable();
1433
1434 return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437 #else
/* Variant for builds without CONFIG_TRACER_SNAPSHOT: warn once, do nothing. */
void tracing_snapshot(void)
{
	WARN_ONCE(1,
		  "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/*
 * Variant for builds without CONFIG_TRACER_SNAPSHOT: warn once and ignore
 * both @tr and @cond_data.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1,
		  "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1448 int tracing_alloc_snapshot(void)
1449 {
1450 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/*
 * Variant for builds without CONFIG_TRACER_SNAPSHOT: only calls
 * tracing_snapshot() so that its warning is emitted.
 */
void tracing_snapshot_alloc(void)
{
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462 return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
/*
 * Variant for builds without CONFIG_TRACER_SNAPSHOT: conditional
 * snapshots cannot be enabled, always returns -ENODEV.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/*
 * Variant for builds without CONFIG_TRACER_SNAPSHOT: nothing to disable,
 * always returns 0.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr) do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477
tracer_tracing_off(struct trace_array * tr)1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480 if (tr->array_buffer.buffer)
1481 ring_buffer_record_off(tr->array_buffer.buffer);
1482 /*
1483 * This flag is looked at when buffers haven't been allocated
1484 * yet, or by some tracers (like irqsoff), that just want to
1485 * know if the ring buffer has been disabled, but it can handle
1486 * races of where it gets disabled but we still do a record.
1487 * As the check is in the fast path of the tracers, it is more
1488 * important to be fast than accurate.
1489 */
1490 tr->buffer_disabled = 1;
1491 /* Make the flag seen by readers */
1492 smp_wmb();
1493 }
1494
1495 /**
1496 * tracing_off - turn off tracing buffers
1497 *
1498 * This function stops the tracing buffers from recording data.
1499 * It does not disable any overhead the tracers themselves may
1500 * be causing. This function simply causes all recording to
1501 * the ring buffers to fail.
1502 */
tracing_off(void)1503 void tracing_off(void)
1504 {
1505 tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508
disable_trace_on_warning(void)1509 void disable_trace_on_warning(void)
1510 {
1511 if (__disable_trace_on_warning) {
1512 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513 "Disabling tracing due to warning\n");
1514 tracing_off();
1515 }
1516 }
1517
1518 /**
1519 * tracer_tracing_is_on - show real state of ring buffer enabled
1520 * @tr : the trace array to know if ring buffer is enabled
1521 *
1522 * Shows real state of the ring buffer if it is enabled or not.
1523 */
tracer_tracing_is_on(struct trace_array * tr)1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526 if (tr->array_buffer.buffer)
1527 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 return !tr->buffer_disabled;
1529 }
1530
1531 /**
1532 * tracing_is_on - show state of ring buffers enabled
1533 */
tracing_is_on(void)1534 int tracing_is_on(void)
1535 {
1536 return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539
set_buf_size(char * str)1540 static int __init set_buf_size(char *str)
1541 {
1542 unsigned long buf_size;
1543
1544 if (!str)
1545 return 0;
1546 buf_size = memparse(str, &str);
1547 /*
1548 * nr_entries can not be zero and the startup
1549 * tests require some buffer space. Therefore
1550 * ensure we have at least 4096 bytes of buffer.
1551 */
1552 trace_buf_size = max(4096UL, buf_size);
1553 return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556
set_tracing_thresh(char * str)1557 static int __init set_tracing_thresh(char *str)
1558 {
1559 unsigned long threshold;
1560 int ret;
1561
1562 if (!str)
1563 return 0;
1564 ret = kstrtoul(str, 0, &threshold);
1565 if (ret < 0)
1566 return 0;
1567 tracing_thresh = threshold * 1000;
1568 return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571
/* Convert @nsecs nanoseconds to whole microseconds, rounding down. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1576
1577 /*
1578 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581 * of strings in the order that the evals (enum) were defined.
1582 */
1583 #undef C
1584 #define C(a, b) b
1585
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588 TRACE_FLAGS
1589 NULL
1590 };
1591
1592 static struct {
1593 u64 (*func)(void);
1594 const char *name;
1595 int in_ns; /* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597 { trace_clock_local, "local", 1 },
1598 { trace_clock_global, "global", 1 },
1599 { trace_clock_counter, "counter", 0 },
1600 { trace_clock_jiffies, "uptime", 0 },
1601 { trace_clock, "perf", 1 },
1602 { ktime_get_mono_fast_ns, "mono", 1 },
1603 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1604 { ktime_get_boot_fast_ns, "boot", 1 },
1605 { ktime_get_tai_fast_ns, "tai", 1 },
1606 ARCH_TRACE_CLOCKS
1607 };
1608
trace_clock_in_ns(struct trace_array * tr)1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 if (trace_clocks[tr->clock_id].in_ns)
1612 return true;
1613
1614 return false;
1615 }
1616
1617 /*
1618 * trace_parser_get_init - gets the buffer for trace parser
1619 */
trace_parser_get_init(struct trace_parser * parser,int size)1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 memset(parser, 0, sizeof(*parser));
1623
1624 parser->buffer = kmalloc(size, GFP_KERNEL);
1625 if (!parser->buffer)
1626 return 1;
1627
1628 parser->size = size;
1629 return 0;
1630 }
1631
1632 /*
1633 * trace_parser_put - frees the buffer for trace parser
1634 */
trace_parser_put(struct trace_parser * parser)1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637 kfree(parser->buffer);
1638 parser->buffer = NULL;
1639 }
1640
1641 /*
1642 * trace_get_user - reads the user input string separated by space
1643 * (matched by isspace(ch))
1644 *
1645 * For each string found the 'struct trace_parser' is updated,
1646 * and the function returns.
1647 *
1648 * Returns number of bytes read.
1649 *
1650 * See kernel/trace/trace.h for 'struct trace_parser' details.
1651 */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 size_t cnt, loff_t *ppos)
1654 {
1655 char ch;
1656 size_t read = 0;
1657 ssize_t ret;
1658
1659 if (!*ppos)
1660 trace_parser_clear(parser);
1661
1662 ret = get_user(ch, ubuf++);
1663 if (ret)
1664 goto out;
1665
1666 read++;
1667 cnt--;
1668
1669 /*
1670 * The parser is not finished with the last write,
1671 * continue reading the user input without skipping spaces.
1672 */
1673 if (!parser->cont) {
1674 /* skip white space */
1675 while (cnt && isspace(ch)) {
1676 ret = get_user(ch, ubuf++);
1677 if (ret)
1678 goto out;
1679 read++;
1680 cnt--;
1681 }
1682
1683 parser->idx = 0;
1684
1685 /* only spaces were written */
1686 if (isspace(ch) || !ch) {
1687 *ppos += read;
1688 ret = read;
1689 goto out;
1690 }
1691 }
1692
1693 /* read the non-space input */
1694 while (cnt && !isspace(ch) && ch) {
1695 if (parser->idx < parser->size - 1)
1696 parser->buffer[parser->idx++] = ch;
1697 else {
1698 ret = -EINVAL;
1699 goto out;
1700 }
1701 ret = get_user(ch, ubuf++);
1702 if (ret)
1703 goto out;
1704 read++;
1705 cnt--;
1706 }
1707
1708 /* We either got finished input or we have to wait for another call. */
1709 if (isspace(ch) || !ch) {
1710 parser->buffer[parser->idx] = 0;
1711 parser->cont = false;
1712 } else if (parser->idx < parser->size - 1) {
1713 parser->cont = true;
1714 parser->buffer[parser->idx++] = ch;
1715 /* Make sure the parsed string always terminates with '\0'. */
1716 parser->buffer[parser->idx] = 0;
1717 } else {
1718 ret = -EINVAL;
1719 goto out;
1720 }
1721
1722 *ppos += read;
1723 ret = read;
1724
1725 out:
1726 return ret;
1727 }
1728
1729 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 int len;
1733
1734 if (trace_seq_used(s) <= s->seq.readpos)
1735 return -EBUSY;
1736
1737 len = trace_seq_used(s) - s->seq.readpos;
1738 if (cnt > len)
1739 cnt = len;
1740 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1741
1742 s->seq.readpos += cnt;
1743 return cnt;
1744 }
1745
1746 unsigned long __read_mostly tracing_thresh;
1747
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750
1751 #ifdef LATENCY_FS_NOTIFY
1752
1753 static struct workqueue_struct *fsnotify_wq;
1754
latency_fsnotify_workfn(struct work_struct * work)1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 struct trace_array *tr = container_of(work, struct trace_array,
1758 fsnotify_work);
1759 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761
latency_fsnotify_workfn_irq(struct irq_work * iwork)1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 struct trace_array *tr = container_of(iwork, struct trace_array,
1765 fsnotify_irqwork);
1766 queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770 struct dentry *d_tracer)
1771 {
1772 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 TRACE_MODE_WRITE,
1776 d_tracer, tr,
1777 &tracing_max_lat_fops);
1778 }
1779
latency_fsnotify_init(void)1780 __init static int latency_fsnotify_init(void)
1781 {
1782 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 if (!fsnotify_wq) {
1785 pr_err("Unable to allocate tr_max_lat_wq\n");
1786 return -ENOMEM;
1787 }
1788 return 0;
1789 }
1790
1791 late_initcall_sync(latency_fsnotify_init);
1792
latency_fsnotify(struct trace_array * tr)1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795 if (!fsnotify_wq)
1796 return;
1797 /*
1798 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 * possible that we are called from __schedule() or do_idle(), which
1800 * could cause a deadlock.
1801 */
1802 irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804
1805 #else /* !LATENCY_FS_NOTIFY */
1806
/* Without LATENCY_FS_NOTIFY only the "tracing_max_latency" file is created */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)
1810
1811 #endif
1812
1813 /*
1814 * Copy the new maximum trace into the separate maximum-trace
1815 * structure. (this way the maximum trace is permanently saved,
1816 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817 */
1818 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 struct array_buffer *trace_buf = &tr->array_buffer;
1822 struct array_buffer *max_buf = &tr->max_buffer;
1823 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825
1826 max_buf->cpu = cpu;
1827 max_buf->time_start = data->preempt_timestamp;
1828
1829 max_data->saved_latency = tr->max_latency;
1830 max_data->critical_start = data->critical_start;
1831 max_data->critical_end = data->critical_end;
1832
1833 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 max_data->pid = tsk->pid;
1835 /*
1836 * If tsk == current, then use current_uid(), as that does not use
1837 * RCU. The irq tracer can be called out of RCU scope.
1838 */
1839 if (tsk == current)
1840 max_data->uid = current_uid();
1841 else
1842 max_data->uid = task_uid(tsk);
1843
1844 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 max_data->policy = tsk->policy;
1846 max_data->rt_priority = tsk->rt_priority;
1847
1848 /* record this tasks comm */
1849 tracing_record_cmdline(tsk);
1850 latency_fsnotify(tr);
1851 }
1852
1853 /**
1854 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855 * @tr: tracer
1856 * @tsk: the task with the latency
1857 * @cpu: The cpu that initiated the trace.
1858 * @cond_data: User data associated with a conditional snapshot
1859 *
1860 * Flip the buffers between the @tr and the max_tr and record information
1861 * about which task was the cause of this latency.
1862 */
1863 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 void *cond_data)
1866 {
1867 if (tr->stop_count)
1868 return;
1869
1870 WARN_ON_ONCE(!irqs_disabled());
1871
1872 if (!tr->allocated_snapshot) {
1873 /* Only the nop tracer should hit this when disabling */
1874 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 return;
1876 }
1877
1878 arch_spin_lock(&tr->max_lock);
1879
1880 /* Inherit the recordable setting from array_buffer */
1881 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 ring_buffer_record_on(tr->max_buffer.buffer);
1883 else
1884 ring_buffer_record_off(tr->max_buffer.buffer);
1885
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 arch_spin_unlock(&tr->max_lock);
1889 return;
1890 }
1891 #endif
1892 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893
1894 __update_max_tr(tr, tsk, cpu);
1895
1896 arch_spin_unlock(&tr->max_lock);
1897
1898 /* Any waiters on the old snapshot buffer need to wake up */
1899 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
1901
1902 /**
1903 * update_max_tr_single - only copy one trace over, and reset the rest
1904 * @tr: tracer
1905 * @tsk: task with the latency
1906 * @cpu: the cpu of the buffer to copy.
1907 *
1908 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909 */
1910 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 int ret;
1914
1915 if (tr->stop_count)
1916 return;
1917
1918 WARN_ON_ONCE(!irqs_disabled());
1919 if (!tr->allocated_snapshot) {
1920 /* Only the nop tracer should hit this when disabling */
1921 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 return;
1923 }
1924
1925 arch_spin_lock(&tr->max_lock);
1926
1927 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928
1929 if (ret == -EBUSY) {
1930 /*
1931 * We failed to swap the buffer due to a commit taking
1932 * place on this CPU. We fail to record, but we reset
1933 * the max trace buffer (no one writes directly to it)
1934 * and flag that it failed.
1935 * Another reason is resize is in progress.
1936 */
1937 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 "Failed to swap buffers due to commit or resize in progress\n");
1939 }
1940
1941 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942
1943 __update_max_tr(tr, tsk, cpu);
1944 arch_spin_unlock(&tr->max_lock);
1945 }
1946
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948
wait_on_pipe(struct trace_iterator * iter,int full)1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 int ret;
1952
1953 /* Iterators are static, they should be filled or empty */
1954 if (trace_buffer_iter(iter, iter->cpu_file))
1955 return 0;
1956
1957 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 /*
1961 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 * to happen, this would now be the main buffer.
1963 */
1964 if (iter->snapshot)
1965 iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 return ret;
1968 }
1969
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972
1973 struct trace_selftests {
1974 struct list_head list;
1975 struct tracer *type;
1976 };
1977
1978 static LIST_HEAD(postponed_selftests);
1979
save_selftest(struct tracer * type)1980 static int save_selftest(struct tracer *type)
1981 {
1982 struct trace_selftests *selftest;
1983
1984 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 if (!selftest)
1986 return -ENOMEM;
1987
1988 selftest->type = type;
1989 list_add(&selftest->list, &postponed_selftests);
1990 return 0;
1991 }
1992
run_tracer_selftest(struct tracer * type)1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 struct trace_array *tr = &global_trace;
1996 struct tracer *saved_tracer = tr->current_trace;
1997 int ret;
1998
1999 if (!type->selftest || tracing_selftest_disabled)
2000 return 0;
2001
2002 /*
2003 * If a tracer registers early in boot up (before scheduling is
2004 * initialized and such), then do not run its selftests yet.
2005 * Instead, run it a little later in the boot process.
2006 */
2007 if (!selftests_can_run)
2008 return save_selftest(type);
2009
2010 if (!tracing_is_on()) {
2011 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 type->name);
2013 return 0;
2014 }
2015
2016 /*
2017 * Run a selftest on this tracer.
2018 * Here we reset the trace buffer, and set the current
2019 * tracer to be this tracer. The tracer can then run some
2020 * internal tracing to verify that everything is in order.
2021 * If we fail, we do not register this tracer.
2022 */
2023 tracing_reset_online_cpus(&tr->array_buffer);
2024
2025 tr->current_trace = type;
2026
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 if (type->use_max_tr) {
2029 /* If we expanded the buffers, make sure the max is expanded too */
2030 if (ring_buffer_expanded)
2031 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 RING_BUFFER_ALL_CPUS);
2033 tr->allocated_snapshot = true;
2034 }
2035 #endif
2036
2037 /* the test is responsible for initializing and enabling */
2038 pr_info("Testing tracer %s: ", type->name);
2039 ret = type->selftest(type, tr);
2040 /* the test is responsible for resetting too */
2041 tr->current_trace = saved_tracer;
2042 if (ret) {
2043 printk(KERN_CONT "FAILED!\n");
2044 /* Add the warning after printing 'FAILED' */
2045 WARN_ON(1);
2046 return -1;
2047 }
2048 /* Only reset on passing, to avoid touching corrupted buffers */
2049 tracing_reset_online_cpus(&tr->array_buffer);
2050
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 if (type->use_max_tr) {
2053 tr->allocated_snapshot = false;
2054
2055 /* Shrink the max buffer again */
2056 if (ring_buffer_expanded)
2057 ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 RING_BUFFER_ALL_CPUS);
2059 }
2060 #endif
2061
2062 printk(KERN_CONT "PASSED\n");
2063 return 0;
2064 }
2065
do_run_tracer_selftest(struct tracer * type)2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 int ret;
2069
2070 /*
2071 * Tests can take a long time, especially if they are run one after the
2072 * other, as does happen during bootup when all the tracers are
2073 * registered. This could cause the soft lockup watchdog to trigger.
2074 */
2075 cond_resched();
2076
2077 tracing_selftest_running = true;
2078 ret = run_tracer_selftest(type);
2079 tracing_selftest_running = false;
2080
2081 return ret;
2082 }
2083
init_trace_selftests(void)2084 static __init int init_trace_selftests(void)
2085 {
2086 struct trace_selftests *p, *n;
2087 struct tracer *t, **last;
2088 int ret;
2089
2090 selftests_can_run = true;
2091
2092 mutex_lock(&trace_types_lock);
2093
2094 if (list_empty(&postponed_selftests))
2095 goto out;
2096
2097 pr_info("Running postponed tracer tests:\n");
2098
2099 tracing_selftest_running = true;
2100 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 /* This loop can take minutes when sanitizers are enabled, so
2102 * lets make sure we allow RCU processing.
2103 */
2104 cond_resched();
2105 ret = run_tracer_selftest(p->type);
2106 /* If the test fails, then warn and remove from available_tracers */
2107 if (ret < 0) {
2108 WARN(1, "tracer: %s failed selftest, disabling\n",
2109 p->type->name);
2110 last = &trace_types;
2111 for (t = trace_types; t; t = t->next) {
2112 if (t == p->type) {
2113 *last = t->next;
2114 break;
2115 }
2116 last = &t->next;
2117 }
2118 }
2119 list_del(&p->list);
2120 kfree(p);
2121 }
2122 tracing_selftest_running = false;
2123
2124 out:
2125 mutex_unlock(&trace_types_lock);
2126
2127 return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
/* Stub used when CONFIG_FTRACE_STARTUP_TEST is not set: always reports success. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
/* Stub used when CONFIG_FTRACE_STARTUP_TEST is not set: always reports success. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140
/* Forward declarations: both are called by register_tracer() below. */
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);
2144
2145 /**
2146 * register_tracer - register a tracer with the ftrace system.
2147 * @type: the plugin for the tracer
2148 *
2149 * Register a new plugin tracer.
2150 */
register_tracer(struct tracer * type)2151 int __init register_tracer(struct tracer *type)
2152 {
2153 struct tracer *t;
2154 int ret = 0;
2155
2156 if (!type->name) {
2157 pr_info("Tracer must have a name\n");
2158 return -1;
2159 }
2160
2161 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 return -1;
2164 }
2165
2166 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 pr_warn("Can not register tracer %s due to lockdown\n",
2168 type->name);
2169 return -EPERM;
2170 }
2171
2172 mutex_lock(&trace_types_lock);
2173
2174 for (t = trace_types; t; t = t->next) {
2175 if (strcmp(type->name, t->name) == 0) {
2176 /* already found */
2177 pr_info("Tracer %s already registered\n",
2178 type->name);
2179 ret = -1;
2180 goto out;
2181 }
2182 }
2183
2184 if (!type->set_flag)
2185 type->set_flag = &dummy_set_flag;
2186 if (!type->flags) {
2187 /*allocate a dummy tracer_flags*/
2188 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 if (!type->flags) {
2190 ret = -ENOMEM;
2191 goto out;
2192 }
2193 type->flags->val = 0;
2194 type->flags->opts = dummy_tracer_opt;
2195 } else
2196 if (!type->flags->opts)
2197 type->flags->opts = dummy_tracer_opt;
2198
2199 /* store the tracer for __set_tracer_option */
2200 type->flags->trace = type;
2201
2202 ret = do_run_tracer_selftest(type);
2203 if (ret < 0)
2204 goto out;
2205
2206 type->next = trace_types;
2207 trace_types = type;
2208 add_tracer_options(&global_trace, type);
2209
2210 out:
2211 mutex_unlock(&trace_types_lock);
2212
2213 if (ret || !default_bootup_tracer)
2214 goto out_unlock;
2215
2216 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 goto out_unlock;
2218
2219 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 /* Do we want this tracer to start on bootup? */
2221 tracing_set_tracer(&global_trace, type->name);
2222 default_bootup_tracer = NULL;
2223
2224 apply_trace_boot_options();
2225
2226 /* disable other selftests, since this will break it. */
2227 disable_tracing_selftest("running a tracer");
2228
2229 out_unlock:
2230 return ret;
2231 }
2232
tracing_reset_cpu(struct array_buffer * buf,int cpu)2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 struct trace_buffer *buffer = buf->buffer;
2236
2237 if (!buffer)
2238 return;
2239
2240 ring_buffer_record_disable(buffer);
2241
2242 /* Make sure all commits have finished */
2243 synchronize_rcu();
2244 ring_buffer_reset_cpu(buffer, cpu);
2245
2246 ring_buffer_record_enable(buffer);
2247 }
2248
tracing_reset_online_cpus(struct array_buffer * buf)2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 struct trace_buffer *buffer = buf->buffer;
2252
2253 if (!buffer)
2254 return;
2255
2256 ring_buffer_record_disable(buffer);
2257
2258 /* Make sure all commits have finished */
2259 synchronize_rcu();
2260
2261 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262
2263 ring_buffer_reset_online_cpus(buffer);
2264
2265 ring_buffer_record_enable(buffer);
2266 }
2267
2268 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 struct trace_array *tr;
2272
2273 lockdep_assert_held(&trace_types_lock);
2274
2275 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 if (!tr->clear_trace)
2277 continue;
2278 tr->clear_trace = false;
2279 tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 }
2284 }
2285
/*
 * Locked variant of tracing_reset_all_online_cpus_unlocked(): takes
 * trace_types_lock around the call.
 */
void tracing_reset_all_online_cpus(void)
{
	mutex_lock(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
	mutex_unlock(&trace_types_lock);
}
2292
/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 *
 * Readers load this pointer with smp_load_acquire() (see
 * trace_find_tgid_ptr()) and treat a NULL pointer as "no map".
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;
2301
/* Number of cmdlines requested by trace_create_savedcmd() */
#define SAVED_CMDLINES_DEFAULT 128
/* Marker stored in both maps for "no mapping" */
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupt is disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/*
 * Cache of task comms. A slot is TASK_COMM_LEN bytes of saved_cmdlines[]
 * plus the matching map_cmdline_to_pid[] element.
 */
struct saved_cmdlines_buffer {
	/* indexed by (pid & (PID_MAX_DEFAULT - 1)); holds a slot index */
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* indexed by slot; holds the pid whose comm is stored in that slot */
	unsigned *map_cmdline_to_pid;
	/* number of slots */
	unsigned cmdline_num;
	/* slot most recently handed out by trace_save_cmdline() */
	int cmdline_idx;
	/* cmdline_num entries of TASK_COMM_LEN bytes each */
	char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;

/* Holds the size of a cmdline and pid element */
#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
2322
get_saved_cmdlines(int idx)2323 static inline char *get_saved_cmdlines(int idx)
2324 {
2325 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2326 }
2327
set_cmdline(int idx,const char * cmdline)2328 static inline void set_cmdline(int idx, const char *cmdline)
2329 {
2330 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2331 }
2332
free_saved_cmdlines_buffer(struct saved_cmdlines_buffer * s)2333 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2334 {
2335 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2336
2337 kmemleak_free(s);
2338 free_pages((unsigned long)s, order);
2339 }
2340
allocate_cmdlines_buffer(unsigned int val)2341 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2342 {
2343 struct saved_cmdlines_buffer *s;
2344 struct page *page;
2345 int orig_size, size;
2346 int order;
2347
2348 /* Figure out how much is needed to hold the given number of cmdlines */
2349 orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2350 order = get_order(orig_size);
2351 size = 1 << (order + PAGE_SHIFT);
2352 page = alloc_pages(GFP_KERNEL, order);
2353 if (!page)
2354 return NULL;
2355
2356 s = page_address(page);
2357 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2358 memset(s, 0, sizeof(*s));
2359
2360 /* Round up to actual allocation */
2361 val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2362 s->cmdline_num = val;
2363
2364 /* Place map_cmdline_to_pid array right after saved_cmdlines */
2365 s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
2366
2367 s->cmdline_idx = 0;
2368 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2369 sizeof(s->map_pid_to_cmdline));
2370 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2371 val * sizeof(*s->map_cmdline_to_pid));
2372
2373 return s;
2374 }
2375
trace_create_savedcmd(void)2376 static int trace_create_savedcmd(void)
2377 {
2378 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2379
2380 return savedcmd ? 0 : -ENOMEM;
2381 }
2382
is_tracing_stopped(void)2383 int is_tracing_stopped(void)
2384 {
2385 return global_trace.stop_count;
2386 }
2387
tracing_start_tr(struct trace_array * tr)2388 static void tracing_start_tr(struct trace_array *tr)
2389 {
2390 struct trace_buffer *buffer;
2391 unsigned long flags;
2392
2393 if (tracing_disabled)
2394 return;
2395
2396 raw_spin_lock_irqsave(&tr->start_lock, flags);
2397 if (--tr->stop_count) {
2398 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2399 /* Someone screwed up their debugging */
2400 tr->stop_count = 0;
2401 }
2402 goto out;
2403 }
2404
2405 /* Prevent the buffers from switching */
2406 arch_spin_lock(&tr->max_lock);
2407
2408 buffer = tr->array_buffer.buffer;
2409 if (buffer)
2410 ring_buffer_record_enable(buffer);
2411
2412 #ifdef CONFIG_TRACER_MAX_TRACE
2413 buffer = tr->max_buffer.buffer;
2414 if (buffer)
2415 ring_buffer_record_enable(buffer);
2416 #endif
2417
2418 arch_spin_unlock(&tr->max_lock);
2419
2420 out:
2421 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2422 }
2423
2424 /**
2425 * tracing_start - quick start of the tracer
2426 *
2427 * If tracing is enabled but was stopped by tracing_stop,
2428 * this will start the tracer back up.
2429 */
tracing_start(void)2430 void tracing_start(void)
2431
2432 {
2433 return tracing_start_tr(&global_trace);
2434 }
2435
tracing_stop_tr(struct trace_array * tr)2436 static void tracing_stop_tr(struct trace_array *tr)
2437 {
2438 struct trace_buffer *buffer;
2439 unsigned long flags;
2440
2441 raw_spin_lock_irqsave(&tr->start_lock, flags);
2442 if (tr->stop_count++)
2443 goto out;
2444
2445 /* Prevent the buffers from switching */
2446 arch_spin_lock(&tr->max_lock);
2447
2448 buffer = tr->array_buffer.buffer;
2449 if (buffer)
2450 ring_buffer_record_disable(buffer);
2451
2452 #ifdef CONFIG_TRACER_MAX_TRACE
2453 buffer = tr->max_buffer.buffer;
2454 if (buffer)
2455 ring_buffer_record_disable(buffer);
2456 #endif
2457
2458 arch_spin_unlock(&tr->max_lock);
2459
2460 out:
2461 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2462 }
2463
2464 /**
2465 * tracing_stop - quick stop of the tracer
2466 *
2467 * Light weight way to stop tracing. Use in conjunction with
2468 * tracing_start.
2469 */
tracing_stop(void)2470 void tracing_stop(void)
2471 {
2472 return tracing_stop_tr(&global_trace);
2473 }
2474
trace_save_cmdline(struct task_struct * tsk)2475 static int trace_save_cmdline(struct task_struct *tsk)
2476 {
2477 unsigned tpid, idx;
2478
2479 /* treat recording of idle task as a success */
2480 if (!tsk->pid)
2481 return 1;
2482
2483 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2484
2485 /*
2486 * It's not the end of the world if we don't get
2487 * the lock, but we also don't want to spin
2488 * nor do we want to disable interrupts,
2489 * so if we miss here, then better luck next time.
2490 *
2491 * This is called within the scheduler and wake up, so interrupts
2492 * had better been disabled and run queue lock been held.
2493 */
2494 lockdep_assert_preemption_disabled();
2495 if (!arch_spin_trylock(&trace_cmdline_lock))
2496 return 0;
2497
2498 idx = savedcmd->map_pid_to_cmdline[tpid];
2499 if (idx == NO_CMDLINE_MAP) {
2500 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2501
2502 savedcmd->map_pid_to_cmdline[tpid] = idx;
2503 savedcmd->cmdline_idx = idx;
2504 }
2505
2506 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2507 set_cmdline(idx, tsk->comm);
2508
2509 arch_spin_unlock(&trace_cmdline_lock);
2510
2511 return 1;
2512 }
2513
__trace_find_cmdline(int pid,char comm[])2514 static void __trace_find_cmdline(int pid, char comm[])
2515 {
2516 unsigned map;
2517 int tpid;
2518
2519 if (!pid) {
2520 strcpy(comm, "<idle>");
2521 return;
2522 }
2523
2524 if (WARN_ON_ONCE(pid < 0)) {
2525 strcpy(comm, "<XXX>");
2526 return;
2527 }
2528
2529 tpid = pid & (PID_MAX_DEFAULT - 1);
2530 map = savedcmd->map_pid_to_cmdline[tpid];
2531 if (map != NO_CMDLINE_MAP) {
2532 tpid = savedcmd->map_cmdline_to_pid[map];
2533 if (tpid == pid) {
2534 strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2535 return;
2536 }
2537 }
2538 strcpy(comm, "<...>");
2539 }
2540
/*
 * Copy the saved comm for @pid into @comm. Wraps __trace_find_cmdline()
 * with preemption disabled and trace_cmdline_lock held.
 */
void trace_find_cmdline(int pid, char comm[])
{
	/* Preemption must be off before trace_cmdline_lock is taken */
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
2551
trace_find_tgid_ptr(int pid)2552 static int *trace_find_tgid_ptr(int pid)
2553 {
2554 /*
2555 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2556 * if we observe a non-NULL tgid_map then we also observe the correct
2557 * tgid_map_max.
2558 */
2559 int *map = smp_load_acquire(&tgid_map);
2560
2561 if (unlikely(!map || pid > tgid_map_max))
2562 return NULL;
2563
2564 return &map[pid];
2565 }
2566
/* Return the tgid recorded for @pid, or 0 when no entry is available. */
int trace_find_tgid(int pid)
{
	int *entry = trace_find_tgid_ptr(pid);

	if (!entry)
		return 0;

	return *entry;
}
2573
trace_save_tgid(struct task_struct * tsk)2574 static int trace_save_tgid(struct task_struct *tsk)
2575 {
2576 int *ptr;
2577
2578 /* treat recording of idle task as a success */
2579 if (!tsk->pid)
2580 return 1;
2581
2582 ptr = trace_find_tgid_ptr(tsk->pid);
2583 if (!ptr)
2584 return 0;
2585
2586 *ptr = tsk->tgid;
2587 return 1;
2588 }
2589
tracing_record_taskinfo_skip(int flags)2590 static bool tracing_record_taskinfo_skip(int flags)
2591 {
2592 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2593 return true;
2594 if (!__this_cpu_read(trace_taskinfo_save))
2595 return true;
2596 return false;
2597 }
2598
2599 /**
2600 * tracing_record_taskinfo - record the task info of a task
2601 *
2602 * @task: task to record
2603 * @flags: TRACE_RECORD_CMDLINE for recording comm
2604 * TRACE_RECORD_TGID for recording tgid
2605 */
tracing_record_taskinfo(struct task_struct * task,int flags)2606 void tracing_record_taskinfo(struct task_struct *task, int flags)
2607 {
2608 bool done;
2609
2610 if (tracing_record_taskinfo_skip(flags))
2611 return;
2612
2613 /*
2614 * Record as much task information as possible. If some fail, continue
2615 * to try to record the others.
2616 */
2617 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2618 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2619
2620 /* If recording any information failed, retry again soon. */
2621 if (!done)
2622 return;
2623
2624 __this_cpu_write(trace_taskinfo_save, false);
2625 }
2626
2627 /**
2628 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2629 *
2630 * @prev: previous task during sched_switch
2631 * @next: next task during sched_switch
2632 * @flags: TRACE_RECORD_CMDLINE for recording comm
2633 * TRACE_RECORD_TGID for recording tgid
2634 */
tracing_record_taskinfo_sched_switch(struct task_struct * prev,struct task_struct * next,int flags)2635 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2636 struct task_struct *next, int flags)
2637 {
2638 bool done;
2639
2640 if (tracing_record_taskinfo_skip(flags))
2641 return;
2642
2643 /*
2644 * Record as much task information as possible. If some fail, continue
2645 * to try to record the others.
2646 */
2647 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2648 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2649 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2650 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2651
2652 /* If recording any information failed, retry again soon. */
2653 if (!done)
2654 return;
2655
2656 __this_cpu_write(trace_taskinfo_save, false);
2657 }
2658
/* Helpers to record a specific task information */

/* Record only the comm of @task; see tracing_record_taskinfo(). */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
2664
/* Record only the tgid of @task; see tracing_record_taskinfo(). */
void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
2669
2670 /*
2671 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2672 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2673 * simplifies those functions and keeps them in sync.
2674 */
trace_handle_return(struct trace_seq * s)2675 enum print_line_t trace_handle_return(struct trace_seq *s)
2676 {
2677 return trace_seq_has_overflowed(s) ?
2678 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2679 }
2680 EXPORT_SYMBOL_GPL(trace_handle_return);
2681
/* Return current->migration_disabled on SMP builds, 0 otherwise. */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2690
tracing_gen_ctx_irq_test(unsigned int irqs_status)2691 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2692 {
2693 unsigned int trace_flags = irqs_status;
2694 unsigned int pc;
2695
2696 pc = preempt_count();
2697
2698 if (pc & NMI_MASK)
2699 trace_flags |= TRACE_FLAG_NMI;
2700 if (pc & HARDIRQ_MASK)
2701 trace_flags |= TRACE_FLAG_HARDIRQ;
2702 if (in_serving_softirq())
2703 trace_flags |= TRACE_FLAG_SOFTIRQ;
2704 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2705 trace_flags |= TRACE_FLAG_BH_OFF;
2706
2707 if (tif_need_resched())
2708 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2709 if (test_preempt_need_resched())
2710 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2711 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2712 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2713 }
2714
/*
 * Out-of-line wrapper that forwards all arguments unchanged to
 * __trace_buffer_lock_reserve() and returns its result.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2723
/*
 * Per-CPU temporary event buffer (one page, see
 * trace_buffered_event_enable()); NULL when not allocated.
 */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/*
 * Per-CPU use count of the buffer above. trace_event_buffer_lock_reserve()
 * only uses the buffer when its increment brings this to 1.
 */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() calls not yet undone by a disable */
static int trace_buffered_event_ref;
2727
2728 /**
2729 * trace_buffered_event_enable - enable buffering events
2730 *
2731 * When events are being filtered, it is quicker to use a temporary
2732 * buffer to write the event data into if there's a likely chance
2733 * that it will not be committed. The discard of the ring buffer
2734 * is not as fast as committing, and is much slower than copying
2735 * a commit.
2736 *
2737 * When an event is to be filtered, allocate per cpu buffers to
2738 * write the event data into, and if the event is filtered and discarded
2739 * it is simply dropped, otherwise, the entire data is to be committed
2740 * in one shot.
2741 */
trace_buffered_event_enable(void)2742 void trace_buffered_event_enable(void)
2743 {
2744 struct ring_buffer_event *event;
2745 struct page *page;
2746 int cpu;
2747
2748 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2749
2750 if (trace_buffered_event_ref++)
2751 return;
2752
2753 for_each_tracing_cpu(cpu) {
2754 page = alloc_pages_node(cpu_to_node(cpu),
2755 GFP_KERNEL | __GFP_NORETRY, 0);
2756 /* This is just an optimization and can handle failures */
2757 if (!page) {
2758 pr_err("Failed to allocate event buffer\n");
2759 break;
2760 }
2761
2762 event = page_address(page);
2763 memset(event, 0, sizeof(*event));
2764
2765 per_cpu(trace_buffered_event, cpu) = event;
2766
2767 preempt_disable();
2768 if (cpu == smp_processor_id() &&
2769 __this_cpu_read(trace_buffered_event) !=
2770 per_cpu(trace_buffered_event, cpu))
2771 WARN_ON_ONCE(1);
2772 preempt_enable();
2773 }
2774 }
2775
/*
 * Run on each tracing CPU by trace_buffered_event_disable() via
 * on_each_cpu_mask(): drops the count taken by
 * disable_trace_buffered_event(). @data is unused.
 */
static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}
2782
/*
 * Run on each tracing CPU by trace_buffered_event_disable() via
 * on_each_cpu_mask(): raises this CPU's trace_buffered_event_cnt so the
 * buffer is seen as in use. @data is unused.
 */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2787
2788 /**
2789 * trace_buffered_event_disable - disable buffering events
2790 *
2791 * When a filter is removed, it is faster to not use the buffered
2792 * events, and to commit directly into the ring buffer. Free up
2793 * the temp buffers when there are no more users. This requires
2794 * special synchronization with current events.
2795 */
trace_buffered_event_disable(void)2796 void trace_buffered_event_disable(void)
2797 {
2798 int cpu;
2799
2800 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2801
2802 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2803 return;
2804
2805 if (--trace_buffered_event_ref)
2806 return;
2807
2808 /* For each CPU, set the buffer as used. */
2809 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2810 NULL, true);
2811
2812 /* Wait for all current users to finish */
2813 synchronize_rcu();
2814
2815 for_each_tracing_cpu(cpu) {
2816 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2817 per_cpu(trace_buffered_event, cpu) = NULL;
2818 }
2819
2820 /*
2821 * Wait for all CPUs that potentially started checking if they can use
2822 * their event buffer only after the previous synchronize_rcu() call and
2823 * they still read a valid pointer from trace_buffered_event. It must be
2824 * ensured they don't see cleared trace_buffered_event_cnt else they
2825 * could wrongly decide to use the pointed-to buffer which is now freed.
2826 */
2827 synchronize_rcu();
2828
2829 /* For each CPU, relinquish the buffer */
2830 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2831 true);
2832 }
2833
/* Fallback buffer used below when the reserve on the real buffer fails */
static struct trace_buffer *temp_buffer;

/*
 * Reserve space for an event of @type and @len bytes for @trace_file.
 *
 * @current_rb is set to the buffer the returned event belongs to: the
 * trace array's buffer, or temp_buffer when the fallback was used. When the
 * per-CPU trace_buffered_event is handed out instead, @current_rb still
 * names the trace array's buffer and the function returns with preemption
 * disabled.
 *
 * Returns the reserved event, or whatever the final
 * __trace_buffer_lock_reserve() call returned.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring_buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			/* Payload room left in the page after the event header */
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			/* Buffer busy or event too large: undo the increment */
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2918
2919 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2920 static DEFINE_MUTEX(tracepoint_printk_mutex);
2921
output_printk(struct trace_event_buffer * fbuffer)2922 static void output_printk(struct trace_event_buffer *fbuffer)
2923 {
2924 struct trace_event_call *event_call;
2925 struct trace_event_file *file;
2926 struct trace_event *event;
2927 unsigned long flags;
2928 struct trace_iterator *iter = tracepoint_print_iter;
2929
2930 /* We should never get here if iter is NULL */
2931 if (WARN_ON_ONCE(!iter))
2932 return;
2933
2934 event_call = fbuffer->trace_file->event_call;
2935 if (!event_call || !event_call->event.funcs ||
2936 !event_call->event.funcs->trace)
2937 return;
2938
2939 file = fbuffer->trace_file;
2940 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2941 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2942 !filter_match_preds(file->filter, fbuffer->entry)))
2943 return;
2944
2945 event = &fbuffer->trace_file->event_call->event;
2946
2947 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2948 trace_seq_init(&iter->seq);
2949 iter->ent = fbuffer->entry;
2950 event_call->event.funcs->trace(iter, 0, event);
2951 trace_seq_putc(&iter->seq, 0);
2952 printk("%s", iter->seq.buffer);
2953
2954 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2955 }
2956
/*
 * sysctl handler for tracepoint_printk. Lets proc_dointvec() process the
 * request, then keeps tracepoint_printk_key in step with the new value.
 * Returns the result of proc_dointvec().
 */
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int old_value;
	int ret;

	mutex_lock(&tracepoint_printk_mutex);
	old_value = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* Touch the static key only when the value really changed */
	if (old_value != tracepoint_printk) {
		if (tracepoint_printk)
			static_key_enable(&tracepoint_printk_key.key);
		else
			static_key_disable(&tracepoint_printk_key.key);
	}

	mutex_unlock(&tracepoint_printk_mutex);

	return ret;
}
2989
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2990 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2991 {
2992 enum event_trigger_type tt = ETT_NONE;
2993 struct trace_event_file *file = fbuffer->trace_file;
2994
2995 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2996 fbuffer->entry, &tt))
2997 goto discard;
2998
2999 if (static_key_false(&tracepoint_printk_key.key))
3000 output_printk(fbuffer);
3001
3002 if (static_branch_unlikely(&trace_event_exports_enabled))
3003 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3004
3005 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3006 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3007
3008 discard:
3009 if (tt)
3010 event_triggers_post_call(file, tt);
3011
3012 }
3013 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3014
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event to @buffer, then record the kernel and user stack traces
 * for @tr through ftrace_trace_stack() and ftrace_trace_userstack().
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	int skip;

	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	if (regs)
		skip = 0;
	else
		skip = STACK_SKIP;

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
3041
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 *
 * Only commits @event to @buffer via __buffer_unlock_commit().
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
3051
3052 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx)3053 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3054 parent_ip, unsigned int trace_ctx)
3055 {
3056 struct trace_event_call *call = &event_function;
3057 struct trace_buffer *buffer = tr->array_buffer.buffer;
3058 struct ring_buffer_event *event;
3059 struct ftrace_entry *entry;
3060
3061 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3062 trace_ctx);
3063 if (!event)
3064 return;
3065 entry = ring_buffer_event_data(event);
3066 entry->ip = ip;
3067 entry->parent_ip = parent_ip;
3068
3069 if (!call_filter_check_discard(call, entry, buffer, event)) {
3070 if (static_branch_unlikely(&trace_function_exports_enabled))
3071 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3072 __buffer_unlock_commit(buffer, event);
3073 }
3074 }
3075
3076 #ifdef CONFIG_STACKTRACE
3077
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of return addresses each per-nesting-level scratch stack holds */
#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)

/* Scratch area one stack trace is saved into before it is copied out */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch stack per nesting level */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/*
 * Per-CPU scratch stacks, and the per-CPU nesting counter that
 * __ftrace_trace_stack() uses to pick which of them to use.
 */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3094
/*
 * Record a kernel stack trace as a TRACE_STACK event in @buffer.
 *
 * @buffer:    ring buffer the event is written to
 * @trace_ctx: trace context passed along when reserving the event
 * @skip:      number of leading stack frames to leave out
 * @regs:      if non-NULL, the trace is taken with
 *             stack_trace_save_regs() from these registers; otherwise
 *             stack_trace_save() is used
 *
 * The trace is first saved into a per-CPU scratch stack selected by the
 * ftrace_stack_reserve nesting counter and then copied into the reserved
 * event. Nothing is recorded if the nesting level reaches
 * FTRACE_KSTACK_NESTING or if the event cannot be reserved.
 */
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	preempt_disable_notrace();

	/* Claim the next nesting level; the index is the pre-increment value */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns. We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

	/* The event is sized for exactly the entries that were saved */
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	/* Release the nesting level claimed above */
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
3163
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3164 static inline void ftrace_trace_stack(struct trace_array *tr,
3165 struct trace_buffer *buffer,
3166 unsigned int trace_ctx,
3167 int skip, struct pt_regs *regs)
3168 {
3169 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3170 return;
3171
3172 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3173 }
3174
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3175 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3176 int skip)
3177 {
3178 struct trace_buffer *buffer = tr->array_buffer.buffer;
3179
3180 if (rcu_is_watching()) {
3181 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182 return;
3183 }
3184
3185 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3186 return;
3187
3188 /*
3189 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3190 * but if the above rcu_is_watching() failed, then the NMI
3191 * triggered someplace critical, and ct_irq_enter() should
3192 * not be called from NMI.
3193 */
3194 if (unlikely(in_nmi()))
3195 return;
3196
3197 ct_irq_enter_irqson();
3198 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3199 ct_irq_exit_irqson();
3200 }
3201
3202 /**
3203 * trace_dump_stack - record a stack back trace in the trace buffer
3204 * @skip: Number of functions to skip (helper handlers)
3205 */
trace_dump_stack(int skip)3206 void trace_dump_stack(int skip)
3207 {
3208 if (tracing_disabled || tracing_selftest_running)
3209 return;
3210
3211 #ifndef CONFIG_UNWINDER_ORC
3212 /* Skip 1 to skip this function. */
3213 skip++;
3214 #endif
3215 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3216 tracing_gen_ctx(), skip, NULL);
3217 }
3218 EXPORT_SYMBOL_GPL(trace_dump_stack);
3219
3220 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3221 static DEFINE_PER_CPU(int, user_stack_count);
3222
3223 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3224 ftrace_trace_userstack(struct trace_array *tr,
3225 struct trace_buffer *buffer, unsigned int trace_ctx)
3226 {
3227 struct trace_event_call *call = &event_user_stack;
3228 struct ring_buffer_event *event;
3229 struct userstack_entry *entry;
3230
3231 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3232 return;
3233
3234 /*
3235 * NMIs can not handle page faults, even with fix ups.
3236 * The save user stack can (and often does) fault.
3237 */
3238 if (unlikely(in_nmi()))
3239 return;
3240
3241 /*
3242 * prevent recursion, since the user stack tracing may
3243 * trigger other kernel events.
3244 */
3245 preempt_disable();
3246 if (__this_cpu_read(user_stack_count))
3247 goto out;
3248
3249 __this_cpu_inc(user_stack_count);
3250
3251 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3252 sizeof(*entry), trace_ctx);
3253 if (!event)
3254 goto out_drop_count;
3255 entry = ring_buffer_event_data(event);
3256
3257 entry->tgid = current->tgid;
3258 memset(&entry->caller, 0, sizeof(entry->caller));
3259
3260 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3261 if (!call_filter_check_discard(call, entry, buffer, event))
3262 __buffer_unlock_commit(buffer, event);
3263
3264 out_drop_count:
3265 __this_cpu_dec(user_stack_count);
3266 out:
3267 preempt_enable();
3268 }
3269 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Without CONFIG_USER_STACKTRACE_SUPPORT there is nothing to record. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
3275 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3276
3277 #endif /* CONFIG_STACKTRACE */
3278
3279 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3280 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3281 unsigned long long delta)
3282 {
3283 entry->bottom_delta_ts = delta & U32_MAX;
3284 entry->top_delta_ts = (delta >> 32);
3285 }
3286
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3287 void trace_last_func_repeats(struct trace_array *tr,
3288 struct trace_func_repeats *last_info,
3289 unsigned int trace_ctx)
3290 {
3291 struct trace_buffer *buffer = tr->array_buffer.buffer;
3292 struct func_repeats_entry *entry;
3293 struct ring_buffer_event *event;
3294 u64 delta;
3295
3296 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3297 sizeof(*entry), trace_ctx);
3298 if (!event)
3299 return;
3300
3301 delta = ring_buffer_event_time_stamp(buffer, event) -
3302 last_info->ts_last_call;
3303
3304 entry = ring_buffer_event_data(event);
3305 entry->ip = last_info->ip;
3306 entry->parent_ip = last_info->parent_ip;
3307 entry->count = last_info->count;
3308 func_repeats_set_delta_ts(entry, delta);
3309
3310 __buffer_unlock_commit(buffer, event);
3311 }
3312
/*
 * created for use with alloc_percpu
 *
 * @nesting: number of entries of @buffer currently handed out on this CPU
 * @buffer:  four scratch buffers of TRACE_BUF_SIZE bytes, one per
 *           nesting level (see get_trace_buf())
 */
struct trace_buffer_struct {
	int nesting;
	char buffer[4][TRACE_BUF_SIZE];
};

/* NULL until alloc_percpu_trace_buffer() has succeeded */
static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3320
3321 /*
3322 * This allows for lockless recording. If we're nested too deeply, then
3323 * this returns NULL.
3324 */
get_trace_buf(void)3325 static char *get_trace_buf(void)
3326 {
3327 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3328
3329 if (!trace_percpu_buffer || buffer->nesting >= 4)
3330 return NULL;
3331
3332 buffer->nesting++;
3333
3334 /* Interrupts must see nesting incremented before we use the buffer */
3335 barrier();
3336 return &buffer->buffer[buffer->nesting - 1][0];
3337 }
3338
/*
 * Give back the scratch buffer obtained from get_trace_buf() by
 * dropping this CPU's nesting count by one.
 */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
3345
alloc_percpu_trace_buffer(void)3346 static int alloc_percpu_trace_buffer(void)
3347 {
3348 struct trace_buffer_struct __percpu *buffers;
3349
3350 if (trace_percpu_buffer)
3351 return 0;
3352
3353 buffers = alloc_percpu(struct trace_buffer_struct);
3354 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3355 return -ENOMEM;
3356
3357 trace_percpu_buffer = buffers;
3358 return 0;
3359 }
3360
/* Set to 1 once trace_printk_init_buffers() has completed successfully */
static int buffers_allocated;

/*
 * Set up everything trace_printk() needs: allocate the per-CPU scratch
 * buffers, print a loud warning banner, expand the ring buffers and,
 * if the global buffer already exists, start cmdline recording.
 * Only the first successful call does any work.
 */
void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	/* On allocation failure, leave buffers_allocated clear */
	if (alloc_percpu_trace_buffer())
		return;

	/* trace_printk() is for debug use only. Don't use it in production. */

	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
	pr_warn("** **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
	pr_warn("** **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is **\n");
	pr_warn("** unsafe for production use. **\n");
	pr_warn("** **\n");
	pr_warn("** If you see this message and you are not debugging **\n");
	pr_warn("** the kernel, report this immediately to your vendor! **\n");
	pr_warn("** **\n");
	pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
	pr_warn("**********************************************************\n");

	/* Expand the buffers to set size */
	tracing_update_buffers();

	buffers_allocated = 1;

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
	if (global_trace.array_buffer.buffer)
		tracing_start_cmdline_record();
}
EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3403
trace_printk_start_comm(void)3404 void trace_printk_start_comm(void)
3405 {
3406 /* Start tracing comms if trace printk is set */
3407 if (!buffers_allocated)
3408 return;
3409 tracing_start_cmdline_record();
3410 }
3411
trace_printk_start_stop_comm(int enabled)3412 static void trace_printk_start_stop_comm(int enabled)
3413 {
3414 if (!buffers_allocated)
3415 return;
3416
3417 if (enabled)
3418 tracing_start_cmdline_record();
3419 else
3420 tracing_stop_cmdline_record();
3421 }
3422
3423 /**
3424 * trace_vbprintk - write binary msg to tracing buffer
3425 * @ip: The address of the caller
3426 * @fmt: The string format to write to the buffer
3427 * @args: Arguments for @fmt
3428 */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3429 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3430 {
3431 struct trace_event_call *call = &event_bprint;
3432 struct ring_buffer_event *event;
3433 struct trace_buffer *buffer;
3434 struct trace_array *tr = &global_trace;
3435 struct bprint_entry *entry;
3436 unsigned int trace_ctx;
3437 char *tbuffer;
3438 int len = 0, size;
3439
3440 if (unlikely(tracing_selftest_running || tracing_disabled))
3441 return 0;
3442
3443 /* Don't pollute graph traces with trace_vprintk internals */
3444 pause_graph_tracing();
3445
3446 trace_ctx = tracing_gen_ctx();
3447 preempt_disable_notrace();
3448
3449 tbuffer = get_trace_buf();
3450 if (!tbuffer) {
3451 len = 0;
3452 goto out_nobuffer;
3453 }
3454
3455 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3456
3457 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3458 goto out_put;
3459
3460 size = sizeof(*entry) + sizeof(u32) * len;
3461 buffer = tr->array_buffer.buffer;
3462 ring_buffer_nest_start(buffer);
3463 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3464 trace_ctx);
3465 if (!event)
3466 goto out;
3467 entry = ring_buffer_event_data(event);
3468 entry->ip = ip;
3469 entry->fmt = fmt;
3470
3471 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3472 if (!call_filter_check_discard(call, entry, buffer, event)) {
3473 __buffer_unlock_commit(buffer, event);
3474 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3475 }
3476
3477 out:
3478 ring_buffer_nest_end(buffer);
3479 out_put:
3480 put_trace_buf();
3481
3482 out_nobuffer:
3483 preempt_enable_notrace();
3484 unpause_graph_tracing();
3485
3486 return len;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_vbprintk);
3489
3490 __printf(3, 0)
3491 static int
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3492 __trace_array_vprintk(struct trace_buffer *buffer,
3493 unsigned long ip, const char *fmt, va_list args)
3494 {
3495 struct trace_event_call *call = &event_print;
3496 struct ring_buffer_event *event;
3497 int len = 0, size;
3498 struct print_entry *entry;
3499 unsigned int trace_ctx;
3500 char *tbuffer;
3501
3502 if (tracing_disabled)
3503 return 0;
3504
3505 /* Don't pollute graph traces with trace_vprintk internals */
3506 pause_graph_tracing();
3507
3508 trace_ctx = tracing_gen_ctx();
3509 preempt_disable_notrace();
3510
3511
3512 tbuffer = get_trace_buf();
3513 if (!tbuffer) {
3514 len = 0;
3515 goto out_nobuffer;
3516 }
3517
3518 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3519
3520 size = sizeof(*entry) + len + 1;
3521 ring_buffer_nest_start(buffer);
3522 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3523 trace_ctx);
3524 if (!event)
3525 goto out;
3526 entry = ring_buffer_event_data(event);
3527 entry->ip = ip;
3528
3529 memcpy(&entry->buf, tbuffer, len + 1);
3530 if (!call_filter_check_discard(call, entry, buffer, event)) {
3531 __buffer_unlock_commit(buffer, event);
3532 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3533 }
3534
3535 out:
3536 ring_buffer_nest_end(buffer);
3537 put_trace_buf();
3538
3539 out_nobuffer:
3540 preempt_enable_notrace();
3541 unpause_graph_tracing();
3542
3543 return len;
3544 }
3545
3546 __printf(3, 0)
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3547 int trace_array_vprintk(struct trace_array *tr,
3548 unsigned long ip, const char *fmt, va_list args)
3549 {
3550 if (tracing_selftest_running && tr == &global_trace)
3551 return 0;
3552
3553 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3554 }
3555
3556 /**
3557 * trace_array_printk - Print a message to a specific instance
3558 * @tr: The instance trace_array descriptor
3559 * @ip: The instruction pointer that this is called from.
3560 * @fmt: The format to print (printf format)
3561 *
3562 * If a subsystem sets up its own instance, they have the right to
3563 * printk strings into their tracing instance buffer using this
3564 * function. Note, this function will not write into the top level
3565 * buffer (use trace_printk() for that), as writing into the top level
3566 * buffer should only have events that can be individually disabled.
3567 * trace_printk() is only used for debugging a kernel, and should not
3568 * be ever incorporated in normal use.
3569 *
3570 * trace_array_printk() can be used, as it will not add noise to the
3571 * top level tracing buffer.
3572 *
3573 * Note, trace_array_init_printk() must be called on @tr before this
3574 * can be used.
3575 */
3576 __printf(3, 0)
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3577 int trace_array_printk(struct trace_array *tr,
3578 unsigned long ip, const char *fmt, ...)
3579 {
3580 int ret;
3581 va_list ap;
3582
3583 if (!tr)
3584 return -ENOENT;
3585
3586 /* This is only allowed for created instances */
3587 if (tr == &global_trace)
3588 return 0;
3589
3590 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3591 return 0;
3592
3593 va_start(ap, fmt);
3594 ret = trace_array_vprintk(tr, ip, fmt, ap);
3595 va_end(ap);
3596 return ret;
3597 }
3598 EXPORT_SYMBOL_GPL(trace_array_printk);
3599
3600 /**
3601 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3602 * @tr: The trace array to initialize the buffers for
3603 *
3604 * As trace_array_printk() only writes into instances, they are OK to
3605 * have in the kernel (unlike trace_printk()). This needs to be called
3606 * before trace_array_printk() can be used on a trace_array.
3607 */
trace_array_init_printk(struct trace_array * tr)3608 int trace_array_init_printk(struct trace_array *tr)
3609 {
3610 if (!tr)
3611 return -ENOENT;
3612
3613 /* This is only allowed for created instances */
3614 if (tr == &global_trace)
3615 return -EINVAL;
3616
3617 return alloc_percpu_trace_buffer();
3618 }
3619 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3620
3621 __printf(3, 4)
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3622 int trace_array_printk_buf(struct trace_buffer *buffer,
3623 unsigned long ip, const char *fmt, ...)
3624 {
3625 int ret;
3626 va_list ap;
3627
3628 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3629 return 0;
3630
3631 va_start(ap, fmt);
3632 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3633 va_end(ap);
3634 return ret;
3635 }
3636
/*
 * va_list printk into the top level trace buffer: a thin wrapper around
 * trace_array_vprintk() with &global_trace as the instance.
 */
__printf(2, 0)
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
3643
trace_iterator_increment(struct trace_iterator * iter)3644 static void trace_iterator_increment(struct trace_iterator *iter)
3645 {
3646 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3647
3648 iter->idx++;
3649 if (buf_iter)
3650 ring_buffer_iter_advance(buf_iter);
3651 }
3652
3653 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3654 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3655 unsigned long *lost_events)
3656 {
3657 struct ring_buffer_event *event;
3658 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3659
3660 if (buf_iter) {
3661 event = ring_buffer_iter_peek(buf_iter, ts);
3662 if (lost_events)
3663 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3664 (unsigned long)-1 : 0;
3665 } else {
3666 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3667 lost_events);
3668 }
3669
3670 if (event) {
3671 iter->ent_size = ring_buffer_event_length(event);
3672 return ring_buffer_event_data(event);
3673 }
3674 iter->ent_size = 0;
3675 return NULL;
3676 }
3677
3678 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3679 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3680 unsigned long *missing_events, u64 *ent_ts)
3681 {
3682 struct trace_buffer *buffer = iter->array_buffer->buffer;
3683 struct trace_entry *ent, *next = NULL;
3684 unsigned long lost_events = 0, next_lost = 0;
3685 int cpu_file = iter->cpu_file;
3686 u64 next_ts = 0, ts;
3687 int next_cpu = -1;
3688 int next_size = 0;
3689 int cpu;
3690
3691 /*
3692 * If we are in a per_cpu trace file, don't bother by iterating over
3693 * all cpu and peek directly.
3694 */
3695 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3696 if (ring_buffer_empty_cpu(buffer, cpu_file))
3697 return NULL;
3698 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3699 if (ent_cpu)
3700 *ent_cpu = cpu_file;
3701
3702 return ent;
3703 }
3704
3705 for_each_tracing_cpu(cpu) {
3706
3707 if (ring_buffer_empty_cpu(buffer, cpu))
3708 continue;
3709
3710 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3711
3712 /*
3713 * Pick the entry with the smallest timestamp:
3714 */
3715 if (ent && (!next || ts < next_ts)) {
3716 next = ent;
3717 next_cpu = cpu;
3718 next_ts = ts;
3719 next_lost = lost_events;
3720 next_size = iter->ent_size;
3721 }
3722 }
3723
3724 iter->ent_size = next_size;
3725
3726 if (ent_cpu)
3727 *ent_cpu = next_cpu;
3728
3729 if (ent_ts)
3730 *ent_ts = next_ts;
3731
3732 if (missing_events)
3733 *missing_events = next_lost;
3734
3735 return next;
3736 }
3737
3738 #define STATIC_FMT_BUF_SIZE 128
3739 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3740
trace_iter_expand_format(struct trace_iterator * iter)3741 char *trace_iter_expand_format(struct trace_iterator *iter)
3742 {
3743 char *tmp;
3744
3745 /*
3746 * iter->tr is NULL when used with tp_printk, which makes
3747 * this get called where it is not safe to call krealloc().
3748 */
3749 if (!iter->tr || iter->fmt == static_fmt_buf)
3750 return NULL;
3751
3752 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3753 GFP_KERNEL);
3754 if (tmp) {
3755 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3756 iter->fmt = tmp;
3757 }
3758
3759 return tmp;
3760 }
3761
3762 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str,bool star,int len)3763 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3764 bool star, int len)
3765 {
3766 unsigned long addr = (unsigned long)str;
3767 struct trace_event *trace_event;
3768 struct trace_event_call *event;
3769
3770 /* Ignore strings with no length */
3771 if (star && !len)
3772 return true;
3773
3774 /* OK if part of the event data */
3775 if ((addr >= (unsigned long)iter->ent) &&
3776 (addr < (unsigned long)iter->ent + iter->ent_size))
3777 return true;
3778
3779 /* OK if part of the temp seq buffer */
3780 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3781 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3782 return true;
3783
3784 /* Core rodata can not be freed */
3785 if (is_kernel_rodata(addr))
3786 return true;
3787
3788 if (trace_is_tracepoint_string(str))
3789 return true;
3790
3791 /*
3792 * Now this could be a module event, referencing core module
3793 * data, which is OK.
3794 */
3795 if (!iter->ent)
3796 return false;
3797
3798 trace_event = ftrace_find_event(iter->ent->type);
3799 if (!trace_event)
3800 return false;
3801
3802 event = container_of(trace_event, struct trace_event_call, event);
3803 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3804 return false;
3805
3806 /* Would rather have rodata, but this will suffice */
3807 if (within_module_core(addr, event->module))
3808 return true;
3809
3810 return false;
3811 }
3812
show_buffer(struct trace_seq * s)3813 static const char *show_buffer(struct trace_seq *s)
3814 {
3815 struct seq_buf *seq = &s->seq;
3816
3817 seq_buf_terminate(seq);
3818
3819 return seq->buffer;
3820 }
3821
3822 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3823
/*
 * Probe how va_list is handed to vsnprintf(): consume one "%d" through
 * vsnprintf() and return whatever va_arg() yields next.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list ap;
	int next;

	/*
	 * The verifier depends on vsnprintf() modifying the va_list that
	 * is passed to it, which only happens where va_list is passed by
	 * reference. Some architectures (like x86_32) pass it by value,
	 * which means that vsnprintf() does not modify the caller's
	 * va_list, and the verifier would then need to understand every
	 * conversion vsnprintf() can consume. If it is passed by value,
	 * the verifier is disabled.
	 */
	va_start(ap, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", ap);
	next = va_arg(ap, int);
	va_end(ap);

	return next;
}
3846
test_can_verify(void)3847 static void test_can_verify(void)
3848 {
3849 if (!test_can_verify_check("%d %d", 0, 1)) {
3850 pr_info("trace event string verifier disabled\n");
3851 static_branch_inc(&trace_no_verify);
3852 }
3853 }
3854
/**
 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 * @fmt: The format used to print the event
 * @ap: The va_list holding the data to print from @fmt.
 *
 * This writes the data into the @iter->seq buffer using the data from
 * @fmt and @ap. If the format has a %s, then the source of the string
 * is examined to make sure it is safe to print, otherwise it will
 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
 * pointer.
 *
 * The format is processed piecewise: everything up to a %s is copied
 * into @iter->fmt and printed, the %s argument is fetched and checked
 * with trace_safe_str(), and then the %s itself is printed. When the
 * verifier is disabled, or @iter uses the static format buffer, the
 * whole format is printed in one go without any checks.
 */
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
			 va_list ap)
{
	const char *p = fmt;
	const char *str;
	int i, j;

	if (WARN_ON_ONCE(!fmt))
		return;

	if (static_branch_unlikely(&trace_no_verify))
		goto print;

	/* Don't bother checking when doing a ftrace_dump() */
	if (iter->fmt == static_fmt_buf)
		goto print;

	while (*p) {
		bool star = false;
		int len = 0;

		j = 0;

		/* We only care about %s and variants */
		for (i = 0; p[i]; i++) {
			if (i + 1 >= iter->fmt_size) {
				/*
				 * If we can't expand the copy buffer,
				 * just print it.
				 */
				if (!trace_iter_expand_format(iter))
					goto print;
			}

			/* Step over a backslash and the character after it */
			if (p[i] == '\\' && p[i+1]) {
				i++;
				continue;
			}
			if (p[i] == '%') {
				/* Need to test cases like %08.*s */
				for (j = 1; p[i+j]; j++) {
					if (isdigit(p[i+j]) ||
					    p[i+j] == '.')
						continue;
					if (p[i+j] == '*') {
						star = true;
						continue;
					}
					break;
				}
				/* Found one: i is at the '%', i+j at the 's' */
				if (p[i+j] == 's')
					break;
				star = false;
			}
			j = 0;
		}
		/* If no %s found then just print normally */
		if (!p[i])
			break;

		/* Copy up to the %s, and print that */
		strncpy(iter->fmt, p, i);
		iter->fmt[i] = '\0';
		trace_seq_vprintf(&iter->seq, iter->fmt, ap);

		/*
		 * If iter->seq is full, the above call no longer guarantees
		 * that ap is in sync with fmt processing, and further calls
		 * to va_arg() can return wrong positional arguments.
		 *
		 * Ensure that ap is no longer used in this case.
		 */
		if (iter->seq.full) {
			p = "";
			break;
		}

		/* A '*' takes its int argument before the string pointer */
		if (star)
			len = va_arg(ap, int);

		/* The ap now points to the string data of the %s */
		str = va_arg(ap, const char *);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
			      "fmt: '%s' current_buffer: '%s'",
			      fmt, show_buffer(&iter->seq))) {
			int ret;

			/* Try to safely read the string */
			if (star) {
				/* Clamp len so the copy fits into iter->fmt */
				if (len + 1 > iter->fmt_size)
					len = iter->fmt_size - 1;
				if (len < 0)
					len = 0;
				ret = copy_from_kernel_nofault(iter->fmt, str, len);
				iter->fmt[len] = 0;
				star = false;
			} else {
				ret = strncpy_from_kernel_nofault(iter->fmt, str,
								  iter->fmt_size);
			}
			/* Show the pointer, plus the text if it could be read */
			if (ret < 0)
				trace_seq_printf(&iter->seq, "(0x%px)", str);
			else
				trace_seq_printf(&iter->seq, "(0x%px:%s)",
						 str, iter->fmt);
			str = "[UNSAFE-MEMORY]";
			strcpy(iter->fmt, "%s");
		} else {
			/* Safe: print with the original %...s conversion */
			strncpy(iter->fmt, p + i, j + 1);
			iter->fmt[j+1] = '\0';
		}
		if (star)
			trace_seq_printf(&iter->seq, iter->fmt, len, str);
		else
			trace_seq_printf(&iter->seq, iter->fmt, str);

		/* Continue right after the 's' of the conversion just handled */
		p += i + j + 1;
	}
 print:
	if (*p)
		trace_seq_vprintf(&iter->seq, p, ap);
}
3999
trace_event_format(struct trace_iterator * iter,const char * fmt)4000 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4001 {
4002 const char *p, *new_fmt;
4003 char *q;
4004
4005 if (WARN_ON_ONCE(!fmt))
4006 return fmt;
4007
4008 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4009 return fmt;
4010
4011 p = fmt;
4012 new_fmt = q = iter->fmt;
4013 while (*p) {
4014 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4015 if (!trace_iter_expand_format(iter))
4016 return fmt;
4017
4018 q += iter->fmt - new_fmt;
4019 new_fmt = iter->fmt;
4020 }
4021
4022 *q++ = *p++;
4023
4024 /* Replace %p with %px */
4025 if (p[-1] == '%') {
4026 if (p[0] == '%') {
4027 *q++ = *p++;
4028 } else if (p[0] == 'p' && !isalnum(p[1])) {
4029 *q++ = *p++;
4030 *q++ = 'x';
4031 }
4032 }
4033 }
4034 *q = '\0';
4035
4036 return new_fmt;
4037 }
4038
4039 #define STATIC_TEMP_BUF_SIZE 128
4040 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4041
4042 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)4043 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4044 int *ent_cpu, u64 *ent_ts)
4045 {
4046 /* __find_next_entry will reset ent_size */
4047 int ent_size = iter->ent_size;
4048 struct trace_entry *entry;
4049
4050 /*
4051 * If called from ftrace_dump(), then the iter->temp buffer
4052 * will be the static_temp_buf and not created from kmalloc.
4053 * If the entry size is greater than the buffer, we can
4054 * not save it. Just return NULL in that case. This is only
4055 * used to add markers when two consecutive events' time
4056 * stamps have a large delta. See trace_print_lat_context()
4057 */
4058 if (iter->temp == static_temp_buf &&
4059 STATIC_TEMP_BUF_SIZE < ent_size)
4060 return NULL;
4061
4062 /*
4063 * The __find_next_entry() may call peek_next_entry(), which may
4064 * call ring_buffer_peek() that may make the contents of iter->ent
4065 * undefined. Need to copy iter->ent now.
4066 */
4067 if (iter->ent && iter->ent != iter->temp) {
4068 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4069 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4070 void *temp;
4071 temp = kmalloc(iter->ent_size, GFP_KERNEL);
4072 if (!temp)
4073 return NULL;
4074 kfree(iter->temp);
4075 iter->temp = temp;
4076 iter->temp_size = iter->ent_size;
4077 }
4078 memcpy(iter->temp, iter->ent, iter->ent_size);
4079 iter->ent = iter->temp;
4080 }
4081 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4082 /* Put back the original ent_size */
4083 iter->ent_size = ent_size;
4084
4085 return entry;
4086 }
4087
4088 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)4089 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4090 {
4091 iter->ent = __find_next_entry(iter, &iter->cpu,
4092 &iter->lost_events, &iter->ts);
4093
4094 if (iter->ent)
4095 trace_iterator_increment(iter);
4096
4097 return iter->ent ? iter : NULL;
4098 }
4099
/*
 * Call ring_buffer_consume() on the iterator's buffer for iter->cpu,
 * letting it fill in iter->ts and iter->lost_events.
 */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
4105
/*
 * seq_file ->next callback: advance the trace iterator to position *pos
 * (as it was on entry) and bump *pos by one.
 *
 * Returns the iterator when an entry is available at that position,
 * NULL when the position lies behind the iterator or no entry remains.
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int target = (int)*pos;
	void *cur;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > target)
		return NULL;

	/* A negative idx means nothing was fetched yet: pull the first entry */
	cur = iter->idx < 0 ? trace_find_next_entry_inc(iter) : iter;

	while (cur && iter->idx < target)
		cur = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return cur;
}
4132
tracing_iter_reset(struct trace_iterator * iter,int cpu)4133 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4134 {
4135 struct ring_buffer_iter *buf_iter;
4136 unsigned long entries = 0;
4137 u64 ts;
4138
4139 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4140
4141 buf_iter = trace_buffer_iter(iter, cpu);
4142 if (!buf_iter)
4143 return;
4144
4145 ring_buffer_iter_reset(buf_iter);
4146
4147 /*
4148 * We could have the case with the max latency tracers
4149 * that a reset never took place on a cpu. This is evident
4150 * by the timestamp being before the start of the buffer.
4151 */
4152 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4153 if (ts >= iter->array_buffer->time_start)
4154 break;
4155 entries++;
4156 ring_buffer_iter_advance(buf_iter);
4157 /* This could be a big loop */
4158 cond_resched();
4159 }
4160
4161 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4162 }
4163
/*
 * seq_file ->start callback for reading a trace.
 *
 * Under trace_types_lock, re-syncs iter->trace with tr->current_trace:
 * if the tracer changed since the last call, the old one is closed and
 * the new one opened on this iterator. The iterator is then positioned
 * at *pos.
 *
 * Returns the iterator (or NULL when there is nothing at *pos) with
 * trace_event_read_lock() and trace_access_lock() held. With
 * CONFIG_TRACER_MAX_TRACE, returns ERR_PTR(-EBUSY) for a snapshot iterator
 * whose tracer sets use_max_tr; that path returns before those two locks
 * are taken.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->trace->use_max_tr)
		return ERR_PTR(-EBUSY);
#endif

	if (*pos != iter->pos) {
		/* Not a continuation of the last read: restart from scratch */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		/* Walk forward with s_next() until position *pos is reached */
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			/* s_next() increments the position, so start one back */
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
4226
/*
 * seq_file ->stop callback: release the locks taken at the end of s_start().
 *
 * With CONFIG_TRACER_MAX_TRACE, a snapshot iterator whose tracer sets
 * use_max_tr returns early; s_start() bails out on the same condition
 * before taking the locks.
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
4239
4240 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4241 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4242 unsigned long *entries, int cpu)
4243 {
4244 unsigned long count;
4245
4246 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4247 /*
4248 * If this buffer has skipped entries, then we hold all
4249 * entries for the trace and we need to ignore the
4250 * ones before the time stamp.
4251 */
4252 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4253 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4254 /* total is the same as the entries */
4255 *total = count;
4256 } else
4257 *total = count +
4258 ring_buffer_overrun_cpu(buf->buffer, cpu);
4259 *entries = count;
4260 }
4261
4262 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4263 get_total_entries(struct array_buffer *buf,
4264 unsigned long *total, unsigned long *entries)
4265 {
4266 unsigned long t, e;
4267 int cpu;
4268
4269 *total = 0;
4270 *entries = 0;
4271
4272 for_each_tracing_cpu(cpu) {
4273 get_total_entries_cpu(buf, &t, &e, cpu);
4274 *total += t;
4275 *entries += e;
4276 }
4277 }
4278
trace_total_entries_cpu(struct trace_array * tr,int cpu)4279 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4280 {
4281 unsigned long total, entries;
4282
4283 if (!tr)
4284 tr = &global_trace;
4285
4286 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4287
4288 return entries;
4289 }
4290
trace_total_entries(struct trace_array * tr)4291 unsigned long trace_total_entries(struct trace_array *tr)
4292 {
4293 unsigned long total, entries;
4294
4295 if (!tr)
4296 tr = &global_trace;
4297
4298 get_total_entries(&tr->array_buffer, &total, &entries);
4299
4300 return entries;
4301 }
4302
/* Write the column legend used for latency-format output into @m. */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "# _------=> CPU# \n"
		    "# / _-----=> irqs-off/BH-disabled\n"
		    "# | / _----=> need-resched \n"
		    "# || / _---=> hardirq/softirq \n"
		    "# ||| / _--=> preempt-depth \n"
		    "# |||| / _-=> migrate-disable \n"
		    "# ||||| / delay \n"
		    "# cmd pid |||||| time | caller \n"
		    "# \\ / |||||| \\ | / \n");
}
4315
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * along with the number of online cpus, followed by an empty comment line.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4326
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4327 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4328 unsigned int flags)
4329 {
4330 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4331
4332 print_event_info(buf, m);
4333
4334 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4335 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4336 }
4337
/*
 * Print the event summary and the column headers of the default format
 * when irq info is shown. The precision @prec passed to "%.*s" limits how
 * much of the padding / TGID strings is printed: 12 characters with
 * TRACE_ITER_RECORD_TGID set in @flags, 2 without.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;
	static const char space[] = " ";
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "# %.*s|||| / delay\n", prec, space);
	seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
	seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
}
4356
/*
 * Print the latency-trace banner for @iter into @m: tracer name and kernel
 * release, saved latency and entry counts, preemption model, the task
 * recorded in the per-cpu data of buf->cpu, and, when critical_start is
 * set, the symbols where the critical section started and ended.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	/* Symbol formatting flags are taken from global_trace, not iter->tr */
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, UTS_RELEASE);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_none() ? "server" :
		   preempt_model_voluntary() ? "desktop" :
		   preempt_model_full() ? "preempt" :
		   preempt_model_rt() ? "preempt_rt" :
		   "unknown",
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "# -----------------\n");
	seq_printf(m, "# | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "# -----------------\n");

	if (data->critical_start) {
		/* Symbols are formatted into iter->seq, then flushed to @m */
		seq_puts(m, "# => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n# => ended at: ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
4412
test_cpu_buff_start(struct trace_iterator * iter)4413 static void test_cpu_buff_start(struct trace_iterator *iter)
4414 {
4415 struct trace_seq *s = &iter->seq;
4416 struct trace_array *tr = iter->tr;
4417
4418 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4419 return;
4420
4421 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4422 return;
4423
4424 if (cpumask_available(iter->started) &&
4425 cpumask_test_cpu(iter->cpu, iter->started))
4426 return;
4427
4428 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4429 return;
4430
4431 if (cpumask_available(iter->started))
4432 cpumask_set_cpu(iter->cpu, iter->started);
4433
4434 /* Don't print started cpu buffer for the first entry of the trace */
4435 if (iter->idx > 1)
4436 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4437 iter->cpu);
4438 }
4439
print_trace_fmt(struct trace_iterator * iter)4440 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4441 {
4442 struct trace_array *tr = iter->tr;
4443 struct trace_seq *s = &iter->seq;
4444 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4445 struct trace_entry *entry;
4446 struct trace_event *event;
4447
4448 entry = iter->ent;
4449
4450 test_cpu_buff_start(iter);
4451
4452 event = ftrace_find_event(entry->type);
4453
4454 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4456 trace_print_lat_context(iter);
4457 else
4458 trace_print_context(iter);
4459 }
4460
4461 if (trace_seq_has_overflowed(s))
4462 return TRACE_TYPE_PARTIAL_LINE;
4463
4464 if (event) {
4465 if (tr->trace_flags & TRACE_ITER_FIELDS)
4466 return print_event_fields(iter, event);
4467 return event->funcs->trace(iter, sym_flags, event);
4468 }
4469
4470 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4471
4472 return trace_handle_return(s);
4473 }
4474
print_raw_fmt(struct trace_iterator * iter)4475 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4476 {
4477 struct trace_array *tr = iter->tr;
4478 struct trace_seq *s = &iter->seq;
4479 struct trace_entry *entry;
4480 struct trace_event *event;
4481
4482 entry = iter->ent;
4483
4484 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4485 trace_seq_printf(s, "%d %d %llu ",
4486 entry->pid, iter->cpu, iter->ts);
4487
4488 if (trace_seq_has_overflowed(s))
4489 return TRACE_TYPE_PARTIAL_LINE;
4490
4491 event = ftrace_find_event(entry->type);
4492 if (event)
4493 return event->funcs->raw(iter, 0, event);
4494
4495 trace_seq_printf(s, "%d ?\n", entry->type);
4496
4497 return trace_handle_return(s);
4498 }
4499
print_hex_fmt(struct trace_iterator * iter)4500 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4501 {
4502 struct trace_array *tr = iter->tr;
4503 struct trace_seq *s = &iter->seq;
4504 unsigned char newline = '\n';
4505 struct trace_entry *entry;
4506 struct trace_event *event;
4507
4508 entry = iter->ent;
4509
4510 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4511 SEQ_PUT_HEX_FIELD(s, entry->pid);
4512 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4513 SEQ_PUT_HEX_FIELD(s, iter->ts);
4514 if (trace_seq_has_overflowed(s))
4515 return TRACE_TYPE_PARTIAL_LINE;
4516 }
4517
4518 event = ftrace_find_event(entry->type);
4519 if (event) {
4520 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4521 if (ret != TRACE_TYPE_HANDLED)
4522 return ret;
4523 }
4524
4525 SEQ_PUT_FIELD(s, newline);
4526
4527 return trace_handle_return(s);
4528 }
4529
print_bin_fmt(struct trace_iterator * iter)4530 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4531 {
4532 struct trace_array *tr = iter->tr;
4533 struct trace_seq *s = &iter->seq;
4534 struct trace_entry *entry;
4535 struct trace_event *event;
4536
4537 entry = iter->ent;
4538
4539 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4540 SEQ_PUT_FIELD(s, entry->pid);
4541 SEQ_PUT_FIELD(s, iter->cpu);
4542 SEQ_PUT_FIELD(s, iter->ts);
4543 if (trace_seq_has_overflowed(s))
4544 return TRACE_TYPE_PARTIAL_LINE;
4545 }
4546
4547 event = ftrace_find_event(entry->type);
4548 return event ? event->funcs->binary(iter, 0, event) :
4549 TRACE_TYPE_HANDLED;
4550 }
4551
trace_empty(struct trace_iterator * iter)4552 int trace_empty(struct trace_iterator *iter)
4553 {
4554 struct ring_buffer_iter *buf_iter;
4555 int cpu;
4556
4557 /* If we are looking at one CPU buffer, only check that one */
4558 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4559 cpu = iter->cpu_file;
4560 buf_iter = trace_buffer_iter(iter, cpu);
4561 if (buf_iter) {
4562 if (!ring_buffer_iter_empty(buf_iter))
4563 return 0;
4564 } else {
4565 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4566 return 0;
4567 }
4568 return 1;
4569 }
4570
4571 for_each_tracing_cpu(cpu) {
4572 buf_iter = trace_buffer_iter(iter, cpu);
4573 if (buf_iter) {
4574 if (!ring_buffer_iter_empty(buf_iter))
4575 return 0;
4576 } else {
4577 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578 return 0;
4579 }
4580 }
4581
4582 return 1;
4583 }
4584
4585 /* Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4586 enum print_line_t print_trace_line(struct trace_iterator *iter)
4587 {
4588 struct trace_array *tr = iter->tr;
4589 unsigned long trace_flags = tr->trace_flags;
4590 enum print_line_t ret;
4591
4592 if (iter->lost_events) {
4593 if (iter->lost_events == (unsigned long)-1)
4594 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4595 iter->cpu);
4596 else
4597 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4598 iter->cpu, iter->lost_events);
4599 if (trace_seq_has_overflowed(&iter->seq))
4600 return TRACE_TYPE_PARTIAL_LINE;
4601 }
4602
4603 if (iter->trace && iter->trace->print_line) {
4604 ret = iter->trace->print_line(iter);
4605 if (ret != TRACE_TYPE_UNHANDLED)
4606 return ret;
4607 }
4608
4609 if (iter->ent->type == TRACE_BPUTS &&
4610 trace_flags & TRACE_ITER_PRINTK &&
4611 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4612 return trace_print_bputs_msg_only(iter);
4613
4614 if (iter->ent->type == TRACE_BPRINT &&
4615 trace_flags & TRACE_ITER_PRINTK &&
4616 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4617 return trace_print_bprintk_msg_only(iter);
4618
4619 if (iter->ent->type == TRACE_PRINT &&
4620 trace_flags & TRACE_ITER_PRINTK &&
4621 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4622 return trace_print_printk_msg_only(iter);
4623
4624 if (trace_flags & TRACE_ITER_BIN)
4625 return print_bin_fmt(iter);
4626
4627 if (trace_flags & TRACE_ITER_HEX)
4628 return print_hex_fmt(iter);
4629
4630 if (trace_flags & TRACE_ITER_RAW)
4631 return print_raw_fmt(iter);
4632
4633 return print_trace_fmt(iter);
4634 }
4635
trace_latency_header(struct seq_file * m)4636 void trace_latency_header(struct seq_file *m)
4637 {
4638 struct trace_iterator *iter = m->private;
4639 struct trace_array *tr = iter->tr;
4640
4641 /* print nothing if the buffers are empty */
4642 if (trace_empty(iter))
4643 return;
4644
4645 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4646 print_trace_header(m, iter);
4647
4648 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4649 print_lat_help_header(m);
4650 }
4651
trace_default_header(struct seq_file * m)4652 void trace_default_header(struct seq_file *m)
4653 {
4654 struct trace_iterator *iter = m->private;
4655 struct trace_array *tr = iter->tr;
4656 unsigned long trace_flags = tr->trace_flags;
4657
4658 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4659 return;
4660
4661 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4662 /* print nothing if the buffers are empty */
4663 if (trace_empty(iter))
4664 return;
4665 print_trace_header(m, iter);
4666 if (!(trace_flags & TRACE_ITER_VERBOSE))
4667 print_lat_help_header(m);
4668 } else {
4669 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4670 if (trace_flags & TRACE_ITER_IRQ_INFO)
4671 print_func_help_header_irq(iter->array_buffer,
4672 m, trace_flags);
4673 else
4674 print_func_help_header(iter->array_buffer, m,
4675 trace_flags);
4676 }
4677 }
4678 }
4679
/* Print a corruption warning into @m when ftrace_is_dead() reports true. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "# MAY BE MISSING FUNCTION EVENTS\n");
}
4687
4688 #ifdef CONFIG_TRACER_MAX_TRACE
/* Print the usage text for the main snapshot file into @m. */
static void show_snapshot_main_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "# (Doesn't have to be '2' works with any number that\n"
		    "# is not a '0' or '1')\n");
}
4698
/*
 * Print the usage text for a per_cpu snapshot file into @m. The text for
 * "echo 1" depends on whether CONFIG_RING_BUFFER_ALLOW_SWAP is set.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer for this cpu.\n");
#else
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "# Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "# (Doesn't have to be '2' works with any number that\n"
		    "# is not a '0' or '1')\n");
}
4713
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4714 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4715 {
4716 if (iter->tr->allocated_snapshot)
4717 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4718 else
4719 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4720
4721 seq_puts(m, "# Snapshot commands:\n");
4722 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4723 show_snapshot_main_help(m);
4724 else
4725 show_snapshot_percpu_help(m);
4726 }
4727 #else
/*
 * Empty stub used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4730 #endif
4731
/*
 * seq_file ->show callback for reading a trace.
 *
 * With no current entry (iter->ent == NULL) this prints the header part:
 * the tracer name, then snapshot help, the tracer's own header, or the
 * default header. Otherwise it prints the current entry, or re-emits the
 * content of iter->seq that did not fit into the seq_file last time
 * (iter->leftover).
 *
 * Always returns 0; an overflow is remembered in iter->leftover instead.
 */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* Clear the overflow mark so the notice below can be added */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 * ret is 0 if seq_file write succeeded.
		 * -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
4779
4780 /*
4781 * Should be used after trace_array_get(), trace_types_lock
4782 * ensures that i_cdev was already initialized.
4783 */
tracing_get_cpu(struct inode * inode)4784 static inline int tracing_get_cpu(struct inode *inode)
4785 {
4786 if (inode->i_cdev) /* See trace_create_cpu_file() */
4787 return (long)inode->i_cdev - 1;
4788 return RING_BUFFER_ALL_CPUS;
4789 }
4790
/* seq_file operations installed by __tracing_open() for reading a trace. */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
4797
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement to all the allocations
 * is that it must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
static void free_trace_iter_content(struct trace_iterator *iter)
{
	/* The fmt is either NULL, allocated or points to static_fmt_buf */
	if (iter->fmt != static_fmt_buf)
		kfree(iter->fmt);

	kfree(iter->temp);
	kfree(iter->buffer_iter);
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
}
4816
/*
 * Allocate and set up a trace iterator for reading @inode's trace through
 * seq_file.
 *
 * @inode:    inode whose i_private is the trace_array to read
 * @file:     file being opened; the iterator becomes its seq_file private data
 * @snapshot: true when opening the snapshot file (selects tr->max_buffer
 *            under CONFIG_TRACER_MAX_TRACE and is stored in iter->snapshot)
 *
 * Returns the iterator, ERR_PTR(-ENODEV) when tracing is disabled, or
 * ERR_PTR(-ENOMEM) when an allocation fails.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible cpu id */
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
				    GFP_KERNEL);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->max_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	/* -1 never matches a valid *pos, so the first s_start() resets the iterator */
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
		tracing_stop_tr(tr);

	/* Prepare all cpus first, sync once, then start and reset each one */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_prepare(iter->array_buffer->buffer,
							 cpu, GFP_KERNEL);
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_prepare(iter->array_buffer->buffer,
						 cpu, GFP_KERNEL);
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	/* Reached with trace_types_lock held */
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4929
tracing_open_generic(struct inode * inode,struct file * filp)4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932 int ret;
4933
4934 ret = tracing_check_open_get_tr(NULL);
4935 if (ret)
4936 return ret;
4937
4938 filp->private_data = inode->i_private;
4939 return 0;
4940 }
4941
tracing_is_disabled(void)4942 bool tracing_is_disabled(void)
4943 {
4944 return (tracing_disabled) ? true: false;
4945 }
4946
4947 /*
4948 * Open and update trace_array ref count.
4949 * Must have the current trace_array passed to it.
4950 */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953 struct trace_array *tr = inode->i_private;
4954 int ret;
4955
4956 ret = tracing_check_open_get_tr(tr);
4957 if (ret)
4958 return ret;
4959
4960 filp->private_data = inode->i_private;
4961
4962 return 0;
4963 }
4964
4965 /*
4966 * The private pointer of the inode is the trace_event_file.
4967 * Update the tr ref count associated to it.
4968 */
tracing_open_file_tr(struct inode * inode,struct file * filp)4969 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4970 {
4971 struct trace_event_file *file = inode->i_private;
4972 int ret;
4973
4974 ret = tracing_check_open_get_tr(file->tr);
4975 if (ret)
4976 return ret;
4977
4978 mutex_lock(&event_mutex);
4979
4980 /* Fail if the file is marked for removal */
4981 if (file->flags & EVENT_FILE_FL_FREED) {
4982 trace_array_put(file->tr);
4983 ret = -ENODEV;
4984 } else {
4985 event_file_get(file);
4986 }
4987
4988 mutex_unlock(&event_mutex);
4989 if (ret)
4990 return ret;
4991
4992 filp->private_data = inode->i_private;
4993
4994 return 0;
4995 }
4996
/*
 * Release counterpart of tracing_open_file_tr(): drop the trace_array and
 * trace_event_file references. Always returns 0.
 */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}
5006
/*
 * Drop the references via tracing_release_file_tr(), then finish with
 * single_release() and return its result.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}
5012
/*
 * Open handler that calls stream_open() on the file before doing the
 * generic trace_array open.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	stream_open(inode, filp);
	return tracing_open_generic_tr(inode, filp);
}
5018
/*
 * Release handler paired with tracing_open().
 *
 * A file not opened for reading has no iterator: only the trace_array
 * reference is dropped. Otherwise, under trace_types_lock, the per-cpu
 * buffer iterators are finished, the tracer's close() hook is called,
 * tracing is restarted if tr->stop_count is set (non-snapshot files only)
 * and the trace_array reference is dropped. The iterator content and the
 * seq_file private data are freed afterwards. Always returns 0.
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (!iter->snapshot && tr->stop_count)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	/* trace_types_lock is held here, hence the __ variant */
	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
5056
tracing_release_generic_tr(struct inode * inode,struct file * file)5057 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5058 {
5059 struct trace_array *tr = inode->i_private;
5060
5061 trace_array_put(tr);
5062 return 0;
5063 }
5064
tracing_single_release_tr(struct inode * inode,struct file * file)5065 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5066 {
5067 struct trace_array *tr = inode->i_private;
5068
5069 trace_array_put(tr);
5070
5071 return single_release(inode, file);
5072 }
5073
tracing_open(struct inode * inode,struct file * file)5074 static int tracing_open(struct inode *inode, struct file *file)
5075 {
5076 struct trace_array *tr = inode->i_private;
5077 struct trace_iterator *iter;
5078 int ret;
5079
5080 ret = tracing_check_open_get_tr(tr);
5081 if (ret)
5082 return ret;
5083
5084 /* If this file was open for write, then erase contents */
5085 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5086 int cpu = tracing_get_cpu(inode);
5087 struct array_buffer *trace_buf = &tr->array_buffer;
5088
5089 #ifdef CONFIG_TRACER_MAX_TRACE
5090 if (tr->current_trace->print_max)
5091 trace_buf = &tr->max_buffer;
5092 #endif
5093
5094 if (cpu == RING_BUFFER_ALL_CPUS)
5095 tracing_reset_online_cpus(trace_buf);
5096 else
5097 tracing_reset_cpu(trace_buf, cpu);
5098 }
5099
5100 if (file->f_mode & FMODE_READ) {
5101 iter = __tracing_open(inode, file, false);
5102 if (IS_ERR(iter))
5103 ret = PTR_ERR(iter);
5104 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5105 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5106 }
5107
5108 if (ret < 0)
5109 trace_array_put(tr);
5110
5111 return ret;
5112 }
5113
5114 /*
5115 * Some tracers are not suitable for instance buffers.
5116 * A tracer is always available for the global array (toplevel)
5117 * or if it explicitly states that it is.
5118 */
5119 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)5120 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5121 {
5122 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5123 }
5124
5125 /* Find the next tracer that this trace array may use */
5126 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)5127 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5128 {
5129 while (t && !trace_ok_for_array(t, tr))
5130 t = t->next;
5131
5132 return t;
5133 }
5134
5135 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)5136 t_next(struct seq_file *m, void *v, loff_t *pos)
5137 {
5138 struct trace_array *tr = m->private;
5139 struct tracer *t = v;
5140
5141 (*pos)++;
5142
5143 if (t)
5144 t = get_tracer_for_array(tr, t->next);
5145
5146 return t;
5147 }
5148
/*
 * seq_file ->start: take trace_types_lock (dropped again in t_stop())
 * and walk from the head of trace_types to the entry at index *pos,
 * counting only tracers this trace array may use.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t idx = 0;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	while (t && idx < *pos)
		t = t_next(m, t, &idx);

	return t;
}
5163
/* seq_file ->stop: release trace_types_lock, which t_start() acquired. */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
5168
t_show(struct seq_file * m,void * v)5169 static int t_show(struct seq_file *m, void *v)
5170 {
5171 struct tracer *t = v;
5172
5173 if (!t)
5174 return 0;
5175
5176 seq_puts(m, t->name);
5177 if (t->next)
5178 seq_putc(m, ' ');
5179 else
5180 seq_putc(m, '\n');
5181
5182 return 0;
5183 }
5184
5185 static const struct seq_operations show_traces_seq_ops = {
5186 .start = t_start,
5187 .next = t_next,
5188 .stop = t_stop,
5189 .show = t_show,
5190 };
5191
show_traces_open(struct inode * inode,struct file * file)5192 static int show_traces_open(struct inode *inode, struct file *file)
5193 {
5194 struct trace_array *tr = inode->i_private;
5195 struct seq_file *m;
5196 int ret;
5197
5198 ret = tracing_check_open_get_tr(tr);
5199 if (ret)
5200 return ret;
5201
5202 ret = seq_open(file, &show_traces_seq_ops);
5203 if (ret) {
5204 trace_array_put(tr);
5205 return ret;
5206 }
5207
5208 m = file->private_data;
5209 m->private = tr;
5210
5211 return 0;
5212 }
5213
show_traces_release(struct inode * inode,struct file * file)5214 static int show_traces_release(struct inode *inode, struct file *file)
5215 {
5216 struct trace_array *tr = inode->i_private;
5217
5218 trace_array_put(tr);
5219 return seq_release(inode, file);
5220 }
5221
/*
 * Write handler that ignores the data and reports all @count bytes as
 * written.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
5228
/*
 * llseek handler for tracing files.
 *
 * Files opened for reading delegate to seq_lseek().  Files that were not
 * opened for reading have their position reset to 0.
 *
 * The seq_lseek() result is returned directly as loff_t; it is not
 * funnelled through an int, which would truncate offsets that do not
 * fit in 32 bits.
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	if (file->f_mode & FMODE_READ)
		return seq_lseek(file, offset, whence);

	file->f_pos = 0;
	return 0;
}
5240
5241 static const struct file_operations tracing_fops = {
5242 .open = tracing_open,
5243 .read = seq_read,
5244 .read_iter = seq_read_iter,
5245 .splice_read = copy_splice_read,
5246 .write = tracing_write_stub,
5247 .llseek = tracing_lseek,
5248 .release = tracing_release,
5249 };
5250
5251 static const struct file_operations show_traces_fops = {
5252 .open = show_traces_open,
5253 .read = seq_read,
5254 .llseek = seq_lseek,
5255 .release = show_traces_release,
5256 };
5257
5258 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5259 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5260 size_t count, loff_t *ppos)
5261 {
5262 struct trace_array *tr = file_inode(filp)->i_private;
5263 char *mask_str;
5264 int len;
5265
5266 len = snprintf(NULL, 0, "%*pb\n",
5267 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5268 mask_str = kmalloc(len, GFP_KERNEL);
5269 if (!mask_str)
5270 return -ENOMEM;
5271
5272 len = snprintf(mask_str, len, "%*pb\n",
5273 cpumask_pr_args(tr->tracing_cpumask));
5274 if (len >= count) {
5275 count = -EINVAL;
5276 goto out_err;
5277 }
5278 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5279
5280 out_err:
5281 kfree(mask_str);
5282
5283 return count;
5284 }
5285
/*
 * Switch @tr over to @tracing_cpumask_new.
 *
 * For every tracing CPU whose bit differs between the current and the
 * new mask, the per-CPU "disabled" counter is adjusted and recording is
 * disabled (bit being cleared) or enabled (bit being set) on the ring
 * buffer(s).  That pass runs with interrupts off and tr->max_lock held;
 * afterwards the new mask is copied into tr->tracing_cpumask.
 *
 * Returns 0 on success, -EINVAL if @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		bool was_traced = cpumask_test_cpu(cpu, tr->tracing_cpumask);
		bool will_trace = cpumask_test_cpu(cpu, tracing_cpumask_new);

		/* Only CPUs whose bit flips need any work */
		if (was_traced == will_trace)
			continue;

		if (was_traced) {
			/* Bit goes 1 -> 0: stop recording on this CPU */
			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
#endif
		} else {
			/* Bit goes 0 -> 1: resume recording on this CPU */
			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
5325
5326 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5327 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5328 size_t count, loff_t *ppos)
5329 {
5330 struct trace_array *tr = file_inode(filp)->i_private;
5331 cpumask_var_t tracing_cpumask_new;
5332 int err;
5333
5334 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5335 return -ENOMEM;
5336
5337 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5338 if (err)
5339 goto err_free;
5340
5341 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5342 if (err)
5343 goto err_free;
5344
5345 free_cpumask_var(tracing_cpumask_new);
5346
5347 return count;
5348
5349 err_free:
5350 free_cpumask_var(tracing_cpumask_new);
5351
5352 return err;
5353 }
5354
5355 static const struct file_operations tracing_cpumask_fops = {
5356 .open = tracing_open_generic_tr,
5357 .read = tracing_cpumask_read,
5358 .write = tracing_cpumask_write,
5359 .release = tracing_release_generic_tr,
5360 .llseek = generic_file_llseek,
5361 };
5362
tracing_trace_options_show(struct seq_file * m,void * v)5363 static int tracing_trace_options_show(struct seq_file *m, void *v)
5364 {
5365 struct tracer_opt *trace_opts;
5366 struct trace_array *tr = m->private;
5367 u32 tracer_flags;
5368 int i;
5369
5370 mutex_lock(&trace_types_lock);
5371 tracer_flags = tr->current_trace->flags->val;
5372 trace_opts = tr->current_trace->flags->opts;
5373
5374 for (i = 0; trace_options[i]; i++) {
5375 if (tr->trace_flags & (1 << i))
5376 seq_printf(m, "%s\n", trace_options[i]);
5377 else
5378 seq_printf(m, "no%s\n", trace_options[i]);
5379 }
5380
5381 for (i = 0; trace_opts[i].name; i++) {
5382 if (tracer_flags & trace_opts[i].bit)
5383 seq_printf(m, "%s\n", trace_opts[i].name);
5384 else
5385 seq_printf(m, "no%s\n", trace_opts[i].name);
5386 }
5387 mutex_unlock(&trace_types_lock);
5388
5389 return 0;
5390 }
5391
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5392 static int __set_tracer_option(struct trace_array *tr,
5393 struct tracer_flags *tracer_flags,
5394 struct tracer_opt *opts, int neg)
5395 {
5396 struct tracer *trace = tracer_flags->trace;
5397 int ret;
5398
5399 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5400 if (ret)
5401 return ret;
5402
5403 if (neg)
5404 tracer_flags->val &= ~opts->bit;
5405 else
5406 tracer_flags->val |= opts->bit;
5407 return 0;
5408 }
5409
5410 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5411 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5412 {
5413 struct tracer *trace = tr->current_trace;
5414 struct tracer_flags *tracer_flags = trace->flags;
5415 struct tracer_opt *opts = NULL;
5416 int i;
5417
5418 for (i = 0; tracer_flags->opts[i].name; i++) {
5419 opts = &tracer_flags->opts[i];
5420
5421 if (strcmp(cmp, opts->name) == 0)
5422 return __set_tracer_option(tr, trace->flags, opts, neg);
5423 }
5424
5425 return -EINVAL;
5426 }
5427
5428 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5429 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5430 {
5431 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5432 return -1;
5433
5434 return 0;
5435 }
5436
/*
 * Set (@enabled non-zero) or clear (@enabled zero) the trace option
 * @mask on @tr and run the side effects tied to that option.
 *
 * For TRACE_ITER_RECORD_TGID and TRACE_ITER_RECORD_CMD the caller must
 * hold event_mutex (checked with lockdep).
 *
 * Returns 0 on success or when the flag already has the requested
 * value, -EINVAL if the current tracer's ->flag_changed() callback
 * returns non-zero, and -ENOMEM if the tgid map is needed but cannot
 * be allocated.
 */
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
	int *map;

	if ((mask == TRACE_ITER_RECORD_TGID) ||
	    (mask == TRACE_ITER_RECORD_CMD))
		lockdep_assert_held(&event_mutex);

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* The flag word is updated before any per-option side effect runs */
	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	if (mask == TRACE_ITER_RECORD_CMD)
		trace_event_enable_cmd_record(enabled);

	if (mask == TRACE_ITER_RECORD_TGID) {
		/* Allocate the map only if no map has been published yet */
		if (!tgid_map) {
			tgid_map_max = pid_max;
			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
				       GFP_KERNEL);

			/*
			 * Pairs with smp_load_acquire() in
			 * trace_find_tgid_ptr() to ensure that if it observes
			 * the tgid_map we just allocated then it also observes
			 * the corresponding tgid_map_max value.
			 */
			smp_store_release(&tgid_map, map);
		}
		/* Still NULL means the allocation failed: undo the flag */
		if (!tgid_map) {
			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
	}

	if (mask == TRACE_ITER_EVENT_FORK)
		trace_event_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_FUNC_FORK)
		ftrace_pid_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_OVERWRITE) {
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
#endif
	}

	if (mask == TRACE_ITER_PRINTK) {
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
	}

	return 0;
}
5504
trace_set_options(struct trace_array * tr,char * option)5505 int trace_set_options(struct trace_array *tr, char *option)
5506 {
5507 char *cmp;
5508 int neg = 0;
5509 int ret;
5510 size_t orig_len = strlen(option);
5511 int len;
5512
5513 cmp = strstrip(option);
5514
5515 len = str_has_prefix(cmp, "no");
5516 if (len)
5517 neg = 1;
5518
5519 cmp += len;
5520
5521 mutex_lock(&event_mutex);
5522 mutex_lock(&trace_types_lock);
5523
5524 ret = match_string(trace_options, -1, cmp);
5525 /* If no option could be set, test the specific tracer options */
5526 if (ret < 0)
5527 ret = set_tracer_option(tr, cmp, neg);
5528 else
5529 ret = set_tracer_flag(tr, 1 << ret, !neg);
5530
5531 mutex_unlock(&trace_types_lock);
5532 mutex_unlock(&event_mutex);
5533
5534 /*
5535 * If the first trailing whitespace is replaced with '\0' by strstrip,
5536 * turn it back into a space.
5537 */
5538 if (orig_len > strlen(option))
5539 option[strlen(option)] = ' ';
5540
5541 return ret;
5542 }
5543
apply_trace_boot_options(void)5544 static void __init apply_trace_boot_options(void)
5545 {
5546 char *buf = trace_boot_options_buf;
5547 char *option;
5548
5549 while (true) {
5550 option = strsep(&buf, ",");
5551
5552 if (!option)
5553 break;
5554
5555 if (*option)
5556 trace_set_options(&global_trace, option);
5557
5558 /* Put back the comma to allow this to be called again */
5559 if (buf)
5560 *(buf - 1) = ',';
5561 }
5562 }
5563
5564 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5565 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5566 size_t cnt, loff_t *ppos)
5567 {
5568 struct seq_file *m = filp->private_data;
5569 struct trace_array *tr = m->private;
5570 char buf[64];
5571 int ret;
5572
5573 if (cnt >= sizeof(buf))
5574 return -EINVAL;
5575
5576 if (copy_from_user(buf, ubuf, cnt))
5577 return -EFAULT;
5578
5579 buf[cnt] = 0;
5580
5581 ret = trace_set_options(tr, buf);
5582 if (ret < 0)
5583 return ret;
5584
5585 *ppos += cnt;
5586
5587 return cnt;
5588 }
5589
tracing_trace_options_open(struct inode * inode,struct file * file)5590 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5591 {
5592 struct trace_array *tr = inode->i_private;
5593 int ret;
5594
5595 ret = tracing_check_open_get_tr(tr);
5596 if (ret)
5597 return ret;
5598
5599 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5600 if (ret < 0)
5601 trace_array_put(tr);
5602
5603 return ret;
5604 }
5605
5606 static const struct file_operations tracing_iter_fops = {
5607 .open = tracing_trace_options_open,
5608 .read = seq_read,
5609 .llseek = seq_lseek,
5610 .release = tracing_single_release_tr,
5611 .write = tracing_trace_options_write,
5612 };
5613
/*
 * Text returned by tracing_readme_read(): a short how-to describing the
 * tracing control files.  It is assembled at compile time from adjacent
 * string literals; sections that describe optional features are only
 * included when the matching CONFIG_* symbol is defined.
 *
 * NOTE(review): the text is user-visible output and is left untouched
 * here.  It contains a few typos worth fixing separately: "is on of the
 * fields", a doubled "a" in the handlers paragraph ("whenever a" / "a
 * histogram entry"), and unterminated single quotes in the
 * "echo '!..." removal examples.
 */
static const char readme_msg[] =
"tracing mini-HOWTO:\n\n"
"# echo 0 > tracing_on : quick way to disable tracing\n"
"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
" Important files:\n"
" trace\t\t\t- The static contents of the buffer\n"
"\t\t\t To clear the buffer write into this file: echo > trace\n"
" trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
" current_tracer\t- function and latency tracers\n"
" available_tracers\t- list of configured tracers for current_tracer\n"
" error_log\t- error log for failed commands (that support it)\n"
" buffer_size_kb\t- view and modify size of per cpu buffer\n"
" buffer_total_size_kb - view total size of all cpu buffers\n\n"
" trace_clock\t\t- change the clock used to order events\n"
" local: Per cpu clock but may not be synced across CPUs\n"
" global: Synced across CPUs but slows tracing down.\n"
" counter: Not a clock, but just an increment\n"
" uptime: Jiffy counter from time of boot\n"
" perf: Same clock that perf events use\n"
#ifdef CONFIG_X86_64
" x86-tsc: TSC cycle counter\n"
#endif
"\n timestamp_mode\t- view the mode used to timestamp events\n"
" delta: Delta difference against a buffer-wide timestamp\n"
" absolute: Absolute (standalone) timestamp\n"
"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
"\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
" tracing_cpumask\t- Limit which CPUs to trace\n"
" instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
"\t\t\t Remove sub-buffer with rmdir\n"
" trace_options\t\t- Set format or modify how tracing happens\n"
"\t\t\t Disable an option by prefixing 'no' to the\n"
"\t\t\t option name\n"
" saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
"\n available_filter_functions - list of functions that can be filtered on\n"
" set_ftrace_filter\t- echo function name in here to only trace these\n"
"\t\t\t functions\n"
"\t accepts: func_full_name or glob-matching-pattern\n"
"\t modules: Can select a group via module\n"
"\t Format: :mod:<module-name>\n"
"\t example: echo :mod:ext3 > set_ftrace_filter\n"
"\t triggers: a command to perform when function is hit\n"
"\t Format: <function>:<trigger>[:count]\n"
"\t trigger: traceon, traceoff\n"
"\t\t enable_event:<system>:<event>\n"
"\t\t disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
"\t\t stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
"\t\t snapshot\n"
#endif
"\t\t dump\n"
"\t\t cpudump\n"
"\t example: echo do_fault:traceoff > set_ftrace_filter\n"
"\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
"\t The first one will disable tracing every time do_fault is hit\n"
"\t The second will disable tracing at most 3 times when do_trap is hit\n"
"\t The first time do trap is hit and it disables tracing, the\n"
"\t counter will decrement to 2. If tracing is already disabled,\n"
"\t the counter will not decrement. It only decrements when the\n"
"\t trigger did work\n"
"\t To remove trigger without count:\n"
"\t echo '!<function>:<trigger> > set_ftrace_filter\n"
"\t To remove trigger with a count:\n"
"\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
" set_ftrace_notrace\t- echo function name in here to never trace.\n"
"\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
"\t modules: Can select a group via module command :mod:\n"
"\t Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
" set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
"\t\t (function)\n"
" set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
"\t\t (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
" set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
" set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
" max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
"\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
"\t\t\t snapshot buffer. Read the contents for more\n"
"\t\t\t information\n"
#endif
#ifdef CONFIG_STACK_TRACER
" stack_trace\t\t- Shows the max stack trace when active\n"
" stack_max_size\t- Shows current max stack size that was traced\n"
"\t\t\t Write into this file to reset the max size (trigger a\n"
"\t\t\t new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
" stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
" dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
" kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
" uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
"\t accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
"\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
"\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
"\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
"\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
"\t s:[synthetic/]<event> <field> [<field>]\n"
#endif
"\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
"\t -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
" place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
"\t <argname>[->field[->field|.field...]],\n"
#else
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#endif
#else
"\t $stack<index>, $stack, $retval, $comm,\n"
#endif
"\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
"\t kernel return probes support: $retval, $arg<N>, $comm\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
"\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
"\t symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t field: <stype> <name>;\n"
"\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
"\t [unsigned] char/int/long\n"
#endif
"\t efield: For event probes ('e' types), the field is on of the fields\n"
"\t of the <attached-group>/<attached-event>.\n"
#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
" events/<system>/\t- Directory containing all trace events for <system>:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
"\t\t\t events\n"
" filter\t\t- If set, only events passing filter are traced\n"
" events/<system>/<event>/\t- Directory containing control files for\n"
"\t\t\t <event>:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
" filter\t\t- If set, only events passing filter are traced\n"
" trigger\t\t- If set, a command to perform when event is hit\n"
"\t Format: <trigger>[:count][if <filter>]\n"
"\t trigger: traceon, traceoff\n"
"\t enable_event:<system>:<event>\n"
"\t disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t enable_hist:<system>:<event>\n"
"\t disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
"\t\t stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
"\t\t snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
"\t\t hist (see below)\n"
#endif
"\t example: echo traceoff > events/block/block_unplug/trigger\n"
"\t echo traceoff:3 > events/block/block_unplug/trigger\n"
"\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
"\t events/block/block_unplug/trigger\n"
"\t The first disables tracing every time block_unplug is hit.\n"
"\t The second disables tracing the first 3 times block_unplug is hit.\n"
"\t The third enables the kmalloc event the first 3 times block_unplug\n"
"\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
"\t Like function triggers, the counter is only decremented if it\n"
"\t enabled or disabled tracing.\n"
"\t To remove a trigger without a count:\n"
"\t echo '!<trigger> > <system>/<event>/trigger\n"
"\t To remove a trigger with a count:\n"
"\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
"\t Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
" hist trigger\t- If set, event hits are aggregated into a hash table\n"
"\t Format: hist:keys=<field1[,field2,...]>\n"
"\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
"\t [:values=<field1[,field2,...]>]\n"
"\t [:sort=<field1[,field2,...]>]\n"
"\t [:size=#entries]\n"
"\t [:pause][:continue][:clear]\n"
"\t [:name=histname1]\n"
"\t [:nohitcount]\n"
"\t [:<handler>.<action>]\n"
"\t [if <filter>]\n\n"
"\t Note, special fields can be used as well:\n"
"\t common_timestamp - to record current timestamp\n"
"\t common_cpu - to record the CPU the event happened on\n"
"\n"
"\t A hist trigger variable can be:\n"
"\t - a reference to a field e.g. x=current_timestamp,\n"
"\t - a reference to another variable e.g. y=$x,\n"
"\t - a numeric literal: e.g. ms_per_sec=1000,\n"
"\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
"\n"
"\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
"\t multiplication(*) and division(/) operators. An operand can be either a\n"
"\t variable reference, field or numeric literal.\n"
"\n"
"\t When a matching event is hit, an entry is added to a hash\n"
"\t table using the key(s) and value(s) named, and the value of a\n"
"\t sum called 'hitcount' is incremented. Keys and values\n"
"\t correspond to fields in the event's format description. Keys\n"
"\t can be any field, or the special string 'common_stacktrace'.\n"
"\t Compound keys consisting of up to two fields can be specified\n"
"\t by the 'keys' keyword. Values must correspond to numeric\n"
"\t fields. Sort keys consisting of up to two fields can be\n"
"\t specified using the 'sort' keyword. The sort direction can\n"
"\t be modified by appending '.descending' or '.ascending' to a\n"
"\t sort field. The 'size' parameter can be used to specify more\n"
"\t or fewer than the default 2048 entries for the hashtable size.\n"
"\t If a hist trigger is given a name using the 'name' parameter,\n"
"\t its histogram data will be shared with other triggers of the\n"
"\t same name, and trigger hits will update this common data.\n\n"
"\t Reading the 'hist' file for the event will dump the hash\n"
"\t table in its entirety to stdout. If there are multiple hist\n"
"\t triggers attached to an event, there will be a table for each\n"
"\t trigger in the output. The table displayed for a named\n"
"\t trigger will be the same as any other instance having the\n"
"\t same name. The default format used to display a given field\n"
"\t can be modified by appending any of the following modifiers\n"
"\t to the field name, as applicable:\n\n"
"\t .hex display a number as a hex value\n"
"\t .sym display an address as a symbol\n"
"\t .sym-offset display an address as a symbol and offset\n"
"\t .execname display a common_pid as a program name\n"
"\t .syscall display a syscall id as a syscall name\n"
"\t .log2 display log2 value rather than raw number\n"
"\t .buckets=size display values in groups of size rather than raw number\n"
"\t .usecs display a common_timestamp in microseconds\n"
"\t .percent display a number of percentage value\n"
"\t .graph display a bar-graph of a value\n\n"
"\t The 'pause' parameter can be used to pause an existing hist\n"
"\t trigger or to start a hist trigger but not log any events\n"
"\t until told to do so. 'continue' can be used to start or\n"
"\t restart a paused hist trigger.\n\n"
"\t The 'clear' parameter will clear the contents of a running\n"
"\t hist trigger and leave its current paused/active state\n"
"\t unchanged.\n\n"
"\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
"\t raw hitcount in the histogram.\n\n"
"\t The enable_hist and disable_hist triggers can be used to\n"
"\t have one event conditionally start and stop another event's\n"
"\t already-attached hist trigger. The syntax is analogous to\n"
"\t the enable_event and disable_event triggers.\n\n"
"\t Hist trigger handlers and actions are executed whenever a\n"
"\t a histogram entry is added or updated. They take the form:\n\n"
"\t <handler>.<action>\n\n"
"\t The available handlers are:\n\n"
"\t onmatch(matching.event) - invoke on addition or update\n"
"\t onmax(var) - invoke if var exceeds current max\n"
"\t onchange(var) - invoke action if var changes\n\n"
"\t The available actions are:\n\n"
"\t trace(<synthetic_event>,param list) - generate synthetic event\n"
"\t save(field,...) - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
"\t snapshot() - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
" events/synthetic_events\t- Create/append/remove/show synthetic events\n"
"\t Write into this file to define/undefine new synthetic events.\n"
"\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
5907
5908 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5909 tracing_readme_read(struct file *filp, char __user *ubuf,
5910 size_t cnt, loff_t *ppos)
5911 {
5912 return simple_read_from_buffer(ubuf, cnt, ppos,
5913 readme_msg, strlen(readme_msg));
5914 }
5915
5916 static const struct file_operations tracing_readme_fops = {
5917 .open = tracing_open_generic,
5918 .read = tracing_readme_read,
5919 .llseek = generic_file_llseek,
5920 };
5921
/*
 * seq_file ->next: advance the position, which doubles as the pid, and
 * return whatever trace_find_tgid_ptr() yields for it.
 */
static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	int pid;

	*pos += 1;
	pid = *pos;

	return trace_find_tgid_ptr(pid);
}
5928
/*
 * seq_file ->start: the position is used directly as the pid to look up
 * with trace_find_tgid_ptr().
 */
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	return trace_find_tgid_ptr((int)*pos);
}
5935
/* seq_file ->stop: nothing to undo, saved_tgids_start() takes no lock. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5939
saved_tgids_show(struct seq_file * m,void * v)5940 static int saved_tgids_show(struct seq_file *m, void *v)
5941 {
5942 int *entry = (int *)v;
5943 int pid = entry - tgid_map;
5944 int tgid = *entry;
5945
5946 if (tgid == 0)
5947 return SEQ_SKIP;
5948
5949 seq_printf(m, "%d %d\n", pid, tgid);
5950 return 0;
5951 }
5952
5953 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5954 .start = saved_tgids_start,
5955 .stop = saved_tgids_stop,
5956 .next = saved_tgids_next,
5957 .show = saved_tgids_show,
5958 };
5959
tracing_saved_tgids_open(struct inode * inode,struct file * filp)5960 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5961 {
5962 int ret;
5963
5964 ret = tracing_check_open_get_tr(NULL);
5965 if (ret)
5966 return ret;
5967
5968 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5969 }
5970
5971
5972 static const struct file_operations tracing_saved_tgids_fops = {
5973 .open = tracing_saved_tgids_open,
5974 .read = seq_read,
5975 .llseek = seq_lseek,
5976 .release = seq_release,
5977 };
5978
/*
 * seq_file ->next: return the next slot of savedcmd->map_cmdline_to_pid
 * that holds a pid, i.e. is neither -1 nor NO_CMDLINE_MAP, or NULL once
 * the end of the map is reached.
 *
 * The slot at @v itself is only stepped over when something has already
 * been consumed (*pos or m->count is non-zero).
 */
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *slot = v;

	if (*pos || m->count)
		slot++;

	(*pos)++;

	while (slot < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num]) {
		if (!(*slot == -1 || *slot == NO_CMDLINE_MAP))
			return slot;
		slot++;
	}

	return NULL;
}
5998
/*
 * seq_file ->start: disable preemption and take trace_cmdline_lock (both
 * undone in saved_cmdlines_stop()), then step from the first map slot
 * with saved_cmdlines_next() until the local position passes *pos.
 * Returns NULL if the map runs out first.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	loff_t idx = 0;
	void *v;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (v && idx <= *pos)
		v = saved_cmdlines_next(m, v, &idx);

	return v;
}
6016
/*
 * seq_file ->stop: drop trace_cmdline_lock and re-enable preemption,
 * undoing what saved_cmdlines_start() set up.
 */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
6022
saved_cmdlines_show(struct seq_file * m,void * v)6023 static int saved_cmdlines_show(struct seq_file *m, void *v)
6024 {
6025 char buf[TASK_COMM_LEN];
6026 unsigned int *pid = v;
6027
6028 __trace_find_cmdline(*pid, buf);
6029 seq_printf(m, "%d %s\n", *pid, buf);
6030 return 0;
6031 }
6032
6033 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6034 .start = saved_cmdlines_start,
6035 .next = saved_cmdlines_next,
6036 .stop = saved_cmdlines_stop,
6037 .show = saved_cmdlines_show,
6038 };
6039
tracing_saved_cmdlines_open(struct inode * inode,struct file * filp)6040 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6041 {
6042 int ret;
6043
6044 ret = tracing_check_open_get_tr(NULL);
6045 if (ret)
6046 return ret;
6047
6048 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6049 }
6050
6051 static const struct file_operations tracing_saved_cmdlines_fops = {
6052 .open = tracing_saved_cmdlines_open,
6053 .read = seq_read,
6054 .llseek = seq_lseek,
6055 .release = seq_release,
6056 };
6057
6058 static ssize_t
tracing_saved_cmdlines_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6059 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6060 size_t cnt, loff_t *ppos)
6061 {
6062 char buf[64];
6063 int r;
6064
6065 preempt_disable();
6066 arch_spin_lock(&trace_cmdline_lock);
6067 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6068 arch_spin_unlock(&trace_cmdline_lock);
6069 preempt_enable();
6070
6071 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6072 }
6073
tracing_resize_saved_cmdlines(unsigned int val)6074 static int tracing_resize_saved_cmdlines(unsigned int val)
6075 {
6076 struct saved_cmdlines_buffer *s, *savedcmd_temp;
6077
6078 s = allocate_cmdlines_buffer(val);
6079 if (!s)
6080 return -ENOMEM;
6081
6082 preempt_disable();
6083 arch_spin_lock(&trace_cmdline_lock);
6084 savedcmd_temp = savedcmd;
6085 savedcmd = s;
6086 arch_spin_unlock(&trace_cmdline_lock);
6087 preempt_enable();
6088 free_saved_cmdlines_buffer(savedcmd_temp);
6089
6090 return 0;
6091 }
6092
6093 static ssize_t
tracing_saved_cmdlines_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6094 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6095 size_t cnt, loff_t *ppos)
6096 {
6097 unsigned long val;
6098 int ret;
6099
6100 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6101 if (ret)
6102 return ret;
6103
6104 /* must have at least 1 entry or less than PID_MAX_DEFAULT */
6105 if (!val || val > PID_MAX_DEFAULT)
6106 return -EINVAL;
6107
6108 ret = tracing_resize_saved_cmdlines((unsigned int)val);
6109 if (ret < 0)
6110 return ret;
6111
6112 *ppos += cnt;
6113
6114 return cnt;
6115 }
6116
6117 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6118 .open = tracing_open_generic,
6119 .read = tracing_saved_cmdlines_size_read,
6120 .write = tracing_saved_cmdlines_size_write,
6121 };
6122
6123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6124 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)6125 update_eval_map(union trace_eval_map_item *ptr)
6126 {
6127 if (!ptr->map.eval_string) {
6128 if (ptr->tail.next) {
6129 ptr = ptr->tail.next;
6130 /* Set ptr to the next real item (skip head) */
6131 ptr++;
6132 } else
6133 return NULL;
6134 }
6135 return ptr;
6136 }
6137
/*
 * seq_file ->next for the eval map listing.
 *
 * Increments *pos, normalizes the incoming cursor, steps to the following
 * item and normalizes again so the result is either a real map entry or
 * NULL at the end of the list.
 */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *item;

	(*pos)++;

	/*
	 * Paranoia: the cursor handed in should never already be at the
	 * end. If it is, warn once and stop rather than step past it.
	 */
	item = update_eval_map(v);
	if (WARN_ON_ONCE(!item))
		return NULL;

	return update_eval_map(item + 1);
}
6156
/*
 * seq_file ->start for the eval map listing.
 *
 * Takes trace_eval_mutex (dropped in eval_map_stop()), positions the
 * cursor on the item following the first array's head, and then advances
 * it with eval_map_next() until the local position reaches *pos.
 *
 * Returns the item to show, or NULL if there is none.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t idx;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	if (item)
		item++;

	for (idx = 0; item && idx < *pos; )
		item = eval_map_next(m, item, &idx);

	return item;
}
6174
eval_map_stop(struct seq_file * m,void * v)6175 static void eval_map_stop(struct seq_file *m, void *v)
6176 {
6177 mutex_unlock(&trace_eval_mutex);
6178 }
6179
eval_map_show(struct seq_file * m,void * v)6180 static int eval_map_show(struct seq_file *m, void *v)
6181 {
6182 union trace_eval_map_item *ptr = v;
6183
6184 seq_printf(m, "%s %ld (%s)\n",
6185 ptr->map.eval_string, ptr->map.eval_value,
6186 ptr->map.system);
6187
6188 return 0;
6189 }
6190
6191 static const struct seq_operations tracing_eval_map_seq_ops = {
6192 .start = eval_map_start,
6193 .next = eval_map_next,
6194 .stop = eval_map_stop,
6195 .show = eval_map_show,
6196 };
6197
tracing_eval_map_open(struct inode * inode,struct file * filp)6198 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6199 {
6200 int ret;
6201
6202 ret = tracing_check_open_get_tr(NULL);
6203 if (ret)
6204 return ret;
6205
6206 return seq_open(filp, &tracing_eval_map_seq_ops);
6207 }
6208
6209 static const struct file_operations tracing_eval_map_fops = {
6210 .open = tracing_eval_map_open,
6211 .read = seq_read,
6212 .llseek = seq_lseek,
6213 .release = seq_release,
6214 };
6215
6216 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)6217 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6218 {
6219 /* Return tail of array given the head */
6220 return ptr + ptr->head.length + 1;
6221 }
6222
6223 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)6224 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6225 int len)
6226 {
6227 struct trace_eval_map **stop;
6228 struct trace_eval_map **map;
6229 union trace_eval_map_item *map_array;
6230 union trace_eval_map_item *ptr;
6231
6232 stop = start + len;
6233
6234 /*
6235 * The trace_eval_maps contains the map plus a head and tail item,
6236 * where the head holds the module and length of array, and the
6237 * tail holds a pointer to the next list.
6238 */
6239 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6240 if (!map_array) {
6241 pr_warn("Unable to allocate trace eval mapping\n");
6242 return;
6243 }
6244
6245 mutex_lock(&trace_eval_mutex);
6246
6247 if (!trace_eval_maps)
6248 trace_eval_maps = map_array;
6249 else {
6250 ptr = trace_eval_maps;
6251 for (;;) {
6252 ptr = trace_eval_jmp_to_tail(ptr);
6253 if (!ptr->tail.next)
6254 break;
6255 ptr = ptr->tail.next;
6256
6257 }
6258 ptr->tail.next = map_array;
6259 }
6260 map_array->head.mod = mod;
6261 map_array->head.length = len;
6262 map_array++;
6263
6264 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6265 map_array->map = **map;
6266 map_array++;
6267 }
6268 memset(map_array, 0, sizeof(*map_array));
6269
6270 mutex_unlock(&trace_eval_mutex);
6271 }
6272
trace_create_eval_file(struct dentry * d_tracer)6273 static void trace_create_eval_file(struct dentry *d_tracer)
6274 {
6275 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6276 NULL, &tracing_eval_map_fops);
6277 }
6278
6279 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Without CONFIG_TRACE_EVAL_MAP_FILE both helpers are empty stubs. */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}

static inline void trace_insert_eval_map_file(struct module *mod,
					      struct trace_eval_map **start,
					      int len)
{
}
6283 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6284
/*
 * Register @len eval maps starting at @start on behalf of @mod: first let
 * the trace events update themselves from the maps, then record the maps
 * for the eval_map file. Does nothing when @len is not positive.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len <= 0)
		return;

	trace_event_eval_update(start, len);
	trace_insert_eval_map_file(mod, start, len);
}
6299
6300 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6301 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6302 size_t cnt, loff_t *ppos)
6303 {
6304 struct trace_array *tr = filp->private_data;
6305 char buf[MAX_TRACER_SIZE+2];
6306 int r;
6307
6308 mutex_lock(&trace_types_lock);
6309 r = sprintf(buf, "%s\n", tr->current_trace->name);
6310 mutex_unlock(&trace_types_lock);
6311
6312 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6313 }
6314
tracer_init(struct tracer * t,struct trace_array * tr)6315 int tracer_init(struct tracer *t, struct trace_array *tr)
6316 {
6317 tracing_reset_online_cpus(&tr->array_buffer);
6318 return t->init(tr);
6319 }
6320
set_buffer_entries(struct array_buffer * buf,unsigned long val)6321 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6322 {
6323 int cpu;
6324
6325 for_each_tracing_cpu(cpu)
6326 per_cpu_ptr(buf->data, cpu)->entries = val;
6327 }
6328
update_buffer_entries(struct array_buffer * buf,int cpu)6329 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6330 {
6331 if (cpu == RING_BUFFER_ALL_CPUS) {
6332 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6333 } else {
6334 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6335 }
6336 }
6337
6338 #ifdef CONFIG_TRACER_MAX_TRACE
6339 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)6340 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6341 struct array_buffer *size_buf, int cpu_id)
6342 {
6343 int cpu, ret = 0;
6344
6345 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6346 for_each_tracing_cpu(cpu) {
6347 ret = ring_buffer_resize(trace_buf->buffer,
6348 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6349 if (ret < 0)
6350 break;
6351 per_cpu_ptr(trace_buf->data, cpu)->entries =
6352 per_cpu_ptr(size_buf->data, cpu)->entries;
6353 }
6354 } else {
6355 ret = ring_buffer_resize(trace_buf->buffer,
6356 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6357 if (ret == 0)
6358 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6359 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6360 }
6361
6362 return ret;
6363 }
6364 #endif /* CONFIG_TRACER_MAX_TRACE */
6365
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)6366 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6367 unsigned long size, int cpu)
6368 {
6369 int ret;
6370
6371 /*
6372 * If kernel or user changes the size of the ring buffer
6373 * we use the size that was given, and we can forget about
6374 * expanding it later.
6375 */
6376 ring_buffer_expanded = true;
6377
6378 /* May be called before buffers are initialized */
6379 if (!tr->array_buffer.buffer)
6380 return 0;
6381
6382 /* Do not allow tracing while resizing ring buffer */
6383 tracing_stop_tr(tr);
6384
6385 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6386 if (ret < 0)
6387 goto out_start;
6388
6389 #ifdef CONFIG_TRACER_MAX_TRACE
6390 if (!tr->allocated_snapshot)
6391 goto out;
6392
6393 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6394 if (ret < 0) {
6395 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6396 &tr->array_buffer, cpu);
6397 if (r < 0) {
6398 /*
6399 * AARGH! We are left with different
6400 * size max buffer!!!!
6401 * The max buffer is our "snapshot" buffer.
6402 * When a tracer needs a snapshot (one of the
6403 * latency tracers), it swaps the max buffer
6404 * with the saved snap shot. We succeeded to
6405 * update the size of the main buffer, but failed to
6406 * update the size of the max buffer. But when we tried
6407 * to reset the main buffer to the original size, we
6408 * failed there too. This is very unlikely to
6409 * happen, but if it does, warn and kill all
6410 * tracing.
6411 */
6412 WARN_ON(1);
6413 tracing_disabled = 1;
6414 }
6415 goto out_start;
6416 }
6417
6418 update_buffer_entries(&tr->max_buffer, cpu);
6419
6420 out:
6421 #endif /* CONFIG_TRACER_MAX_TRACE */
6422
6423 update_buffer_entries(&tr->array_buffer, cpu);
6424 out_start:
6425 tracing_start_tr(tr);
6426 return ret;
6427 }
6428
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6429 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6430 unsigned long size, int cpu_id)
6431 {
6432 int ret;
6433
6434 mutex_lock(&trace_types_lock);
6435
6436 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6437 /* make sure, this cpu is enabled in the mask */
6438 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6439 ret = -EINVAL;
6440 goto out;
6441 }
6442 }
6443
6444 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6445 if (ret < 0)
6446 ret = -ENOMEM;
6447
6448 out:
6449 mutex_unlock(&trace_types_lock);
6450
6451 return ret;
6452 }
6453
6454
6455 /**
6456 * tracing_update_buffers - used by tracing facility to expand ring buffers
6457 *
6458 * To save on memory when the tracing is never used on a system with it
6459 * configured in. The ring buffers are set to a minimum size. But once
6460 * a user starts to use the tracing facility, then they need to grow
6461 * to their default size.
6462 *
6463 * This function is to be called when a tracer is about to be used.
6464 */
tracing_update_buffers(void)6465 int tracing_update_buffers(void)
6466 {
6467 int ret = 0;
6468
6469 mutex_lock(&trace_types_lock);
6470 if (!ring_buffer_expanded)
6471 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6472 RING_BUFFER_ALL_CPUS);
6473 mutex_unlock(&trace_types_lock);
6474
6475 return ret;
6476 }
6477
6478 struct trace_option_dentry;
6479
6480 static void
6481 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6482
6483 /*
6484 * Used to clear out the tracer before deletion of an instance.
6485 * Must have trace_types_lock held.
6486 */
tracing_set_nop(struct trace_array * tr)6487 static void tracing_set_nop(struct trace_array *tr)
6488 {
6489 if (tr->current_trace == &nop_trace)
6490 return;
6491
6492 tr->current_trace->enabled--;
6493
6494 if (tr->current_trace->reset)
6495 tr->current_trace->reset(tr);
6496
6497 tr->current_trace = &nop_trace;
6498 }
6499
6500 static bool tracer_options_updated;
6501
add_tracer_options(struct trace_array * tr,struct tracer * t)6502 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6503 {
6504 /* Only enable if the directory has been created already. */
6505 if (!tr->dir)
6506 return;
6507
6508 /* Only create trace option files after update_tracer_options finish */
6509 if (!tracer_options_updated)
6510 return;
6511
6512 create_trace_option_files(tr, t);
6513 }
6514
tracing_set_tracer(struct trace_array * tr,const char * buf)6515 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6516 {
6517 struct tracer *t;
6518 #ifdef CONFIG_TRACER_MAX_TRACE
6519 bool had_max_tr;
6520 #endif
6521 int ret = 0;
6522
6523 mutex_lock(&trace_types_lock);
6524
6525 if (!ring_buffer_expanded) {
6526 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6527 RING_BUFFER_ALL_CPUS);
6528 if (ret < 0)
6529 goto out;
6530 ret = 0;
6531 }
6532
6533 for (t = trace_types; t; t = t->next) {
6534 if (strcmp(t->name, buf) == 0)
6535 break;
6536 }
6537 if (!t) {
6538 ret = -EINVAL;
6539 goto out;
6540 }
6541 if (t == tr->current_trace)
6542 goto out;
6543
6544 #ifdef CONFIG_TRACER_SNAPSHOT
6545 if (t->use_max_tr) {
6546 local_irq_disable();
6547 arch_spin_lock(&tr->max_lock);
6548 if (tr->cond_snapshot)
6549 ret = -EBUSY;
6550 arch_spin_unlock(&tr->max_lock);
6551 local_irq_enable();
6552 if (ret)
6553 goto out;
6554 }
6555 #endif
6556 /* Some tracers won't work on kernel command line */
6557 if (system_state < SYSTEM_RUNNING && t->noboot) {
6558 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6559 t->name);
6560 goto out;
6561 }
6562
6563 /* Some tracers are only allowed for the top level buffer */
6564 if (!trace_ok_for_array(t, tr)) {
6565 ret = -EINVAL;
6566 goto out;
6567 }
6568
6569 /* If trace pipe files are being read, we can't change the tracer */
6570 if (tr->trace_ref) {
6571 ret = -EBUSY;
6572 goto out;
6573 }
6574
6575 trace_branch_disable();
6576
6577 tr->current_trace->enabled--;
6578
6579 if (tr->current_trace->reset)
6580 tr->current_trace->reset(tr);
6581
6582 #ifdef CONFIG_TRACER_MAX_TRACE
6583 had_max_tr = tr->current_trace->use_max_tr;
6584
6585 /* Current trace needs to be nop_trace before synchronize_rcu */
6586 tr->current_trace = &nop_trace;
6587
6588 if (had_max_tr && !t->use_max_tr) {
6589 /*
6590 * We need to make sure that the update_max_tr sees that
6591 * current_trace changed to nop_trace to keep it from
6592 * swapping the buffers after we resize it.
6593 * The update_max_tr is called from interrupts disabled
6594 * so a synchronized_sched() is sufficient.
6595 */
6596 synchronize_rcu();
6597 free_snapshot(tr);
6598 }
6599
6600 if (t->use_max_tr && !tr->allocated_snapshot) {
6601 ret = tracing_alloc_snapshot_instance(tr);
6602 if (ret < 0)
6603 goto out;
6604 }
6605 #else
6606 tr->current_trace = &nop_trace;
6607 #endif
6608
6609 if (t->init) {
6610 ret = tracer_init(t, tr);
6611 if (ret)
6612 goto out;
6613 }
6614
6615 tr->current_trace = t;
6616 tr->current_trace->enabled++;
6617 trace_branch_enable(tr);
6618 out:
6619 mutex_unlock(&trace_types_lock);
6620
6621 return ret;
6622 }
6623
6624 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6625 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6626 size_t cnt, loff_t *ppos)
6627 {
6628 struct trace_array *tr = filp->private_data;
6629 char buf[MAX_TRACER_SIZE+1];
6630 char *name;
6631 size_t ret;
6632 int err;
6633
6634 ret = cnt;
6635
6636 if (cnt > MAX_TRACER_SIZE)
6637 cnt = MAX_TRACER_SIZE;
6638
6639 if (copy_from_user(buf, ubuf, cnt))
6640 return -EFAULT;
6641
6642 buf[cnt] = 0;
6643
6644 name = strim(buf);
6645
6646 err = tracing_set_tracer(tr, name);
6647 if (err)
6648 return err;
6649
6650 *ppos += ret;
6651
6652 return ret;
6653 }
6654
6655 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6656 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6657 size_t cnt, loff_t *ppos)
6658 {
6659 char buf[64];
6660 int r;
6661
6662 r = snprintf(buf, sizeof(buf), "%ld\n",
6663 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6664 if (r > sizeof(buf))
6665 r = sizeof(buf);
6666 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6667 }
6668
6669 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6670 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6671 size_t cnt, loff_t *ppos)
6672 {
6673 unsigned long val;
6674 int ret;
6675
6676 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6677 if (ret)
6678 return ret;
6679
6680 *ptr = val * 1000;
6681
6682 return cnt;
6683 }
6684
6685 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6686 tracing_thresh_read(struct file *filp, char __user *ubuf,
6687 size_t cnt, loff_t *ppos)
6688 {
6689 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6690 }
6691
6692 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6693 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6694 size_t cnt, loff_t *ppos)
6695 {
6696 struct trace_array *tr = filp->private_data;
6697 int ret;
6698
6699 mutex_lock(&trace_types_lock);
6700 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6701 if (ret < 0)
6702 goto out;
6703
6704 if (tr->current_trace->update_thresh) {
6705 ret = tr->current_trace->update_thresh(tr);
6706 if (ret < 0)
6707 goto out;
6708 }
6709
6710 ret = cnt;
6711 out:
6712 mutex_unlock(&trace_types_lock);
6713
6714 return ret;
6715 }
6716
6717 #ifdef CONFIG_TRACER_MAX_TRACE
6718
6719 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6720 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6721 size_t cnt, loff_t *ppos)
6722 {
6723 struct trace_array *tr = filp->private_data;
6724
6725 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6726 }
6727
6728 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6729 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6730 size_t cnt, loff_t *ppos)
6731 {
6732 struct trace_array *tr = filp->private_data;
6733
6734 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6735 }
6736
6737 #endif
6738
open_pipe_on_cpu(struct trace_array * tr,int cpu)6739 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6740 {
6741 if (cpu == RING_BUFFER_ALL_CPUS) {
6742 if (cpumask_empty(tr->pipe_cpumask)) {
6743 cpumask_setall(tr->pipe_cpumask);
6744 return 0;
6745 }
6746 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6747 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6748 return 0;
6749 }
6750 return -EBUSY;
6751 }
6752
close_pipe_on_cpu(struct trace_array * tr,int cpu)6753 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6754 {
6755 if (cpu == RING_BUFFER_ALL_CPUS) {
6756 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6757 cpumask_clear(tr->pipe_cpumask);
6758 } else {
6759 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6760 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6761 }
6762 }
6763
tracing_open_pipe(struct inode * inode,struct file * filp)6764 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6765 {
6766 struct trace_array *tr = inode->i_private;
6767 struct trace_iterator *iter;
6768 int cpu;
6769 int ret;
6770
6771 ret = tracing_check_open_get_tr(tr);
6772 if (ret)
6773 return ret;
6774
6775 mutex_lock(&trace_types_lock);
6776 cpu = tracing_get_cpu(inode);
6777 ret = open_pipe_on_cpu(tr, cpu);
6778 if (ret)
6779 goto fail_pipe_on_cpu;
6780
6781 /* create a buffer to store the information to pass to userspace */
6782 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6783 if (!iter) {
6784 ret = -ENOMEM;
6785 goto fail_alloc_iter;
6786 }
6787
6788 trace_seq_init(&iter->seq);
6789 iter->trace = tr->current_trace;
6790
6791 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6792 ret = -ENOMEM;
6793 goto fail;
6794 }
6795
6796 /* trace pipe does not show start of buffer */
6797 cpumask_setall(iter->started);
6798
6799 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6800 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6801
6802 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6803 if (trace_clocks[tr->clock_id].in_ns)
6804 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6805
6806 iter->tr = tr;
6807 iter->array_buffer = &tr->array_buffer;
6808 iter->cpu_file = cpu;
6809 mutex_init(&iter->mutex);
6810 filp->private_data = iter;
6811
6812 if (iter->trace->pipe_open)
6813 iter->trace->pipe_open(iter);
6814
6815 nonseekable_open(inode, filp);
6816
6817 tr->trace_ref++;
6818
6819 mutex_unlock(&trace_types_lock);
6820 return ret;
6821
6822 fail:
6823 kfree(iter);
6824 fail_alloc_iter:
6825 close_pipe_on_cpu(tr, cpu);
6826 fail_pipe_on_cpu:
6827 __trace_array_put(tr);
6828 mutex_unlock(&trace_types_lock);
6829 return ret;
6830 }
6831
tracing_release_pipe(struct inode * inode,struct file * file)6832 static int tracing_release_pipe(struct inode *inode, struct file *file)
6833 {
6834 struct trace_iterator *iter = file->private_data;
6835 struct trace_array *tr = inode->i_private;
6836
6837 mutex_lock(&trace_types_lock);
6838
6839 tr->trace_ref--;
6840
6841 if (iter->trace->pipe_close)
6842 iter->trace->pipe_close(iter);
6843 close_pipe_on_cpu(tr, iter->cpu_file);
6844 mutex_unlock(&trace_types_lock);
6845
6846 free_trace_iter_content(iter);
6847 kfree(iter);
6848
6849 trace_array_put(tr);
6850
6851 return 0;
6852 }
6853
6854 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6855 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6856 {
6857 struct trace_array *tr = iter->tr;
6858
6859 /* Iterators are static, they should be filled or empty */
6860 if (trace_buffer_iter(iter, iter->cpu_file))
6861 return EPOLLIN | EPOLLRDNORM;
6862
6863 if (tr->trace_flags & TRACE_ITER_BLOCK)
6864 /*
6865 * Always select as readable when in blocking mode
6866 */
6867 return EPOLLIN | EPOLLRDNORM;
6868 else
6869 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6870 filp, poll_table, iter->tr->buffer_percent);
6871 }
6872
6873 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6874 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6875 {
6876 struct trace_iterator *iter = filp->private_data;
6877
6878 return trace_poll(iter, filp, poll_table);
6879 }
6880
6881 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6882 static int tracing_wait_pipe(struct file *filp)
6883 {
6884 struct trace_iterator *iter = filp->private_data;
6885 int ret;
6886
6887 while (trace_empty(iter)) {
6888
6889 if ((filp->f_flags & O_NONBLOCK)) {
6890 return -EAGAIN;
6891 }
6892
6893 /*
6894 * We block until we read something and tracing is disabled.
6895 * We still block if tracing is disabled, but we have never
6896 * read anything. This allows a user to cat this file, and
6897 * then enable tracing. But after we have read something,
6898 * we give an EOF when tracing is again disabled.
6899 *
6900 * iter->pos will be 0 if we haven't read anything.
6901 */
6902 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6903 break;
6904
6905 mutex_unlock(&iter->mutex);
6906
6907 ret = wait_on_pipe(iter, 0);
6908
6909 mutex_lock(&iter->mutex);
6910
6911 if (ret)
6912 return ret;
6913 }
6914
6915 return 1;
6916 }
6917
6918 /*
6919 * Consumer reader.
6920 */
6921 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6922 tracing_read_pipe(struct file *filp, char __user *ubuf,
6923 size_t cnt, loff_t *ppos)
6924 {
6925 struct trace_iterator *iter = filp->private_data;
6926 ssize_t sret;
6927
6928 /*
6929 * Avoid more than one consumer on a single file descriptor
6930 * This is just a matter of traces coherency, the ring buffer itself
6931 * is protected.
6932 */
6933 mutex_lock(&iter->mutex);
6934
6935 /* return any leftover data */
6936 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6937 if (sret != -EBUSY)
6938 goto out;
6939
6940 trace_seq_init(&iter->seq);
6941
6942 if (iter->trace->read) {
6943 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6944 if (sret)
6945 goto out;
6946 }
6947
6948 waitagain:
6949 sret = tracing_wait_pipe(filp);
6950 if (sret <= 0)
6951 goto out;
6952
6953 /* stop when tracing is finished */
6954 if (trace_empty(iter)) {
6955 sret = 0;
6956 goto out;
6957 }
6958
6959 if (cnt >= PAGE_SIZE)
6960 cnt = PAGE_SIZE - 1;
6961
6962 /* reset all but tr, trace, and overruns */
6963 trace_iterator_reset(iter);
6964 cpumask_clear(iter->started);
6965 trace_seq_init(&iter->seq);
6966
6967 trace_event_read_lock();
6968 trace_access_lock(iter->cpu_file);
6969 while (trace_find_next_entry_inc(iter) != NULL) {
6970 enum print_line_t ret;
6971 int save_len = iter->seq.seq.len;
6972
6973 ret = print_trace_line(iter);
6974 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6975 /*
6976 * If one print_trace_line() fills entire trace_seq in one shot,
6977 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6978 * In this case, we need to consume it, otherwise, loop will peek
6979 * this event next time, resulting in an infinite loop.
6980 */
6981 if (save_len == 0) {
6982 iter->seq.full = 0;
6983 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6984 trace_consume(iter);
6985 break;
6986 }
6987
6988 /* In other cases, don't print partial lines */
6989 iter->seq.seq.len = save_len;
6990 break;
6991 }
6992 if (ret != TRACE_TYPE_NO_CONSUME)
6993 trace_consume(iter);
6994
6995 if (trace_seq_used(&iter->seq) >= cnt)
6996 break;
6997
6998 /*
6999 * Setting the full flag means we reached the trace_seq buffer
7000 * size and we should leave by partial output condition above.
7001 * One of the trace_seq_* functions is not used properly.
7002 */
7003 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7004 iter->ent->type);
7005 }
7006 trace_access_unlock(iter->cpu_file);
7007 trace_event_read_unlock();
7008
7009 /* Now copy what we have to the user */
7010 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7011 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
7012 trace_seq_init(&iter->seq);
7013
7014 /*
7015 * If there was nothing to send to user, in spite of consuming trace
7016 * entries, go back to wait for more entries.
7017 */
7018 if (sret == -EBUSY)
7019 goto waitagain;
7020
7021 out:
7022 mutex_unlock(&iter->mutex);
7023
7024 return sret;
7025 }
7026
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)7027 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7028 unsigned int idx)
7029 {
7030 __free_page(spd->pages[idx]);
7031 }
7032
7033 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)7034 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7035 {
7036 size_t count;
7037 int save_len;
7038 int ret;
7039
7040 /* Seq buffer is page-sized, exactly what we need. */
7041 for (;;) {
7042 save_len = iter->seq.seq.len;
7043 ret = print_trace_line(iter);
7044
7045 if (trace_seq_has_overflowed(&iter->seq)) {
7046 iter->seq.seq.len = save_len;
7047 break;
7048 }
7049
7050 /*
7051 * This should not be hit, because it should only
7052 * be set if the iter->seq overflowed. But check it
7053 * anyway to be safe.
7054 */
7055 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7056 iter->seq.seq.len = save_len;
7057 break;
7058 }
7059
7060 count = trace_seq_used(&iter->seq) - save_len;
7061 if (rem < count) {
7062 rem = 0;
7063 iter->seq.seq.len = save_len;
7064 break;
7065 }
7066
7067 if (ret != TRACE_TYPE_NO_CONSUME)
7068 trace_consume(iter);
7069 rem -= count;
7070 if (!trace_find_next_entry_inc(iter)) {
7071 rem = 0;
7072 iter->ent = NULL;
7073 break;
7074 }
7075 }
7076
7077 return rem;
7078 }
7079
/*
 * tracing_splice_read_pipe - splice_read handler wired into tracing_pipe_fops
 * @filp: open file; filp->private_data is the struct trace_iterator
 * @ppos: file position, only forwarded to the tracer's own splice_read hook
 * @pipe: destination pipe
 * @len: maximum number of bytes to splice
 * @flags: splice flags, only forwarded to the tracer's own splice_read hook
 *
 * Formats trace entries into freshly allocated pages (one iter->seq worth of
 * text per page) and hands those pages to splice_to_pipe().
 *
 * Returns -ENOMEM if splice_grow_spd() fails, a non-zero result from the
 * tracer's splice_read hook, a result <= 0 from tracing_wait_pipe(),
 * -EFAULT if no entry can be found, 0 if no page was filled, and otherwise
 * whatever splice_to_pipe() returns.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* Give the current tracer the first chance; any non-zero result is final */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* Page i is not counted in nr_pages below, so free it here */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	/* i is the number of pages that were filled successfully */
	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	/* Error exits taken while only iter->mutex is held */
	mutex_unlock(&iter->mutex);
	goto out;
}
7165
7166 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7167 tracing_entries_read(struct file *filp, char __user *ubuf,
7168 size_t cnt, loff_t *ppos)
7169 {
7170 struct inode *inode = file_inode(filp);
7171 struct trace_array *tr = inode->i_private;
7172 int cpu = tracing_get_cpu(inode);
7173 char buf[64];
7174 int r = 0;
7175 ssize_t ret;
7176
7177 mutex_lock(&trace_types_lock);
7178
7179 if (cpu == RING_BUFFER_ALL_CPUS) {
7180 int cpu, buf_size_same;
7181 unsigned long size;
7182
7183 size = 0;
7184 buf_size_same = 1;
7185 /* check if all cpu sizes are same */
7186 for_each_tracing_cpu(cpu) {
7187 /* fill in the size from first enabled cpu */
7188 if (size == 0)
7189 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7190 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7191 buf_size_same = 0;
7192 break;
7193 }
7194 }
7195
7196 if (buf_size_same) {
7197 if (!ring_buffer_expanded)
7198 r = sprintf(buf, "%lu (expanded: %lu)\n",
7199 size >> 10,
7200 trace_buf_size >> 10);
7201 else
7202 r = sprintf(buf, "%lu\n", size >> 10);
7203 } else
7204 r = sprintf(buf, "X\n");
7205 } else
7206 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7207
7208 mutex_unlock(&trace_types_lock);
7209
7210 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7211 return ret;
7212 }
7213
7214 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7215 tracing_entries_write(struct file *filp, const char __user *ubuf,
7216 size_t cnt, loff_t *ppos)
7217 {
7218 struct inode *inode = file_inode(filp);
7219 struct trace_array *tr = inode->i_private;
7220 unsigned long val;
7221 int ret;
7222
7223 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7224 if (ret)
7225 return ret;
7226
7227 /* must have at least 1 entry */
7228 if (!val)
7229 return -EINVAL;
7230
7231 /* value is in KB */
7232 val <<= 10;
7233 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7234 if (ret < 0)
7235 return ret;
7236
7237 *ppos += cnt;
7238
7239 return cnt;
7240 }
7241
7242 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7243 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7244 size_t cnt, loff_t *ppos)
7245 {
7246 struct trace_array *tr = filp->private_data;
7247 char buf[64];
7248 int r, cpu;
7249 unsigned long size = 0, expanded_size = 0;
7250
7251 mutex_lock(&trace_types_lock);
7252 for_each_tracing_cpu(cpu) {
7253 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7254 if (!ring_buffer_expanded)
7255 expanded_size += trace_buf_size >> 10;
7256 }
7257 if (ring_buffer_expanded)
7258 r = sprintf(buf, "%lu\n", size);
7259 else
7260 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7261 mutex_unlock(&trace_types_lock);
7262
7263 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7264 }
7265
7266 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7267 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7268 size_t cnt, loff_t *ppos)
7269 {
7270 /*
7271 * There is no need to read what the user has written, this function
7272 * is just to make sure that there is no error when "echo" is used
7273 */
7274
7275 *ppos += cnt;
7276
7277 return cnt;
7278 }
7279
7280 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7281 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7282 {
7283 struct trace_array *tr = inode->i_private;
7284
7285 /* disable tracing ? */
7286 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7287 tracer_tracing_off(tr);
7288 /* resize the ring buffer to 0 */
7289 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7290
7291 trace_array_put(tr);
7292
7293 return 0;
7294 }
7295
7296 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7297 tracing_mark_write(struct file *filp, const char __user *ubuf,
7298 size_t cnt, loff_t *fpos)
7299 {
7300 struct trace_array *tr = filp->private_data;
7301 struct ring_buffer_event *event;
7302 enum event_trigger_type tt = ETT_NONE;
7303 struct trace_buffer *buffer;
7304 struct print_entry *entry;
7305 ssize_t written;
7306 int size;
7307 int len;
7308
7309 /* Used in tracing_mark_raw_write() as well */
7310 #define FAULTED_STR "<faulted>"
7311 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7312
7313 if (tracing_disabled)
7314 return -EINVAL;
7315
7316 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7317 return -EINVAL;
7318
7319 if (cnt > TRACE_BUF_SIZE)
7320 cnt = TRACE_BUF_SIZE;
7321
7322 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7323
7324 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7325
7326 /* If less than "<faulted>", then make sure we can still add that */
7327 if (cnt < FAULTED_SIZE)
7328 size += FAULTED_SIZE - cnt;
7329
7330 buffer = tr->array_buffer.buffer;
7331 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7332 tracing_gen_ctx());
7333 if (unlikely(!event))
7334 /* Ring buffer disabled, return as if not open for write */
7335 return -EBADF;
7336
7337 entry = ring_buffer_event_data(event);
7338 entry->ip = _THIS_IP_;
7339
7340 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7341 if (len) {
7342 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7343 cnt = FAULTED_SIZE;
7344 written = -EFAULT;
7345 } else
7346 written = cnt;
7347
7348 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7349 /* do not add \n before testing triggers, but add \0 */
7350 entry->buf[cnt] = '\0';
7351 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7352 }
7353
7354 if (entry->buf[cnt - 1] != '\n') {
7355 entry->buf[cnt] = '\n';
7356 entry->buf[cnt + 1] = '\0';
7357 } else
7358 entry->buf[cnt] = '\0';
7359
7360 if (static_branch_unlikely(&trace_marker_exports_enabled))
7361 ftrace_exports(event, TRACE_EXPORT_MARKER);
7362 __buffer_unlock_commit(buffer, event);
7363
7364 if (tt)
7365 event_triggers_post_call(tr->trace_marker_file, tt);
7366
7367 return written;
7368 }
7369
7370 /* Limit it for now to 3K (including tag) */
7371 #define RAW_DATA_MAX_SIZE (1024*3)
7372
7373 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7374 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7375 size_t cnt, loff_t *fpos)
7376 {
7377 struct trace_array *tr = filp->private_data;
7378 struct ring_buffer_event *event;
7379 struct trace_buffer *buffer;
7380 struct raw_data_entry *entry;
7381 ssize_t written;
7382 int size;
7383 int len;
7384
7385 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7386
7387 if (tracing_disabled)
7388 return -EINVAL;
7389
7390 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7391 return -EINVAL;
7392
7393 /* The marker must at least have a tag id */
7394 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7395 return -EINVAL;
7396
7397 if (cnt > TRACE_BUF_SIZE)
7398 cnt = TRACE_BUF_SIZE;
7399
7400 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7401
7402 size = sizeof(*entry) + cnt;
7403 if (cnt < FAULT_SIZE_ID)
7404 size += FAULT_SIZE_ID - cnt;
7405
7406 buffer = tr->array_buffer.buffer;
7407 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7408 tracing_gen_ctx());
7409 if (!event)
7410 /* Ring buffer disabled, return as if not open for write */
7411 return -EBADF;
7412
7413 entry = ring_buffer_event_data(event);
7414
7415 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7416 if (len) {
7417 entry->id = -1;
7418 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7419 written = -EFAULT;
7420 } else
7421 written = cnt;
7422
7423 __buffer_unlock_commit(buffer, event);
7424
7425 return written;
7426 }
7427
tracing_clock_show(struct seq_file * m,void * v)7428 static int tracing_clock_show(struct seq_file *m, void *v)
7429 {
7430 struct trace_array *tr = m->private;
7431 int i;
7432
7433 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7434 seq_printf(m,
7435 "%s%s%s%s", i ? " " : "",
7436 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7437 i == tr->clock_id ? "]" : "");
7438 seq_putc(m, '\n');
7439
7440 return 0;
7441 }
7442
tracing_set_clock(struct trace_array * tr,const char * clockstr)7443 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7444 {
7445 int i;
7446
7447 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7448 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7449 break;
7450 }
7451 if (i == ARRAY_SIZE(trace_clocks))
7452 return -EINVAL;
7453
7454 mutex_lock(&trace_types_lock);
7455
7456 tr->clock_id = i;
7457
7458 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7459
7460 /*
7461 * New clock may not be consistent with the previous clock.
7462 * Reset the buffer so that it doesn't have incomparable timestamps.
7463 */
7464 tracing_reset_online_cpus(&tr->array_buffer);
7465
7466 #ifdef CONFIG_TRACER_MAX_TRACE
7467 if (tr->max_buffer.buffer)
7468 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7469 tracing_reset_online_cpus(&tr->max_buffer);
7470 #endif
7471
7472 mutex_unlock(&trace_types_lock);
7473
7474 return 0;
7475 }
7476
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7477 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7478 size_t cnt, loff_t *fpos)
7479 {
7480 struct seq_file *m = filp->private_data;
7481 struct trace_array *tr = m->private;
7482 char buf[64];
7483 const char *clockstr;
7484 int ret;
7485
7486 if (cnt >= sizeof(buf))
7487 return -EINVAL;
7488
7489 if (copy_from_user(buf, ubuf, cnt))
7490 return -EFAULT;
7491
7492 buf[cnt] = 0;
7493
7494 clockstr = strstrip(buf);
7495
7496 ret = tracing_set_clock(tr, clockstr);
7497 if (ret)
7498 return ret;
7499
7500 *fpos += cnt;
7501
7502 return cnt;
7503 }
7504
tracing_clock_open(struct inode * inode,struct file * file)7505 static int tracing_clock_open(struct inode *inode, struct file *file)
7506 {
7507 struct trace_array *tr = inode->i_private;
7508 int ret;
7509
7510 ret = tracing_check_open_get_tr(tr);
7511 if (ret)
7512 return ret;
7513
7514 ret = single_open(file, tracing_clock_show, inode->i_private);
7515 if (ret < 0)
7516 trace_array_put(tr);
7517
7518 return ret;
7519 }
7520
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7521 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7522 {
7523 struct trace_array *tr = m->private;
7524
7525 mutex_lock(&trace_types_lock);
7526
7527 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7528 seq_puts(m, "delta [absolute]\n");
7529 else
7530 seq_puts(m, "[delta] absolute\n");
7531
7532 mutex_unlock(&trace_types_lock);
7533
7534 return 0;
7535 }
7536
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7537 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7538 {
7539 struct trace_array *tr = inode->i_private;
7540 int ret;
7541
7542 ret = tracing_check_open_get_tr(tr);
7543 if (ret)
7544 return ret;
7545
7546 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7547 if (ret < 0)
7548 trace_array_put(tr);
7549
7550 return ret;
7551 }
7552
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7553 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7554 {
7555 if (rbe == this_cpu_read(trace_buffered_event))
7556 return ring_buffer_time_stamp(buffer);
7557
7558 return ring_buffer_event_time_stamp(buffer, rbe);
7559 }
7560
7561 /*
7562 * Set or disable using the per CPU trace_buffer_event when possible.
7563 */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7564 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7565 {
7566 int ret = 0;
7567
7568 mutex_lock(&trace_types_lock);
7569
7570 if (set && tr->no_filter_buffering_ref++)
7571 goto out;
7572
7573 if (!set) {
7574 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7575 ret = -EINVAL;
7576 goto out;
7577 }
7578
7579 --tr->no_filter_buffering_ref;
7580 }
7581 out:
7582 mutex_unlock(&trace_types_lock);
7583
7584 return ret;
7585 }
7586
/*
 * Per-open state allocated by tracing_buffers_open() and stored in
 * filp->private_data.
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;		/* tr, cpu_file, trace and array_buffer are set at open */
	void			*spare;		/* NULL at open; presumably a spare ring buffer page - TODO confirm */
	unsigned int		spare_cpu;	/* presumably the CPU @spare belongs to - TODO confirm */
	unsigned int		read;		/* set to (unsigned int)-1 at open to force a ring buffer read */
};
7593
7594 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7595 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7596 {
7597 struct trace_array *tr = inode->i_private;
7598 struct trace_iterator *iter;
7599 struct seq_file *m;
7600 int ret;
7601
7602 ret = tracing_check_open_get_tr(tr);
7603 if (ret)
7604 return ret;
7605
7606 if (file->f_mode & FMODE_READ) {
7607 iter = __tracing_open(inode, file, true);
7608 if (IS_ERR(iter))
7609 ret = PTR_ERR(iter);
7610 } else {
7611 /* Writes still need the seq_file to hold the private data */
7612 ret = -ENOMEM;
7613 m = kzalloc(sizeof(*m), GFP_KERNEL);
7614 if (!m)
7615 goto out;
7616 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7617 if (!iter) {
7618 kfree(m);
7619 goto out;
7620 }
7621 ret = 0;
7622
7623 iter->tr = tr;
7624 iter->array_buffer = &tr->max_buffer;
7625 iter->cpu_file = tracing_get_cpu(inode);
7626 m->private = iter;
7627 file->private_data = m;
7628 }
7629 out:
7630 if (ret < 0)
7631 trace_array_put(tr);
7632
7633 return ret;
7634 }
7635
tracing_swap_cpu_buffer(void * tr)7636 static void tracing_swap_cpu_buffer(void *tr)
7637 {
7638 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7639 }
7640
/*
 * tracing_snapshot_write - write handler wired into snapshot_fops
 * @filp: open file; private_data is a seq_file whose private pointer is the
 *        trace_iterator set up by tracing_snapshot_open()
 * @ubuf: user buffer holding a decimal number
 * @cnt: number of bytes in @ubuf
 * @ppos: file position, advanced by @cnt on success
 *
 * The number written selects the action:
 *   0     - free the snapshot buffer (only valid on the all-CPUs file)
 *   1     - allocate or resize the snapshot buffer, then swap it with the
 *           live buffer (all CPUs, or the single CPU of the file)
 *   other - reset the snapshot buffer contents if it is allocated
 *
 * Returns @cnt on success, -EBUSY if the current tracer uses the max buffer
 * or a conditional snapshot is set, -EINVAL for an action not valid on a
 * per-CPU file, or the error of a helper that failed.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	/* tr->cond_snapshot is checked under max_lock with interrupts off */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		goto out;

	switch (val) {
	case 0:
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		/* Make sure a snapshot buffer exists before swapping */
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->max_buffer,
					&tr->array_buffer, iter->cpu_file);
		else
			ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			break;
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
			local_irq_disable();
			update_max_tr(tr, current, smp_processor_id(), NULL);
			local_irq_enable();
		} else {
			/* Run the single-CPU swap on the CPU that owns the buffer */
			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
						 (void *)tr, 1);
		}
		break;
	default:
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
7727
tracing_snapshot_release(struct inode * inode,struct file * file)7728 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7729 {
7730 struct seq_file *m = file->private_data;
7731 int ret;
7732
7733 ret = tracing_release(inode, file);
7734
7735 if (file->f_mode & FMODE_READ)
7736 return ret;
7737
7738 /* If write only, the seq_file is just a stub */
7739 if (m)
7740 kfree(m->private);
7741 kfree(m);
7742
7743 return 0;
7744 }
7745
/*
 * Forward declarations: these handlers are referenced by snapshot_raw_open()
 * and snapshot_raw_fops before their definitions appear in this file.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7752
snapshot_raw_open(struct inode * inode,struct file * filp)7753 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7754 {
7755 struct ftrace_buffer_info *info;
7756 int ret;
7757
7758 /* The following checks for tracefs lockdown */
7759 ret = tracing_buffers_open(inode, filp);
7760 if (ret < 0)
7761 return ret;
7762
7763 info = filp->private_data;
7764
7765 if (info->iter.trace->use_max_tr) {
7766 tracing_buffers_release(inode, filp);
7767 return -EBUSY;
7768 }
7769
7770 info->iter.snapshot = true;
7771 info->iter.array_buffer = &info->iter.tr->max_buffer;
7772
7773 return ret;
7774 }
7775
7776 #endif /* CONFIG_TRACER_SNAPSHOT */
7777
7778
/* Read/write of the tracing threshold; opened with tracing_open_generic() */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
/* Read/write of the max latency value; holds a trace array reference while open */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
#endif

/* Read/write of the current tracer selection */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* Pipe-style trace reader: supports poll, read and splice, but not seeking */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};

/* Read/write of the ring buffer size, see tracing_entries_read()/_write() */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* Read-only total over all CPUs, see tracing_total_entries_read() */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* Write-only; the real work happens on release, see tracing_free_buffer_release() */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

/* Write-only text marker, see tracing_mark_write() */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_release_generic_tr,
};

/* Write-only raw (binary) marker, see tracing_mark_raw_write() */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_release_generic_tr,
};

/* seq_file based clock listing plus clock selection via write */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

/* Read-only seq_file showing the time stamp mode */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};
7860
7861 #ifdef CONFIG_TRACER_SNAPSHOT
/* Snapshot control and text output, see tracing_snapshot_open()/_write() */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};

/* Snapshot data through the tracing_buffers_* handlers; read-only, not seekable */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
7877
7878 #endif /* CONFIG_TRACER_SNAPSHOT */
7879
7880 /*
7881 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7882 * @filp: The active open file structure
7883 * @ubuf: The userspace provided buffer to read value into
7884 * @cnt: The maximum number of bytes to read
7885 * @ppos: The current "file" position
7886 *
7887 * This function implements the write interface for a struct trace_min_max_param.
7888 * The filp->private_data must point to a trace_min_max_param structure that
7889 * defines where to write the value, the min and the max acceptable values,
7890 * and a lock to protect the write.
7891 */
7892 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7893 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7894 {
7895 struct trace_min_max_param *param = filp->private_data;
7896 u64 val;
7897 int err;
7898
7899 if (!param)
7900 return -EFAULT;
7901
7902 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7903 if (err)
7904 return err;
7905
7906 if (param->lock)
7907 mutex_lock(param->lock);
7908
7909 if (param->min && val < *param->min)
7910 err = -EINVAL;
7911
7912 if (param->max && val > *param->max)
7913 err = -EINVAL;
7914
7915 if (!err)
7916 *param->val = val;
7917
7918 if (param->lock)
7919 mutex_unlock(param->lock);
7920
7921 if (err)
7922 return err;
7923
7924 return cnt;
7925 }
7926
7927 /*
7928 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7929 * @filp: The active open file structure
7930 * @ubuf: The userspace provided buffer to read value into
7931 * @cnt: The maximum number of bytes to read
7932 * @ppos: The current "file" position
7933 *
7934 * This function implements the read interface for a struct trace_min_max_param.
7935 * The filp->private_data must point to a trace_min_max_param struct with valid
7936 * data.
7937 */
7938 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7939 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7940 {
7941 struct trace_min_max_param *param = filp->private_data;
7942 char buf[U64_STR_SIZE];
7943 int len;
7944 u64 val;
7945
7946 if (!param)
7947 return -EFAULT;
7948
7949 val = *param->val;
7950
7951 if (cnt > sizeof(buf))
7952 cnt = sizeof(buf);
7953
7954 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7955
7956 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7957 }
7958
/*
 * File operations for a bounded u64 parameter. Not static, so usable
 * outside this file; the handlers expect filp->private_data to point to a
 * struct trace_min_max_param.
 */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
7964
/* Maximum number of entries kept in a trace array's error log */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the per-entry buffer holding the "<loc>: error: " prefix */
#define TRACING_LOG_LOC_MAX	128

/* Text printed in front of the offending command; also sets the caret indent */
#define CMD_PREFIX "  Command: "

/* Details of one logged error, besides its location and command text */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when the error was logged */
};

/* One entry of a trace array's err_log list */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;			  /* what caused err */
};

/* Held around all accesses to a trace array's err_log list and entry count */
static DEFINE_MUTEX(tracing_err_log_lock);
7985
alloc_tracing_log_err(int len)7986 static struct tracing_log_err *alloc_tracing_log_err(int len)
7987 {
7988 struct tracing_log_err *err;
7989
7990 err = kzalloc(sizeof(*err), GFP_KERNEL);
7991 if (!err)
7992 return ERR_PTR(-ENOMEM);
7993
7994 err->cmd = kzalloc(len, GFP_KERNEL);
7995 if (!err->cmd) {
7996 kfree(err);
7997 return ERR_PTR(-ENOMEM);
7998 }
7999
8000 return err;
8001 }
8002
free_tracing_log_err(struct tracing_log_err * err)8003 static void free_tracing_log_err(struct tracing_log_err *err)
8004 {
8005 kfree(err->cmd);
8006 kfree(err);
8007 }
8008
get_tracing_log_err(struct trace_array * tr,int len)8009 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8010 int len)
8011 {
8012 struct tracing_log_err *err;
8013 char *cmd;
8014
8015 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8016 err = alloc_tracing_log_err(len);
8017 if (PTR_ERR(err) != -ENOMEM)
8018 tr->n_err_log_entries++;
8019
8020 return err;
8021 }
8022 cmd = kzalloc(len, GFP_KERNEL);
8023 if (!cmd)
8024 return ERR_PTR(-ENOMEM);
8025 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8026 kfree(err->cmd);
8027 err->cmd = cmd;
8028 list_del(&err->list);
8029
8030 return err;
8031 }
8032
/**
 * err_pos - locate a string inside a command for error caret placement
 * @cmd: The tracing command that caused the error
 * @str: The string within @cmd the caret should point at
 *
 * Looks for the first occurrence of @str in @cmd. The result is meant to
 * be handed to tracing_log_err() as the caret position. An empty @cmd
 * triggers a warning.
 *
 * Returns the offset of the first occurrence of @str within @cmd, or 0
 * if @str does not occur in @cmd (or @cmd is empty).
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	match = strstr(cmd, str);
	if (!match)
		return 0;

	return match - cmd;
}
8058
8059 /**
8060 * tracing_log_err - write an error to the tracing error log
8061 * @tr: The associated trace array for the error (NULL for top level array)
8062 * @loc: A string describing where the error occurred
8063 * @cmd: The tracing command that caused the error
8064 * @errs: The array of loc-specific static error strings
8065 * @type: The index into errs[], which produces the specific static err string
8066 * @pos: The position the caret should be placed in the cmd
8067 *
8068 * Writes an error into tracing/error_log of the form:
8069 *
8070 * <loc>: error: <text>
8071 * Command: <cmd>
8072 * ^
8073 *
8074 * tracing/error_log is a small log file containing the last
8075 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8076 * unless there has been a tracing error, and the error log can be
8077 * cleared and have its memory freed by writing the empty string in
8078 * truncation mode to it i.e. echo > tracing/error_log.
8079 *
8080 * NOTE: the @errs array along with the @type param are used to
8081 * produce a static error string - this string is not copied and saved
8082 * when the error is logged - only a pointer to it is saved. See
8083 * existing callers for examples of how static strings are typically
8084 * defined for use with tracing_log_err().
8085 */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8086 void tracing_log_err(struct trace_array *tr,
8087 const char *loc, const char *cmd,
8088 const char **errs, u8 type, u16 pos)
8089 {
8090 struct tracing_log_err *err;
8091 int len = 0;
8092
8093 if (!tr)
8094 tr = &global_trace;
8095
8096 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8097
8098 mutex_lock(&tracing_err_log_lock);
8099 err = get_tracing_log_err(tr, len);
8100 if (PTR_ERR(err) == -ENOMEM) {
8101 mutex_unlock(&tracing_err_log_lock);
8102 return;
8103 }
8104
8105 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8106 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8107
8108 err->info.errs = errs;
8109 err->info.type = type;
8110 err->info.pos = pos;
8111 err->info.ts = local_clock();
8112
8113 list_add_tail(&err->list, &tr->err_log);
8114 mutex_unlock(&tracing_err_log_lock);
8115 }
8116
clear_tracing_err_log(struct trace_array * tr)8117 static void clear_tracing_err_log(struct trace_array *tr)
8118 {
8119 struct tracing_log_err *err, *next;
8120
8121 mutex_lock(&tracing_err_log_lock);
8122 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8123 list_del(&err->list);
8124 free_tracing_log_err(err);
8125 }
8126
8127 tr->n_err_log_entries = 0;
8128 mutex_unlock(&tracing_err_log_lock);
8129 }
8130
/*
 * seq_file start callback: takes tracing_err_log_lock (released again in
 * the stop callback) and positions the iteration at *pos within the trace
 * array's err_log list.
 */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct list_head *log = &tr->err_log;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(log, *pos);
}
8139
/* seq_file next callback: advances to the entry following @v on the err_log list. */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct list_head *log = &tr->err_log;

	return seq_list_next(v, log, pos);
}
8146
/* seq_file stop callback: drops the lock taken in tracing_err_log_seq_start(). */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
8151
/*
 * Print the caret line of an error log entry: one space for every character
 * of CMD_PREFIX, @pos further spaces, then "^" and a newline.
 */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	unsigned int pad = sizeof(CMD_PREFIX) - 1 + pos;

	while (pad--)
		seq_putc(m, ' ');

	seq_puts(m, "^\n");
}
8162
tracing_err_log_seq_show(struct seq_file * m,void * v)8163 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8164 {
8165 struct tracing_log_err *err = v;
8166
8167 if (err) {
8168 const char *err_text = err->info.errs[err->info.type];
8169 u64 sec = err->info.ts;
8170 u32 nsec;
8171
8172 nsec = do_div(sec, NSEC_PER_SEC);
8173 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8174 err->loc, err_text);
8175 seq_printf(m, "%s", err->cmd);
8176 tracing_err_log_show_pos(m, err->info.pos);
8177 }
8178
8179 return 0;
8180 }
8181
/* seq_file iteration over a trace array's err_log list */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
8188
tracing_err_log_open(struct inode * inode,struct file * file)8189 static int tracing_err_log_open(struct inode *inode, struct file *file)
8190 {
8191 struct trace_array *tr = inode->i_private;
8192 int ret = 0;
8193
8194 ret = tracing_check_open_get_tr(tr);
8195 if (ret)
8196 return ret;
8197
8198 /* If this file was opened for write, then erase contents */
8199 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8200 clear_tracing_err_log(tr);
8201
8202 if (file->f_mode & FMODE_READ) {
8203 ret = seq_open(file, &tracing_err_log_seq_ops);
8204 if (!ret) {
8205 struct seq_file *m = file->private_data;
8206 m->private = tr;
8207 } else {
8208 trace_array_put(tr);
8209 }
8210 }
8211 return ret;
8212 }
8213
/*
 * Write handler wired into tracing_err_log_fops: the data is ignored and all
 * @count bytes are reported as written. Clearing the log happens at open
 * time, in tracing_err_log_open().
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8220
tracing_err_log_release(struct inode * inode,struct file * file)8221 static int tracing_err_log_release(struct inode *inode, struct file *file)
8222 {
8223 struct trace_array *tr = inode->i_private;
8224
8225 trace_array_put(tr);
8226
8227 if (file->f_mode & FMODE_READ)
8228 seq_release(inode, file);
8229
8230 return 0;
8231 }
8232
8233 static const struct file_operations tracing_err_log_fops = {
8234 .open = tracing_err_log_open,
8235 .write = tracing_err_log_write,
8236 .read = seq_read,
8237 .llseek = tracing_lseek,
8238 .release = tracing_err_log_release,
8239 };
8240
tracing_buffers_open(struct inode * inode,struct file * filp)8241 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8242 {
8243 struct trace_array *tr = inode->i_private;
8244 struct ftrace_buffer_info *info;
8245 int ret;
8246
8247 ret = tracing_check_open_get_tr(tr);
8248 if (ret)
8249 return ret;
8250
8251 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8252 if (!info) {
8253 trace_array_put(tr);
8254 return -ENOMEM;
8255 }
8256
8257 mutex_lock(&trace_types_lock);
8258
8259 info->iter.tr = tr;
8260 info->iter.cpu_file = tracing_get_cpu(inode);
8261 info->iter.trace = tr->current_trace;
8262 info->iter.array_buffer = &tr->array_buffer;
8263 info->spare = NULL;
8264 /* Force reading ring buffer for first read */
8265 info->read = (unsigned int)-1;
8266
8267 filp->private_data = info;
8268
8269 tr->trace_ref++;
8270
8271 mutex_unlock(&trace_types_lock);
8272
8273 ret = nonseekable_open(inode, filp);
8274 if (ret < 0)
8275 trace_array_put(tr);
8276
8277 return ret;
8278 }
8279
8280 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8281 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8282 {
8283 struct ftrace_buffer_info *info = filp->private_data;
8284 struct trace_iterator *iter = &info->iter;
8285
8286 return trace_poll(iter, filp, poll_table);
8287 }
8288
/*
 * Read handler for the raw ring buffer files.
 *
 * Ring buffer data is pulled into the per-open spare page (info->spare)
 * with ring_buffer_read_page() and copied to user space from there.
 * info->read is the offset into the spare page that has already been
 * handed out; while it is below PAGE_SIZE the rest of that page is
 * served before the ring buffer is read again.
 *
 * Returns the number of bytes copied to @ubuf, 0 when @count is 0 or
 * when the page read failed although the buffer is not empty, or a
 * negative errno (-EBUSY, -EAGAIN, -EFAULT, or whatever the page
 * allocation or wait_on_pipe() returned).
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* The spare page is allocated on the first read and then reused */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			/* Remember which CPU the page belongs to for freeing it */
			info->spare_cpu = iter->cpu_file;
		}
	}
	/* No spare page: ret holds the allocation error recorded above */
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			/* Nothing to read: fail for non-blocking files, else wait */
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* A fresh page was read in: start handing it out from the beginning */
	info->read = 0;
 read:
	/* Hand out what is left of the spare page, capped at @count */
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes it could NOT copy */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	/* A partial copy is reported as a short read */
	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8362
/*
 * Flush handler: bump the iterator's wait_index and wake the ring
 * buffer waiters for this file's CPU.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;

	info->iter.wait_index++;
	/* Make sure the waiters see the new wait_index */
	smp_wmb();

	ring_buffer_wake_waiters(info->iter.array_buffer->buffer,
				 info->iter.cpu_file);

	return 0;
}
8376
tracing_buffers_release(struct inode * inode,struct file * file)8377 static int tracing_buffers_release(struct inode *inode, struct file *file)
8378 {
8379 struct ftrace_buffer_info *info = file->private_data;
8380 struct trace_iterator *iter = &info->iter;
8381
8382 mutex_lock(&trace_types_lock);
8383
8384 iter->tr->trace_ref--;
8385
8386 __trace_array_put(iter->tr);
8387
8388 if (info->spare)
8389 ring_buffer_free_read_page(iter->array_buffer->buffer,
8390 info->spare_cpu, info->spare);
8391 kvfree(info);
8392
8393 mutex_unlock(&trace_types_lock);
8394
8395 return 0;
8396 }
8397
/*
 * Reference-counted handle for one ring buffer read page.  @buffer,
 * @cpu and @page are the arguments handed to ring_buffer_free_read_page()
 * when the last reference goes away (see buffer_ref_release()).
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page came from */
	void			*page;		/* the read page being tracked */
	int			cpu;		/* CPU used when freeing the page */
	refcount_t		refcount;	/* page and handle are freed at zero */
};
8404
buffer_ref_release(struct buffer_ref * ref)8405 static void buffer_ref_release(struct buffer_ref *ref)
8406 {
8407 if (!refcount_dec_and_test(&ref->refcount))
8408 return;
8409 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8410 kfree(ref);
8411 }
8412
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8413 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8414 struct pipe_buffer *buf)
8415 {
8416 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8417
8418 buffer_ref_release(ref);
8419 buf->private = 0;
8420 }
8421
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8422 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8423 struct pipe_buffer *buf)
8424 {
8425 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8426
8427 if (refcount_read(&ref->refcount) > INT_MAX/2)
8428 return false;
8429
8430 refcount_inc(&ref->refcount);
8431 return true;
8432 }
8433
8434 /* Pipe buffer operations for a buffer. */
8435 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8436 .release = buffer_pipe_buf_release,
8437 .get = buffer_pipe_buf_get,
8438 };
8439
8440 /*
8441 * Callback from splice_to_pipe(), if we need to release some pages
8442 * at the end of the spd in case we error'ed out in filling the pipe.
8443 */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8444 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8445 {
8446 struct buffer_ref *ref =
8447 (struct buffer_ref *)spd->partial[i].private;
8448
8449 buffer_ref_release(ref);
8450 spd->partial[i].private = 0;
8451 }
8452
/*
 * splice_read handler for the raw ring buffer files.
 *
 * Reads whole pages out of the ring buffer and hands them to @pipe.
 * Every page is wrapped in a buffer_ref (refcount 1) that is stored in
 * the private field of its partial_page entry, so the page can be given
 * back to the ring buffer when the pipe buffer is released.
 *
 * @*ppos must be page aligned, and @len must be at least one page; a
 * @len that is not a multiple of the page size is rounded down.
 *
 * Returns what splice_to_pipe() returns when at least one page was
 * read, otherwise a negative errno (-EBUSY, -EINVAL, -ENOMEM, -EAGAIN
 * or the wait_on_pipe() result).
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Only page aligned offsets are accepted */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	/* Round len down to whole pages; less than one page is an error */
	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	/* From here on every exit has to go through splice_shrink_spd() */
	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* One page per iteration until the spd, len or the entries run out */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read into this page: give it back and stop */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i is the number of pages that were filled in by the loop */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		long wait_index;

		/* An allocation error from the loop takes precedence */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		/* Sample wait_index before sleeping to notice a bump afterwards */
		wait_index = READ_ONCE(iter->wait_index);

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Make sure we see the new wait_index */
		smp_rmb();
		if (wait_index != iter->wait_index)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8574
8575 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8576 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8577 {
8578 struct ftrace_buffer_info *info = file->private_data;
8579 struct trace_iterator *iter = &info->iter;
8580
8581 if (cmd)
8582 return -ENOIOCTLCMD;
8583
8584 mutex_lock(&trace_types_lock);
8585
8586 iter->wait_index++;
8587 /* Make sure the waiters see the new wait_index */
8588 smp_wmb();
8589
8590 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8591
8592 mutex_unlock(&trace_types_lock);
8593 return 0;
8594 }
8595
8596 static const struct file_operations tracing_buffers_fops = {
8597 .open = tracing_buffers_open,
8598 .read = tracing_buffers_read,
8599 .poll = tracing_buffers_poll,
8600 .release = tracing_buffers_release,
8601 .flush = tracing_buffers_flush,
8602 .splice_read = tracing_buffers_splice_read,
8603 .unlocked_ioctl = tracing_buffers_ioctl,
8604 .llseek = no_llseek,
8605 };
8606
8607 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8608 tracing_stats_read(struct file *filp, char __user *ubuf,
8609 size_t count, loff_t *ppos)
8610 {
8611 struct inode *inode = file_inode(filp);
8612 struct trace_array *tr = inode->i_private;
8613 struct array_buffer *trace_buf = &tr->array_buffer;
8614 int cpu = tracing_get_cpu(inode);
8615 struct trace_seq *s;
8616 unsigned long cnt;
8617 unsigned long long t;
8618 unsigned long usec_rem;
8619
8620 s = kmalloc(sizeof(*s), GFP_KERNEL);
8621 if (!s)
8622 return -ENOMEM;
8623
8624 trace_seq_init(s);
8625
8626 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8627 trace_seq_printf(s, "entries: %ld\n", cnt);
8628
8629 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8630 trace_seq_printf(s, "overrun: %ld\n", cnt);
8631
8632 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8633 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8634
8635 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8636 trace_seq_printf(s, "bytes: %ld\n", cnt);
8637
8638 if (trace_clocks[tr->clock_id].in_ns) {
8639 /* local or global for trace_clock */
8640 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8641 usec_rem = do_div(t, USEC_PER_SEC);
8642 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8643 t, usec_rem);
8644
8645 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8646 usec_rem = do_div(t, USEC_PER_SEC);
8647 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8648 } else {
8649 /* counter or tsc mode for trace_clock */
8650 trace_seq_printf(s, "oldest event ts: %llu\n",
8651 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8652
8653 trace_seq_printf(s, "now ts: %llu\n",
8654 ring_buffer_time_stamp(trace_buf->buffer));
8655 }
8656
8657 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8658 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8659
8660 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8661 trace_seq_printf(s, "read events: %ld\n", cnt);
8662
8663 count = simple_read_from_buffer(ubuf, count, ppos,
8664 s->buffer, trace_seq_used(s));
8665
8666 kfree(s);
8667
8668 return count;
8669 }
8670
8671 static const struct file_operations tracing_stats_fops = {
8672 .open = tracing_open_generic_tr,
8673 .read = tracing_stats_read,
8674 .llseek = generic_file_llseek,
8675 .release = tracing_release_generic_tr,
8676 };
8677
8678 #ifdef CONFIG_DYNAMIC_FTRACE
8679
8680 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8681 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8682 size_t cnt, loff_t *ppos)
8683 {
8684 ssize_t ret;
8685 char *buf;
8686 int r;
8687
8688 /* 256 should be plenty to hold the amount needed */
8689 buf = kmalloc(256, GFP_KERNEL);
8690 if (!buf)
8691 return -ENOMEM;
8692
8693 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8694 ftrace_update_tot_cnt,
8695 ftrace_number_of_pages,
8696 ftrace_number_of_groups);
8697
8698 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8699 kfree(buf);
8700 return ret;
8701 }
8702
8703 static const struct file_operations tracing_dyn_info_fops = {
8704 .open = tracing_open_generic,
8705 .read = tracing_read_dyn_info,
8706 .llseek = generic_file_llseek,
8707 };
8708 #endif /* CONFIG_DYNAMIC_FTRACE */
8709
8710 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Function probe handler: take a snapshot of @tr on every hit */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8718
8719 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8720 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8721 struct trace_array *tr, struct ftrace_probe_ops *ops,
8722 void *data)
8723 {
8724 struct ftrace_func_mapper *mapper = data;
8725 long *count = NULL;
8726
8727 if (mapper)
8728 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8729
8730 if (count) {
8731
8732 if (*count <= 0)
8733 return;
8734
8735 (*count)--;
8736 }
8737
8738 tracing_snapshot_instance(tr);
8739 }
8740
8741 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8742 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8743 struct ftrace_probe_ops *ops, void *data)
8744 {
8745 struct ftrace_func_mapper *mapper = data;
8746 long *count = NULL;
8747
8748 seq_printf(m, "%ps:", (void *)ip);
8749
8750 seq_puts(m, "snapshot");
8751
8752 if (mapper)
8753 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8754
8755 if (count)
8756 seq_printf(m, ":count=%ld\n", *count);
8757 else
8758 seq_puts(m, ":unlimited\n");
8759
8760 return 0;
8761 }
8762
8763 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8764 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8765 unsigned long ip, void *init_data, void **data)
8766 {
8767 struct ftrace_func_mapper *mapper = *data;
8768
8769 if (!mapper) {
8770 mapper = allocate_ftrace_func_mapper();
8771 if (!mapper)
8772 return -ENOMEM;
8773 *data = mapper;
8774 }
8775
8776 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8777 }
8778
8779 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8780 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8781 unsigned long ip, void *data)
8782 {
8783 struct ftrace_func_mapper *mapper = data;
8784
8785 if (!ip) {
8786 if (!mapper)
8787 return;
8788 free_ftrace_func_mapper(mapper, NULL);
8789 return;
8790 }
8791
8792 ftrace_func_mapper_remove_ip(mapper, ip);
8793 }
8794
8795 static struct ftrace_probe_ops snapshot_probe_ops = {
8796 .func = ftrace_snapshot,
8797 .print = ftrace_snapshot_print,
8798 };
8799
8800 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8801 .func = ftrace_count_snapshot,
8802 .print = ftrace_snapshot_print,
8803 .init = ftrace_snapshot_init,
8804 .free = ftrace_snapshot_free,
8805 };
8806
8807 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8808 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8809 char *glob, char *cmd, char *param, int enable)
8810 {
8811 struct ftrace_probe_ops *ops;
8812 void *count = (void *)-1;
8813 char *number;
8814 int ret;
8815
8816 if (!tr)
8817 return -ENODEV;
8818
8819 /* hash funcs only work with set_ftrace_filter */
8820 if (!enable)
8821 return -EINVAL;
8822
8823 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8824
8825 if (glob[0] == '!')
8826 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8827
8828 if (!param)
8829 goto out_reg;
8830
8831 number = strsep(¶m, ":");
8832
8833 if (!strlen(number))
8834 goto out_reg;
8835
8836 /*
8837 * We use the callback data field (which is a pointer)
8838 * as our counter.
8839 */
8840 ret = kstrtoul(number, 0, (unsigned long *)&count);
8841 if (ret)
8842 return ret;
8843
8844 out_reg:
8845 ret = tracing_alloc_snapshot_instance(tr);
8846 if (ret < 0)
8847 goto out;
8848
8849 ret = register_ftrace_function_probe(glob, tr, ops, count);
8850
8851 out:
8852 return ret < 0 ? ret : 0;
8853 }
8854
8855 static struct ftrace_func_command ftrace_snapshot_cmd = {
8856 .name = "snapshot",
8857 .func = ftrace_trace_snapshot_callback,
8858 };
8859
register_snapshot_cmd(void)8860 static __init int register_snapshot_cmd(void)
8861 {
8862 return register_ftrace_command(&ftrace_snapshot_cmd);
8863 }
8864 #else
register_snapshot_cmd(void)8865 static inline __init int register_snapshot_cmd(void) { return 0; }
8866 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8867
tracing_get_dentry(struct trace_array * tr)8868 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8869 {
8870 if (WARN_ON(!tr->dir))
8871 return ERR_PTR(-ENODEV);
8872
8873 /* Top directory uses NULL as the parent */
8874 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8875 return NULL;
8876
8877 /* All sub buffers have a descriptor */
8878 return tr->dir;
8879 }
8880
tracing_dentry_percpu(struct trace_array * tr,int cpu)8881 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8882 {
8883 struct dentry *d_tracer;
8884
8885 if (tr->percpu_dir)
8886 return tr->percpu_dir;
8887
8888 d_tracer = tracing_get_dentry(tr);
8889 if (IS_ERR(d_tracer))
8890 return NULL;
8891
8892 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8893
8894 MEM_FAIL(!tr->percpu_dir,
8895 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8896
8897 return tr->percpu_dir;
8898 }
8899
8900 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8901 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8902 void *data, long cpu, const struct file_operations *fops)
8903 {
8904 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8905
8906 if (ret) /* See tracing_get_cpu() */
8907 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8908 return ret;
8909 }
8910
8911 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8912 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8913 {
8914 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8915 struct dentry *d_cpu;
8916 char cpu_dir[30]; /* 30 characters should be more than enough */
8917
8918 if (!d_percpu)
8919 return;
8920
8921 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8922 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8923 if (!d_cpu) {
8924 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8925 return;
8926 }
8927
8928 /* per cpu trace_pipe */
8929 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8930 tr, cpu, &tracing_pipe_fops);
8931
8932 /* per cpu trace */
8933 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8934 tr, cpu, &tracing_fops);
8935
8936 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8937 tr, cpu, &tracing_buffers_fops);
8938
8939 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8940 tr, cpu, &tracing_stats_fops);
8941
8942 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8943 tr, cpu, &tracing_entries_fops);
8944
8945 #ifdef CONFIG_TRACER_SNAPSHOT
8946 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8947 tr, cpu, &snapshot_fops);
8948
8949 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8950 tr, cpu, &snapshot_raw_fops);
8951 #endif
8952 }
8953
8954 #ifdef CONFIG_FTRACE_SELFTEST
8955 /* Let selftest have access to static functions in this file */
8956 #include "trace_selftest.c"
8957 #endif
8958
8959 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8960 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8961 loff_t *ppos)
8962 {
8963 struct trace_option_dentry *topt = filp->private_data;
8964 char *buf;
8965
8966 if (topt->flags->val & topt->opt->bit)
8967 buf = "1\n";
8968 else
8969 buf = "0\n";
8970
8971 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8972 }
8973
8974 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8975 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8976 loff_t *ppos)
8977 {
8978 struct trace_option_dentry *topt = filp->private_data;
8979 unsigned long val;
8980 int ret;
8981
8982 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8983 if (ret)
8984 return ret;
8985
8986 if (val != 0 && val != 1)
8987 return -EINVAL;
8988
8989 if (!!(topt->flags->val & topt->opt->bit) != val) {
8990 mutex_lock(&trace_types_lock);
8991 ret = __set_tracer_option(topt->tr, topt->flags,
8992 topt->opt, !val);
8993 mutex_unlock(&trace_types_lock);
8994 if (ret)
8995 return ret;
8996 }
8997
8998 *ppos += cnt;
8999
9000 return cnt;
9001 }
9002
tracing_open_options(struct inode * inode,struct file * filp)9003 static int tracing_open_options(struct inode *inode, struct file *filp)
9004 {
9005 struct trace_option_dentry *topt = inode->i_private;
9006 int ret;
9007
9008 ret = tracing_check_open_get_tr(topt->tr);
9009 if (ret)
9010 return ret;
9011
9012 filp->private_data = inode->i_private;
9013 return 0;
9014 }
9015
tracing_release_options(struct inode * inode,struct file * file)9016 static int tracing_release_options(struct inode *inode, struct file *file)
9017 {
9018 struct trace_option_dentry *topt = file->private_data;
9019
9020 trace_array_put(topt->tr);
9021 return 0;
9022 }
9023
9024 static const struct file_operations trace_options_fops = {
9025 .open = tracing_open_options,
9026 .read = trace_options_read,
9027 .write = trace_options_write,
9028 .llseek = generic_file_llseek,
9029 .release = tracing_release_options,
9030 };
9031
9032 /*
9033 * In order to pass in both the trace_array descriptor as well as the index
9034 * to the flag that the trace option file represents, the trace_array
9035 * has a character array of trace_flags_index[], which holds the index
9036 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9037 * The address of this character array is passed to the flag option file
9038 * read/write callbacks.
9039 *
9040 * In order to extract both the index and the trace_array descriptor,
9041 * get_tr_index() uses the following algorithm.
9042 *
9043 * idx = *ptr;
9044 *
9045 * As the pointer itself contains the address of the index (remember
9046 * index[1] == 1).
9047 *
9048 * Then to get the trace_array descriptor, by subtracting that index
9049 * from the ptr, we get to the start of the index itself.
9050 *
9051 * ptr - idx == &index[0]
9052 *
9053 * Then a simple container_of() from that pointer gets us to the
9054 * trace_array descriptor.
9055 */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9056 static void get_tr_index(void *data, struct trace_array **ptr,
9057 unsigned int *pindex)
9058 {
9059 *pindex = *(unsigned char *)data;
9060
9061 *ptr = container_of(data - *pindex, struct trace_array,
9062 trace_flags_index);
9063 }
9064
9065 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9066 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9067 loff_t *ppos)
9068 {
9069 void *tr_index = filp->private_data;
9070 struct trace_array *tr;
9071 unsigned int index;
9072 char *buf;
9073
9074 get_tr_index(tr_index, &tr, &index);
9075
9076 if (tr->trace_flags & (1 << index))
9077 buf = "1\n";
9078 else
9079 buf = "0\n";
9080
9081 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9082 }
9083
9084 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9085 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9086 loff_t *ppos)
9087 {
9088 void *tr_index = filp->private_data;
9089 struct trace_array *tr;
9090 unsigned int index;
9091 unsigned long val;
9092 int ret;
9093
9094 get_tr_index(tr_index, &tr, &index);
9095
9096 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9097 if (ret)
9098 return ret;
9099
9100 if (val != 0 && val != 1)
9101 return -EINVAL;
9102
9103 mutex_lock(&event_mutex);
9104 mutex_lock(&trace_types_lock);
9105 ret = set_tracer_flag(tr, 1 << index, val);
9106 mutex_unlock(&trace_types_lock);
9107 mutex_unlock(&event_mutex);
9108
9109 if (ret < 0)
9110 return ret;
9111
9112 *ppos += cnt;
9113
9114 return cnt;
9115 }
9116
9117 static const struct file_operations trace_options_core_fops = {
9118 .open = tracing_open_generic,
9119 .read = trace_options_core_read,
9120 .write = trace_options_core_write,
9121 .llseek = generic_file_llseek,
9122 };
9123
/*
 * Create a tracefs file via tracefs_create_file() and warn when that
 * fails.  Returns the new dentry, or NULL on failure.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry = tracefs_create_file(name, mode, parent, data, fops);

	if (dentry)
		return dentry;

	pr_warn("Could not create tracefs '%s' entry\n", name);

	return NULL;
}
9138
9139
trace_options_init_dentry(struct trace_array * tr)9140 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9141 {
9142 struct dentry *d_tracer;
9143
9144 if (tr->options)
9145 return tr->options;
9146
9147 d_tracer = tracing_get_dentry(tr);
9148 if (IS_ERR(d_tracer))
9149 return NULL;
9150
9151 tr->options = tracefs_create_dir("options", d_tracer);
9152 if (!tr->options) {
9153 pr_warn("Could not create tracefs directory 'options'\n");
9154 return NULL;
9155 }
9156
9157 return tr->options;
9158 }
9159
9160 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9161 create_trace_option_file(struct trace_array *tr,
9162 struct trace_option_dentry *topt,
9163 struct tracer_flags *flags,
9164 struct tracer_opt *opt)
9165 {
9166 struct dentry *t_options;
9167
9168 t_options = trace_options_init_dentry(tr);
9169 if (!t_options)
9170 return;
9171
9172 topt->flags = flags;
9173 topt->opt = opt;
9174 topt->tr = tr;
9175
9176 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9177 t_options, topt, &trace_options_fops);
9178
9179 }
9180
9181 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9182 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9183 {
9184 struct trace_option_dentry *topts;
9185 struct trace_options *tr_topts;
9186 struct tracer_flags *flags;
9187 struct tracer_opt *opts;
9188 int cnt;
9189 int i;
9190
9191 if (!tracer)
9192 return;
9193
9194 flags = tracer->flags;
9195
9196 if (!flags || !flags->opts)
9197 return;
9198
9199 /*
9200 * If this is an instance, only create flags for tracers
9201 * the instance may have.
9202 */
9203 if (!trace_ok_for_array(tracer, tr))
9204 return;
9205
9206 for (i = 0; i < tr->nr_topts; i++) {
9207 /* Make sure there's no duplicate flags. */
9208 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9209 return;
9210 }
9211
9212 opts = flags->opts;
9213
9214 for (cnt = 0; opts[cnt].name; cnt++)
9215 ;
9216
9217 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9218 if (!topts)
9219 return;
9220
9221 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9222 GFP_KERNEL);
9223 if (!tr_topts) {
9224 kfree(topts);
9225 return;
9226 }
9227
9228 tr->topts = tr_topts;
9229 tr->topts[tr->nr_topts].tracer = tracer;
9230 tr->topts[tr->nr_topts].topts = topts;
9231 tr->nr_topts++;
9232
9233 for (cnt = 0; opts[cnt].name; cnt++) {
9234 create_trace_option_file(tr, &topts[cnt], flags,
9235 &opts[cnt]);
9236 MEM_FAIL(topts[cnt].entry == NULL,
9237 "Failed to create trace option: %s",
9238 opts[cnt].name);
9239 }
9240 }
9241
9242 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9243 create_trace_option_core_file(struct trace_array *tr,
9244 const char *option, long index)
9245 {
9246 struct dentry *t_options;
9247
9248 t_options = trace_options_init_dentry(tr);
9249 if (!t_options)
9250 return NULL;
9251
9252 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9253 (void *)&tr->trace_flags_index[index],
9254 &trace_options_core_fops);
9255 }
9256
create_trace_options_dir(struct trace_array * tr)9257 static void create_trace_options_dir(struct trace_array *tr)
9258 {
9259 struct dentry *t_options;
9260 bool top_level = tr == &global_trace;
9261 int i;
9262
9263 t_options = trace_options_init_dentry(tr);
9264 if (!t_options)
9265 return;
9266
9267 for (i = 0; trace_options[i]; i++) {
9268 if (top_level ||
9269 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9270 create_trace_option_core_file(tr, trace_options[i], i);
9271 }
9272 }
9273
9274 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9275 rb_simple_read(struct file *filp, char __user *ubuf,
9276 size_t cnt, loff_t *ppos)
9277 {
9278 struct trace_array *tr = filp->private_data;
9279 char buf[64];
9280 int r;
9281
9282 r = tracer_tracing_is_on(tr);
9283 r = sprintf(buf, "%d\n", r);
9284
9285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9286 }
9287
9288 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9289 rb_simple_write(struct file *filp, const char __user *ubuf,
9290 size_t cnt, loff_t *ppos)
9291 {
9292 struct trace_array *tr = filp->private_data;
9293 struct trace_buffer *buffer = tr->array_buffer.buffer;
9294 unsigned long val;
9295 int ret;
9296
9297 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9298 if (ret)
9299 return ret;
9300
9301 if (buffer) {
9302 mutex_lock(&trace_types_lock);
9303 if (!!val == tracer_tracing_is_on(tr)) {
9304 val = 0; /* do nothing */
9305 } else if (val) {
9306 tracer_tracing_on(tr);
9307 if (tr->current_trace->start)
9308 tr->current_trace->start(tr);
9309 } else {
9310 tracer_tracing_off(tr);
9311 if (tr->current_trace->stop)
9312 tr->current_trace->stop(tr);
9313 /* Wake up any waiters */
9314 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9315 }
9316 mutex_unlock(&trace_types_lock);
9317 }
9318
9319 (*ppos)++;
9320
9321 return cnt;
9322 }
9323
9324 static const struct file_operations rb_simple_fops = {
9325 .open = tracing_open_generic_tr,
9326 .read = rb_simple_read,
9327 .write = rb_simple_write,
9328 .release = tracing_release_generic_tr,
9329 .llseek = default_llseek,
9330 };
9331
9332 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9333 buffer_percent_read(struct file *filp, char __user *ubuf,
9334 size_t cnt, loff_t *ppos)
9335 {
9336 struct trace_array *tr = filp->private_data;
9337 char buf[64];
9338 int r;
9339
9340 r = tr->buffer_percent;
9341 r = sprintf(buf, "%d\n", r);
9342
9343 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9344 }
9345
9346 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9347 buffer_percent_write(struct file *filp, const char __user *ubuf,
9348 size_t cnt, loff_t *ppos)
9349 {
9350 struct trace_array *tr = filp->private_data;
9351 unsigned long val;
9352 int ret;
9353
9354 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9355 if (ret)
9356 return ret;
9357
9358 if (val > 100)
9359 return -EINVAL;
9360
9361 tr->buffer_percent = val;
9362
9363 (*ppos)++;
9364
9365 return cnt;
9366 }
9367
9368 static const struct file_operations buffer_percent_fops = {
9369 .open = tracing_open_generic_tr,
9370 .read = buffer_percent_read,
9371 .write = buffer_percent_write,
9372 .release = tracing_release_generic_tr,
9373 .llseek = default_llseek,
9374 };
9375
/* Parent directory of the per-instance directories; NULL until created. */
static struct dentry *trace_instance_dir;

/* Defined further down; needed earlier by trace_array_create_dir(). */
static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9380
9381 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9382 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9383 {
9384 enum ring_buffer_flags rb_flags;
9385
9386 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9387
9388 buf->tr = tr;
9389
9390 buf->buffer = ring_buffer_alloc(size, rb_flags);
9391 if (!buf->buffer)
9392 return -ENOMEM;
9393
9394 buf->data = alloc_percpu(struct trace_array_cpu);
9395 if (!buf->data) {
9396 ring_buffer_free(buf->buffer);
9397 buf->buffer = NULL;
9398 return -ENOMEM;
9399 }
9400
9401 /* Allocate the first page for all buffers */
9402 set_buffer_entries(&tr->array_buffer,
9403 ring_buffer_size(tr->array_buffer.buffer, 0));
9404
9405 return 0;
9406 }
9407
free_trace_buffer(struct array_buffer * buf)9408 static void free_trace_buffer(struct array_buffer *buf)
9409 {
9410 if (buf->buffer) {
9411 ring_buffer_free(buf->buffer);
9412 buf->buffer = NULL;
9413 free_percpu(buf->data);
9414 buf->data = NULL;
9415 }
9416 }
9417
allocate_trace_buffers(struct trace_array * tr,int size)9418 static int allocate_trace_buffers(struct trace_array *tr, int size)
9419 {
9420 int ret;
9421
9422 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9423 if (ret)
9424 return ret;
9425
9426 #ifdef CONFIG_TRACER_MAX_TRACE
9427 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9428 allocate_snapshot ? size : 1);
9429 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9430 free_trace_buffer(&tr->array_buffer);
9431 return -ENOMEM;
9432 }
9433 tr->allocated_snapshot = allocate_snapshot;
9434
9435 allocate_snapshot = false;
9436 #endif
9437
9438 return 0;
9439 }
9440
free_trace_buffers(struct trace_array * tr)9441 static void free_trace_buffers(struct trace_array *tr)
9442 {
9443 if (!tr)
9444 return;
9445
9446 free_trace_buffer(&tr->array_buffer);
9447
9448 #ifdef CONFIG_TRACER_MAX_TRACE
9449 free_trace_buffer(&tr->max_buffer);
9450 #endif
9451 }
9452
init_trace_flags_index(struct trace_array * tr)9453 static void init_trace_flags_index(struct trace_array *tr)
9454 {
9455 int i;
9456
9457 /* Used by the trace options files */
9458 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9459 tr->trace_flags_index[i] = i;
9460 }
9461
__update_tracer_options(struct trace_array * tr)9462 static void __update_tracer_options(struct trace_array *tr)
9463 {
9464 struct tracer *t;
9465
9466 for (t = trace_types; t; t = t->next)
9467 add_tracer_options(tr, t);
9468 }
9469
update_tracer_options(struct trace_array * tr)9470 static void update_tracer_options(struct trace_array *tr)
9471 {
9472 mutex_lock(&trace_types_lock);
9473 tracer_options_updated = true;
9474 __update_tracer_options(tr);
9475 mutex_unlock(&trace_types_lock);
9476 }
9477
9478 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9479 struct trace_array *trace_array_find(const char *instance)
9480 {
9481 struct trace_array *tr, *found = NULL;
9482
9483 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9484 if (tr->name && strcmp(tr->name, instance) == 0) {
9485 found = tr;
9486 break;
9487 }
9488 }
9489
9490 return found;
9491 }
9492
trace_array_find_get(const char * instance)9493 struct trace_array *trace_array_find_get(const char *instance)
9494 {
9495 struct trace_array *tr;
9496
9497 mutex_lock(&trace_types_lock);
9498 tr = trace_array_find(instance);
9499 if (tr)
9500 tr->ref++;
9501 mutex_unlock(&trace_types_lock);
9502
9503 return tr;
9504 }
9505
trace_array_create_dir(struct trace_array * tr)9506 static int trace_array_create_dir(struct trace_array *tr)
9507 {
9508 int ret;
9509
9510 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9511 if (!tr->dir)
9512 return -EINVAL;
9513
9514 ret = event_trace_add_tracer(tr->dir, tr);
9515 if (ret) {
9516 tracefs_remove(tr->dir);
9517 return ret;
9518 }
9519
9520 init_tracer_tracefs(tr, tr->dir);
9521 __update_tracer_options(tr);
9522
9523 return ret;
9524 }
9525
trace_array_create(const char * name)9526 static struct trace_array *trace_array_create(const char *name)
9527 {
9528 struct trace_array *tr;
9529 int ret;
9530
9531 ret = -ENOMEM;
9532 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9533 if (!tr)
9534 return ERR_PTR(ret);
9535
9536 tr->name = kstrdup(name, GFP_KERNEL);
9537 if (!tr->name)
9538 goto out_free_tr;
9539
9540 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9541 goto out_free_tr;
9542
9543 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9544 goto out_free_tr;
9545
9546 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9547
9548 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9549
9550 raw_spin_lock_init(&tr->start_lock);
9551
9552 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9553
9554 tr->current_trace = &nop_trace;
9555
9556 INIT_LIST_HEAD(&tr->systems);
9557 INIT_LIST_HEAD(&tr->events);
9558 INIT_LIST_HEAD(&tr->hist_vars);
9559 INIT_LIST_HEAD(&tr->err_log);
9560
9561 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9562 goto out_free_tr;
9563
9564 if (ftrace_allocate_ftrace_ops(tr) < 0)
9565 goto out_free_tr;
9566
9567 ftrace_init_trace_array(tr);
9568
9569 init_trace_flags_index(tr);
9570
9571 if (trace_instance_dir) {
9572 ret = trace_array_create_dir(tr);
9573 if (ret)
9574 goto out_free_tr;
9575 } else
9576 __trace_early_add_events(tr);
9577
9578 list_add(&tr->list, &ftrace_trace_arrays);
9579
9580 tr->ref++;
9581
9582 return tr;
9583
9584 out_free_tr:
9585 ftrace_free_ftrace_ops(tr);
9586 free_trace_buffers(tr);
9587 free_cpumask_var(tr->pipe_cpumask);
9588 free_cpumask_var(tr->tracing_cpumask);
9589 kfree(tr->name);
9590 kfree(tr);
9591
9592 return ERR_PTR(ret);
9593 }
9594
instance_mkdir(const char * name)9595 static int instance_mkdir(const char *name)
9596 {
9597 struct trace_array *tr;
9598 int ret;
9599
9600 mutex_lock(&event_mutex);
9601 mutex_lock(&trace_types_lock);
9602
9603 ret = -EEXIST;
9604 if (trace_array_find(name))
9605 goto out_unlock;
9606
9607 tr = trace_array_create(name);
9608
9609 ret = PTR_ERR_OR_ZERO(tr);
9610
9611 out_unlock:
9612 mutex_unlock(&trace_types_lock);
9613 mutex_unlock(&event_mutex);
9614 return ret;
9615 }
9616
9617 /**
9618 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9619 * @name: The name of the trace array to be looked up/created.
9620 *
9621 * Returns pointer to trace array with given name.
9622 * NULL, if it cannot be created.
9623 *
9624 * NOTE: This function increments the reference counter associated with the
9625 * trace array returned. This makes sure it cannot be freed while in use.
9626 * Use trace_array_put() once the trace array is no longer needed.
9627 * If the trace_array is to be freed, trace_array_destroy() needs to
9628 * be called after the trace_array_put(), or simply let user space delete
9629 * it from the tracefs instances directory. But until the
9630 * trace_array_put() is called, user space can not delete it.
9631 *
9632 */
trace_array_get_by_name(const char * name)9633 struct trace_array *trace_array_get_by_name(const char *name)
9634 {
9635 struct trace_array *tr;
9636
9637 mutex_lock(&event_mutex);
9638 mutex_lock(&trace_types_lock);
9639
9640 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9641 if (tr->name && strcmp(tr->name, name) == 0)
9642 goto out_unlock;
9643 }
9644
9645 tr = trace_array_create(name);
9646
9647 if (IS_ERR(tr))
9648 tr = NULL;
9649 out_unlock:
9650 if (tr)
9651 tr->ref++;
9652
9653 mutex_unlock(&trace_types_lock);
9654 mutex_unlock(&event_mutex);
9655 return tr;
9656 }
9657 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9658
__remove_instance(struct trace_array * tr)9659 static int __remove_instance(struct trace_array *tr)
9660 {
9661 int i;
9662
9663 /* Reference counter for a newly created trace array = 1. */
9664 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9665 return -EBUSY;
9666
9667 list_del(&tr->list);
9668
9669 /* Disable all the flags that were enabled coming in */
9670 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9671 if ((1 << i) & ZEROED_TRACE_FLAGS)
9672 set_tracer_flag(tr, 1 << i, 0);
9673 }
9674
9675 tracing_set_nop(tr);
9676 clear_ftrace_function_probes(tr);
9677 event_trace_del_tracer(tr);
9678 ftrace_clear_pids(tr);
9679 ftrace_destroy_function_files(tr);
9680 tracefs_remove(tr->dir);
9681 free_percpu(tr->last_func_repeats);
9682 free_trace_buffers(tr);
9683 clear_tracing_err_log(tr);
9684
9685 for (i = 0; i < tr->nr_topts; i++) {
9686 kfree(tr->topts[i].topts);
9687 }
9688 kfree(tr->topts);
9689
9690 free_cpumask_var(tr->pipe_cpumask);
9691 free_cpumask_var(tr->tracing_cpumask);
9692 kfree(tr->name);
9693 kfree(tr);
9694
9695 return 0;
9696 }
9697
trace_array_destroy(struct trace_array * this_tr)9698 int trace_array_destroy(struct trace_array *this_tr)
9699 {
9700 struct trace_array *tr;
9701 int ret;
9702
9703 if (!this_tr)
9704 return -EINVAL;
9705
9706 mutex_lock(&event_mutex);
9707 mutex_lock(&trace_types_lock);
9708
9709 ret = -ENODEV;
9710
9711 /* Making sure trace array exists before destroying it. */
9712 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9713 if (tr == this_tr) {
9714 ret = __remove_instance(tr);
9715 break;
9716 }
9717 }
9718
9719 mutex_unlock(&trace_types_lock);
9720 mutex_unlock(&event_mutex);
9721
9722 return ret;
9723 }
9724 EXPORT_SYMBOL_GPL(trace_array_destroy);
9725
instance_rmdir(const char * name)9726 static int instance_rmdir(const char *name)
9727 {
9728 struct trace_array *tr;
9729 int ret;
9730
9731 mutex_lock(&event_mutex);
9732 mutex_lock(&trace_types_lock);
9733
9734 ret = -ENODEV;
9735 tr = trace_array_find(name);
9736 if (tr)
9737 ret = __remove_instance(tr);
9738
9739 mutex_unlock(&trace_types_lock);
9740 mutex_unlock(&event_mutex);
9741
9742 return ret;
9743 }
9744
create_trace_instances(struct dentry * d_tracer)9745 static __init void create_trace_instances(struct dentry *d_tracer)
9746 {
9747 struct trace_array *tr;
9748
9749 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9750 instance_mkdir,
9751 instance_rmdir);
9752 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9753 return;
9754
9755 mutex_lock(&event_mutex);
9756 mutex_lock(&trace_types_lock);
9757
9758 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9759 if (!tr->name)
9760 continue;
9761 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9762 "Failed to create instance directory\n"))
9763 break;
9764 }
9765
9766 mutex_unlock(&trace_types_lock);
9767 mutex_unlock(&event_mutex);
9768 }
9769
9770 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9771 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9772 {
9773 int cpu;
9774
9775 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9776 tr, &show_traces_fops);
9777
9778 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9779 tr, &set_tracer_fops);
9780
9781 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9782 tr, &tracing_cpumask_fops);
9783
9784 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9785 tr, &tracing_iter_fops);
9786
9787 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9788 tr, &tracing_fops);
9789
9790 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9791 tr, &tracing_pipe_fops);
9792
9793 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9794 tr, &tracing_entries_fops);
9795
9796 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9797 tr, &tracing_total_entries_fops);
9798
9799 trace_create_file("free_buffer", 0200, d_tracer,
9800 tr, &tracing_free_buffer_fops);
9801
9802 trace_create_file("trace_marker", 0220, d_tracer,
9803 tr, &tracing_mark_fops);
9804
9805 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9806
9807 trace_create_file("trace_marker_raw", 0220, d_tracer,
9808 tr, &tracing_mark_raw_fops);
9809
9810 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9811 &trace_clock_fops);
9812
9813 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9814 tr, &rb_simple_fops);
9815
9816 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9817 &trace_time_stamp_mode_fops);
9818
9819 tr->buffer_percent = 50;
9820
9821 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9822 tr, &buffer_percent_fops);
9823
9824 create_trace_options_dir(tr);
9825
9826 #ifdef CONFIG_TRACER_MAX_TRACE
9827 trace_create_maxlat_file(tr, d_tracer);
9828 #endif
9829
9830 if (ftrace_create_function_files(tr, d_tracer))
9831 MEM_FAIL(1, "Could not allocate function filter files");
9832
9833 #ifdef CONFIG_TRACER_SNAPSHOT
9834 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9835 tr, &snapshot_fops);
9836 #endif
9837
9838 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9839 tr, &tracing_err_log_fops);
9840
9841 for_each_tracing_cpu(cpu)
9842 tracing_init_tracefs_percpu(tr, cpu);
9843
9844 ftrace_init_tracefs(tr, d_tracer);
9845 }
9846
trace_automount(struct dentry * mntpt,void * ingore)9847 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9848 {
9849 struct vfsmount *mnt;
9850 struct file_system_type *type;
9851
9852 /*
9853 * To maintain backward compatibility for tools that mount
9854 * debugfs to get to the tracing facility, tracefs is automatically
9855 * mounted to the debugfs/tracing directory.
9856 */
9857 type = get_fs_type("tracefs");
9858 if (!type)
9859 return NULL;
9860 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9861 put_filesystem(type);
9862 if (IS_ERR(mnt))
9863 return NULL;
9864 mntget(mnt);
9865
9866 return mnt;
9867 }
9868
9869 /**
9870 * tracing_init_dentry - initialize top level trace array
9871 *
9872 * This is called when creating files or directories in the tracing
9873 * directory. It is called via fs_initcall() by any of the boot up code
9874 * and expects to return the dentry of the top level tracing directory.
9875 */
tracing_init_dentry(void)9876 int tracing_init_dentry(void)
9877 {
9878 struct trace_array *tr = &global_trace;
9879
9880 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9881 pr_warn("Tracing disabled due to lockdown\n");
9882 return -EPERM;
9883 }
9884
9885 /* The top level trace array uses NULL as parent */
9886 if (tr->dir)
9887 return 0;
9888
9889 if (WARN_ON(!tracefs_initialized()))
9890 return -ENODEV;
9891
9892 /*
9893 * As there may still be users that expect the tracing
9894 * files to exist in debugfs/tracing, we must automount
9895 * the tracefs file system there, so older tools still
9896 * work with the newer kernel.
9897 */
9898 tr->dir = debugfs_create_automount("tracing", NULL,
9899 trace_automount, NULL);
9900
9901 return 0;
9902 }
9903
9904 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9905 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9906
9907 static struct workqueue_struct *eval_map_wq __initdata;
9908 static struct work_struct eval_map_work __initdata;
9909 static struct work_struct tracerfs_init_work __initdata;
9910
eval_map_work_func(struct work_struct * work)9911 static void __init eval_map_work_func(struct work_struct *work)
9912 {
9913 int len;
9914
9915 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9916 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9917 }
9918
trace_eval_init(void)9919 static int __init trace_eval_init(void)
9920 {
9921 INIT_WORK(&eval_map_work, eval_map_work_func);
9922
9923 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9924 if (!eval_map_wq) {
9925 pr_err("Unable to allocate eval_map_wq\n");
9926 /* Do work here */
9927 eval_map_work_func(&eval_map_work);
9928 return -ENOMEM;
9929 }
9930
9931 queue_work(eval_map_wq, &eval_map_work);
9932 return 0;
9933 }
9934
9935 subsys_initcall(trace_eval_init);
9936
trace_eval_sync(void)9937 static int __init trace_eval_sync(void)
9938 {
9939 /* Make sure the eval map updates are finished */
9940 if (eval_map_wq)
9941 destroy_workqueue(eval_map_wq);
9942 return 0;
9943 }
9944
9945 late_initcall_sync(trace_eval_sync);
9946
9947
9948 #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)9949 static void trace_module_add_evals(struct module *mod)
9950 {
9951 if (!mod->num_trace_evals)
9952 return;
9953
9954 /*
9955 * Modules with bad taint do not have events created, do
9956 * not bother with enums either.
9957 */
9958 if (trace_module_has_bad_taint(mod))
9959 return;
9960
9961 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9962 }
9963
9964 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)9965 static void trace_module_remove_evals(struct module *mod)
9966 {
9967 union trace_eval_map_item *map;
9968 union trace_eval_map_item **last = &trace_eval_maps;
9969
9970 if (!mod->num_trace_evals)
9971 return;
9972
9973 mutex_lock(&trace_eval_mutex);
9974
9975 map = trace_eval_maps;
9976
9977 while (map) {
9978 if (map->head.mod == mod)
9979 break;
9980 map = trace_eval_jmp_to_tail(map);
9981 last = &map->tail.next;
9982 map = map->tail.next;
9983 }
9984 if (!map)
9985 goto out;
9986
9987 *last = trace_eval_jmp_to_tail(map)->tail.next;
9988 kfree(map);
9989 out:
9990 mutex_unlock(&trace_eval_mutex);
9991 }
9992 #else
/* No-op variant used when CONFIG_TRACE_EVAL_MAP_FILE is not set. */
static inline void trace_module_remove_evals(struct module *mod)
{
}
9994 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9995
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9996 static int trace_module_notify(struct notifier_block *self,
9997 unsigned long val, void *data)
9998 {
9999 struct module *mod = data;
10000
10001 switch (val) {
10002 case MODULE_STATE_COMING:
10003 trace_module_add_evals(mod);
10004 break;
10005 case MODULE_STATE_GOING:
10006 trace_module_remove_evals(mod);
10007 break;
10008 }
10009
10010 return NOTIFY_OK;
10011 }
10012
10013 static struct notifier_block trace_module_nb = {
10014 .notifier_call = trace_module_notify,
10015 .priority = 0,
10016 };
10017 #endif /* CONFIG_MODULES */
10018
tracer_init_tracefs_work_func(struct work_struct * work)10019 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10020 {
10021
10022 event_trace_init();
10023
10024 init_tracer_tracefs(&global_trace, NULL);
10025 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10026
10027 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10028 &global_trace, &tracing_thresh_fops);
10029
10030 trace_create_file("README", TRACE_MODE_READ, NULL,
10031 NULL, &tracing_readme_fops);
10032
10033 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10034 NULL, &tracing_saved_cmdlines_fops);
10035
10036 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10037 NULL, &tracing_saved_cmdlines_size_fops);
10038
10039 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10040 NULL, &tracing_saved_tgids_fops);
10041
10042 trace_create_eval_file(NULL);
10043
10044 #ifdef CONFIG_MODULES
10045 register_module_notifier(&trace_module_nb);
10046 #endif
10047
10048 #ifdef CONFIG_DYNAMIC_FTRACE
10049 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10050 NULL, &tracing_dyn_info_fops);
10051 #endif
10052
10053 create_trace_instances(NULL);
10054
10055 update_tracer_options(&global_trace);
10056 }
10057
tracer_init_tracefs(void)10058 static __init int tracer_init_tracefs(void)
10059 {
10060 int ret;
10061
10062 trace_access_lock_init();
10063
10064 ret = tracing_init_dentry();
10065 if (ret)
10066 return 0;
10067
10068 if (eval_map_wq) {
10069 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10070 queue_work(eval_map_wq, &tracerfs_init_work);
10071 } else {
10072 tracer_init_tracefs_work_func(NULL);
10073 }
10074
10075 rv_init_interface();
10076
10077 return 0;
10078 }
10079
10080 fs_initcall(tracer_init_tracefs);
10081
10082 static int trace_die_panic_handler(struct notifier_block *self,
10083 unsigned long ev, void *unused);
10084
10085 static struct notifier_block trace_panic_notifier = {
10086 .notifier_call = trace_die_panic_handler,
10087 .priority = INT_MAX - 1,
10088 };
10089
10090 static struct notifier_block trace_die_notifier = {
10091 .notifier_call = trace_die_panic_handler,
10092 .priority = INT_MAX - 1,
10093 };
10094
10095 /*
10096 * The idea is to execute the following die/panic callback early, in order
10097 * to avoid showing irrelevant information in the trace (like other panic
10098 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10099 * warnings get disabled (to prevent potential log flooding).
10100 */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10101 static int trace_die_panic_handler(struct notifier_block *self,
10102 unsigned long ev, void *unused)
10103 {
10104 if (!ftrace_dump_on_oops)
10105 return NOTIFY_DONE;
10106
10107 /* The die notifier requires DIE_OOPS to trigger */
10108 if (self == &trace_die_notifier && ev != DIE_OOPS)
10109 return NOTIFY_DONE;
10110
10111 ftrace_dump(ftrace_dump_on_oops);
10112
10113 return NOTIFY_DONE;
10114 }
10115
/*
 * printk is set to a max of 1024; we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * KERN_TRACE is defined here so that there is a single place to modify
 * if we decide to change the log level the ftrace dump is printed at.
 */
#define KERN_TRACE		KERN_EMERG
10128
10129 void
trace_printk_seq(struct trace_seq * s)10130 trace_printk_seq(struct trace_seq *s)
10131 {
10132 /* Probably should print a warning here. */
10133 if (s->seq.len >= TRACE_MAX_PRINT)
10134 s->seq.len = TRACE_MAX_PRINT;
10135
10136 /*
10137 * More paranoid code. Although the buffer size is set to
10138 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10139 * an extra layer of protection.
10140 */
10141 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10142 s->seq.len = s->seq.size - 1;
10143
10144 /* should be zero ended, but we are paranoid. */
10145 s->buffer[s->seq.len] = 0;
10146
10147 printk(KERN_TRACE "%s", s->buffer);
10148
10149 trace_seq_init(s);
10150 }
10151
trace_init_global_iter(struct trace_iterator * iter)10152 void trace_init_global_iter(struct trace_iterator *iter)
10153 {
10154 iter->tr = &global_trace;
10155 iter->trace = iter->tr->current_trace;
10156 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10157 iter->array_buffer = &global_trace.array_buffer;
10158
10159 if (iter->trace && iter->trace->open)
10160 iter->trace->open(iter);
10161
10162 /* Annotate start of buffers if we had overruns */
10163 if (ring_buffer_overruns(iter->array_buffer->buffer))
10164 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10165
10166 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10167 if (trace_clocks[iter->tr->clock_id].in_ns)
10168 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10169
10170 /* Can not use kmalloc for iter.temp and iter.fmt */
10171 iter->temp = static_temp_buf;
10172 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10173 iter->fmt = static_fmt_buf;
10174 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10175 }
10176
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10177 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10178 {
10179 /* use static because iter can be a bit big for the stack */
10180 static struct trace_iterator iter;
10181 static atomic_t dump_running;
10182 struct trace_array *tr = &global_trace;
10183 unsigned int old_userobj;
10184 unsigned long flags;
10185 int cnt = 0, cpu;
10186
10187 /* Only allow one dump user at a time. */
10188 if (atomic_inc_return(&dump_running) != 1) {
10189 atomic_dec(&dump_running);
10190 return;
10191 }
10192
10193 /*
10194 * Always turn off tracing when we dump.
10195 * We don't need to show trace output of what happens
10196 * between multiple crashes.
10197 *
10198 * If the user does a sysrq-z, then they can re-enable
10199 * tracing with echo 1 > tracing_on.
10200 */
10201 tracing_off();
10202
10203 local_irq_save(flags);
10204
10205 /* Simulate the iterator */
10206 trace_init_global_iter(&iter);
10207
10208 for_each_tracing_cpu(cpu) {
10209 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10210 }
10211
10212 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10213
10214 /* don't look at user memory in panic mode */
10215 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10216
10217 switch (oops_dump_mode) {
10218 case DUMP_ALL:
10219 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10220 break;
10221 case DUMP_ORIG:
10222 iter.cpu_file = raw_smp_processor_id();
10223 break;
10224 case DUMP_NONE:
10225 goto out_enable;
10226 default:
10227 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10228 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10229 }
10230
10231 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10232
10233 /* Did function tracer already get disabled? */
10234 if (ftrace_is_dead()) {
10235 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10236 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10237 }
10238
10239 /*
10240 * We need to stop all tracing on all CPUS to read
10241 * the next buffer. This is a bit expensive, but is
10242 * not done often. We fill all what we can read,
10243 * and then release the locks again.
10244 */
10245
10246 while (!trace_empty(&iter)) {
10247
10248 if (!cnt)
10249 printk(KERN_TRACE "---------------------------------\n");
10250
10251 cnt++;
10252
10253 trace_iterator_reset(&iter);
10254 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10255
10256 if (trace_find_next_entry_inc(&iter) != NULL) {
10257 int ret;
10258
10259 ret = print_trace_line(&iter);
10260 if (ret != TRACE_TYPE_NO_CONSUME)
10261 trace_consume(&iter);
10262 }
10263 touch_nmi_watchdog();
10264
10265 trace_printk_seq(&iter.seq);
10266 }
10267
10268 if (!cnt)
10269 printk(KERN_TRACE " (ftrace buffer empty)\n");
10270 else
10271 printk(KERN_TRACE "---------------------------------\n");
10272
10273 out_enable:
10274 tr->trace_flags |= old_userobj;
10275
10276 for_each_tracing_cpu(cpu) {
10277 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10278 }
10279 atomic_dec(&dump_running);
10280 local_irq_restore(flags);
10281 }
10282 EXPORT_SYMBOL_GPL(ftrace_dump);
10283
10284 #define WRITE_BUFSIZE 4096
10285
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10286 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10287 size_t count, loff_t *ppos,
10288 int (*createfn)(const char *))
10289 {
10290 char *kbuf, *buf, *tmp;
10291 int ret = 0;
10292 size_t done = 0;
10293 size_t size;
10294
10295 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10296 if (!kbuf)
10297 return -ENOMEM;
10298
10299 while (done < count) {
10300 size = count - done;
10301
10302 if (size >= WRITE_BUFSIZE)
10303 size = WRITE_BUFSIZE - 1;
10304
10305 if (copy_from_user(kbuf, buffer + done, size)) {
10306 ret = -EFAULT;
10307 goto out;
10308 }
10309 kbuf[size] = '\0';
10310 buf = kbuf;
10311 do {
10312 tmp = strchr(buf, '\n');
10313 if (tmp) {
10314 *tmp = '\0';
10315 size = tmp - buf + 1;
10316 } else {
10317 size = strlen(buf);
10318 if (done + size < count) {
10319 if (buf != kbuf)
10320 break;
10321 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10322 pr_warn("Line length is too long: Should be less than %d\n",
10323 WRITE_BUFSIZE - 2);
10324 ret = -EINVAL;
10325 goto out;
10326 }
10327 }
10328 done += size;
10329
10330 /* Remove comments */
10331 tmp = strchr(buf, '#');
10332
10333 if (tmp)
10334 *tmp = '\0';
10335
10336 ret = createfn(buf);
10337 if (ret)
10338 goto out;
10339 buf += size;
10340
10341 } while (done < count);
10342 }
10343 ret = done;
10344
10345 out:
10346 kfree(kbuf);
10347
10348 return ret;
10349 }
10350
10351 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10352 __init static bool tr_needs_alloc_snapshot(const char *name)
10353 {
10354 char *test;
10355 int len = strlen(name);
10356 bool ret;
10357
10358 if (!boot_snapshot_index)
10359 return false;
10360
10361 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10362 boot_snapshot_info[len] == '\t')
10363 return true;
10364
10365 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10366 if (!test)
10367 return false;
10368
10369 sprintf(test, "\t%s\t", name);
10370 ret = strstr(boot_snapshot_info, test) == NULL;
10371 kfree(test);
10372 return ret;
10373 }
10374
do_allocate_snapshot(const char * name)10375 __init static void do_allocate_snapshot(const char *name)
10376 {
10377 if (!tr_needs_alloc_snapshot(name))
10378 return;
10379
10380 /*
10381 * When allocate_snapshot is set, the next call to
10382 * allocate_trace_buffers() (called by trace_array_get_by_name())
10383 * will allocate the snapshot buffer. That will alse clear
10384 * this flag.
10385 */
10386 allocate_snapshot = true;
10387 }
10388 #else
/* No-op variant used when CONFIG_TRACER_MAX_TRACE is not set. */
static inline void do_allocate_snapshot(const char *name)
{
}
10390 #endif
10391
enable_instances(void)10392 __init static void enable_instances(void)
10393 {
10394 struct trace_array *tr;
10395 char *curr_str;
10396 char *str;
10397 char *tok;
10398
10399 /* A tab is always appended */
10400 boot_instance_info[boot_instance_index - 1] = '\0';
10401 str = boot_instance_info;
10402
10403 while ((curr_str = strsep(&str, "\t"))) {
10404
10405 tok = strsep(&curr_str, ",");
10406
10407 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10408 do_allocate_snapshot(tok);
10409
10410 tr = trace_array_get_by_name(tok);
10411 if (!tr) {
10412 pr_warn("Failed to create instance buffer %s\n", curr_str);
10413 continue;
10414 }
10415 /* Allow user space to delete it */
10416 trace_array_put(tr);
10417
10418 while ((tok = strsep(&curr_str, ","))) {
10419 early_enable_events(tr, tok, true);
10420 }
10421 }
10422 }
10423
tracer_alloc_buffers(void)10424 __init static int tracer_alloc_buffers(void)
10425 {
10426 int ring_buf_size;
10427 int ret = -ENOMEM;
10428
10429
10430 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10431 pr_warn("Tracing disabled due to lockdown\n");
10432 return -EPERM;
10433 }
10434
10435 /*
10436 * Make sure we don't accidentally add more trace options
10437 * than we have bits for.
10438 */
10439 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10440
10441 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10442 goto out;
10443
10444 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10445 goto out_free_buffer_mask;
10446
10447 /* Only allocate trace_printk buffers if a trace_printk exists */
10448 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10449 /* Must be called before global_trace.buffer is allocated */
10450 trace_printk_init_buffers();
10451
10452 /* To save memory, keep the ring buffer size to its minimum */
10453 if (ring_buffer_expanded)
10454 ring_buf_size = trace_buf_size;
10455 else
10456 ring_buf_size = 1;
10457
10458 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10459 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10460
10461 raw_spin_lock_init(&global_trace.start_lock);
10462
10463 /*
10464 * The prepare callbacks allocates some memory for the ring buffer. We
10465 * don't free the buffer if the CPU goes down. If we were to free
10466 * the buffer, then the user would lose any trace that was in the
10467 * buffer. The memory will be removed once the "instance" is removed.
10468 */
10469 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10470 "trace/RB:prepare", trace_rb_cpu_prepare,
10471 NULL);
10472 if (ret < 0)
10473 goto out_free_cpumask;
10474 /* Used for event triggers */
10475 ret = -ENOMEM;
10476 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10477 if (!temp_buffer)
10478 goto out_rm_hp_state;
10479
10480 if (trace_create_savedcmd() < 0)
10481 goto out_free_temp_buffer;
10482
10483 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10484 goto out_free_savedcmd;
10485
10486 /* TODO: make the number of buffers hot pluggable with CPUS */
10487 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10488 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10489 goto out_free_pipe_cpumask;
10490 }
10491 if (global_trace.buffer_disabled)
10492 tracing_off();
10493
10494 if (trace_boot_clock) {
10495 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10496 if (ret < 0)
10497 pr_warn("Trace clock %s not defined, going back to default\n",
10498 trace_boot_clock);
10499 }
10500
10501 /*
10502 * register_tracer() might reference current_trace, so it
10503 * needs to be set before we register anything. This is
10504 * just a bootstrap of current_trace anyway.
10505 */
10506 global_trace.current_trace = &nop_trace;
10507
10508 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10509
10510 ftrace_init_global_array_ops(&global_trace);
10511
10512 init_trace_flags_index(&global_trace);
10513
10514 register_tracer(&nop_trace);
10515
10516 /* Function tracing may start here (via kernel command line) */
10517 init_function_trace();
10518
10519 /* All seems OK, enable tracing */
10520 tracing_disabled = 0;
10521
10522 atomic_notifier_chain_register(&panic_notifier_list,
10523 &trace_panic_notifier);
10524
10525 register_die_notifier(&trace_die_notifier);
10526
10527 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10528
10529 INIT_LIST_HEAD(&global_trace.systems);
10530 INIT_LIST_HEAD(&global_trace.events);
10531 INIT_LIST_HEAD(&global_trace.hist_vars);
10532 INIT_LIST_HEAD(&global_trace.err_log);
10533 list_add(&global_trace.list, &ftrace_trace_arrays);
10534
10535 apply_trace_boot_options();
10536
10537 register_snapshot_cmd();
10538
10539 test_can_verify();
10540
10541 return 0;
10542
10543 out_free_pipe_cpumask:
10544 free_cpumask_var(global_trace.pipe_cpumask);
10545 out_free_savedcmd:
10546 free_saved_cmdlines_buffer(savedcmd);
10547 out_free_temp_buffer:
10548 ring_buffer_free(temp_buffer);
10549 out_rm_hp_state:
10550 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10551 out_free_cpumask:
10552 free_cpumask_var(global_trace.tracing_cpumask);
10553 out_free_buffer_mask:
10554 free_cpumask_var(tracing_buffer_mask);
10555 out:
10556 return ret;
10557 }
10558
ftrace_boot_snapshot(void)10559 void __init ftrace_boot_snapshot(void)
10560 {
10561 #ifdef CONFIG_TRACER_MAX_TRACE
10562 struct trace_array *tr;
10563
10564 if (!snapshot_at_boot)
10565 return;
10566
10567 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10568 if (!tr->allocated_snapshot)
10569 continue;
10570
10571 tracing_snapshot_instance(tr);
10572 trace_array_puts(tr, "** Boot snapshot taken **\n");
10573 }
10574 #endif
10575 }
10576
early_trace_init(void)10577 void __init early_trace_init(void)
10578 {
10579 if (tracepoint_printk) {
10580 tracepoint_print_iter =
10581 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10582 if (MEM_FAIL(!tracepoint_print_iter,
10583 "Failed to allocate trace iterator\n"))
10584 tracepoint_printk = 0;
10585 else
10586 static_key_enable(&tracepoint_printk_key.key);
10587 }
10588 tracer_alloc_buffers();
10589
10590 init_events();
10591 }
10592
trace_init(void)10593 void __init trace_init(void)
10594 {
10595 trace_event_init();
10596
10597 if (boot_instance_index)
10598 enable_instances();
10599 }
10600
clear_boot_tracer(void)10601 __init static void clear_boot_tracer(void)
10602 {
10603 /*
10604 * The default tracer at boot buffer is an init section.
10605 * This function is called in lateinit. If we did not
10606 * find the boot tracer, then clear it out, to prevent
10607 * later registration from accessing the buffer that is
10608 * about to be freed.
10609 */
10610 if (!default_bootup_tracer)
10611 return;
10612
10613 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10614 default_bootup_tracer);
10615 default_bootup_tracer = NULL;
10616 }
10617
10618 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10619 __init static void tracing_set_default_clock(void)
10620 {
10621 /* sched_clock_stable() is determined in late_initcall */
10622 if (!trace_boot_clock && !sched_clock_stable()) {
10623 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10624 pr_warn("Can not set tracing clock due to lockdown\n");
10625 return;
10626 }
10627
10628 printk(KERN_WARNING
10629 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10630 "If you want to keep using the local clock, then add:\n"
10631 " \"trace_clock=local\"\n"
10632 "on the kernel command line\n");
10633 tracing_set_clock(&global_trace, "global");
10634 }
10635 }
10636 #else
/* Variant used when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not set: does nothing. */
static inline void tracing_set_default_clock(void)
{
}
10638 #endif
10639
late_trace_init(void)10640 __init static int late_trace_init(void)
10641 {
10642 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10643 static_key_disable(&tracepoint_printk_key.key);
10644 tracepoint_printk = 0;
10645 }
10646
10647 tracing_set_default_clock();
10648 clear_boot_tracer();
10649 return 0;
10650 }
10651
10652 late_initcall_sync(late_trace_init);
10653