xref: /openbmc/linux/kernel/trace/trace.c (revision fd589a8f)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/ring_buffer.h>
15 #include <linux/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/smp_lock.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/ctype.h>
36 #include <linux/init.h>
37 #include <linux/poll.h>
38 #include <linux/gfp.h>
39 #include <linux/fs.h>
40 
41 #include "trace.h"
42 #include "trace_output.h"
43 
44 #define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
45 
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 int ring_buffer_expanded;
51 
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60 
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65 
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68 	{ }
69 };
70 
71 static struct tracer_flags dummy_tracer_flags = {
72 	.val = 0,
73 	.opts = dummy_tracer_opt
74 };
75 
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78 	return 0;
79 }
80 
81 /*
82  * Kill all tracing for good (never come back).
83  * It is initialized to 1 and is set back to zero only when the
84  * initialization of the tracer is successful. No other place sets
85  * it back to zero.
86  */
87 static int tracing_disabled = 1;
88 
89 DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
90 
91 static inline void ftrace_disable_cpu(void)
92 {
93 	preempt_disable();
94 	local_inc(&__get_cpu_var(ftrace_cpu_disabled));
95 }
96 
97 static inline void ftrace_enable_cpu(void)
98 {
99 	local_dec(&__get_cpu_var(ftrace_cpu_disabled));
100 	preempt_enable();
101 }
102 
103 static cpumask_var_t __read_mostly	tracing_buffer_mask;
104 
105 /* Define which cpu buffers are currently being read in trace_pipe */
106 static cpumask_var_t			tracing_reader_cpumask;
107 
108 #define for_each_tracing_cpu(cpu)	\
109 	for_each_cpu(cpu, tracing_buffer_mask)
110 
111 /*
112  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
113  *
114  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
115  * is set, then ftrace_dump is called. This will output the contents
116  * of the ftrace buffers to the console.  This is very useful for
117  * capturing traces that lead to crashes and outputting them to a
118  * serial console.
119  *
120  * It is off by default, but you can enable it either by specifying
121  * "ftrace_dump_on_oops" on the kernel command line or by setting
122  * /proc/sys/kernel/ftrace_dump_on_oops to a non-zero value.
123  */
124 int ftrace_dump_on_oops;
125 
126 static int tracing_set_tracer(const char *buf);
127 
128 #define BOOTUP_TRACER_SIZE		100
129 static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
130 static char *default_bootup_tracer;
131 
132 static int __init set_ftrace(char *str)
133 {
134 	strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
135 	default_bootup_tracer = bootup_tracer_buf;
136 	/* We are using ftrace early, expand it */
137 	ring_buffer_expanded = 1;
138 	return 1;
139 }
140 __setup("ftrace=", set_ftrace);
141 
142 static int __init set_ftrace_dump_on_oops(char *str)
143 {
144 	ftrace_dump_on_oops = 1;
145 	return 1;
146 }
147 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
148 
149 unsigned long long ns2usecs(cycle_t nsec)
150 {
151 	nsec += 500;
152 	do_div(nsec, 1000);
153 	return nsec;
154 }
155 
156 /*
157  * The global_trace is the descriptor that holds the tracing
158  * buffers for the live tracing. For each CPU, it contains
159  * a link list of pages that will store trace entries. The
160  * page descriptor of the pages in the memory is used to hold
161  * the link list by linking the lru item in the page descriptor
162  * to each of the pages in the buffer per CPU.
163  *
164  * For each active CPU there is a data field that holds the
165  * pages for the buffer for that CPU. Each CPU has the same number
166  * of pages allocated for its buffer.
167  */
168 static struct trace_array	global_trace;
169 
170 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
171 
172 int filter_current_check_discard(struct ring_buffer *buffer,
173 				 struct ftrace_event_call *call, void *rec,
174 				 struct ring_buffer_event *event)
175 {
176 	return filter_check_discard(call, rec, buffer, event);
177 }
178 EXPORT_SYMBOL_GPL(filter_current_check_discard);
179 
180 cycle_t ftrace_now(int cpu)
181 {
182 	u64 ts;
183 
184 	/* Early boot up does not have a buffer yet */
185 	if (!global_trace.buffer)
186 		return trace_clock_local();
187 
188 	ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
189 	ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
190 
191 	return ts;
192 }
193 
194 /*
195  * The max_tr is used to snapshot the global_trace when a maximum
196  * latency is reached. Some tracers will use this to store a maximum
197  * trace while it continues examining live traces.
198  *
199  * The buffers for the max_tr are set up the same as the global_trace.
200  * When a snapshot is taken, the linked list of the max_tr is swapped
201  * with the linked list of the global_trace and the buffers are reset for
202  * the global_trace so the tracing can continue.
203  */
204 static struct trace_array	max_tr;
205 
206 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
207 
208 /* tracer_enabled is used to toggle activation of a tracer */
209 static int			tracer_enabled = 1;
210 
211 /**
212  * tracing_is_enabled - return tracer_enabled status
213  *
214  * This function is used by other tracers to know the status
215  * of the tracer_enabled flag.  Tracers may use this function
216  * to know whether they should enable their features when starting
217  * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
218  */
219 int tracing_is_enabled(void)
220 {
221 	return tracer_enabled;
222 }
223 
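/*
 * Editor's illustrative sketch (kept out of the build with #if 0): how a
 * tracer might consult tracing_is_enabled() from its ->start() hook, in
 * the spirit of what start_irqsoff_tracer() does.  The my_tracer_* name
 * is hypothetical and not part of this file.
 */
#if 0
static void my_tracer_start(struct trace_array *tr)
{
	/* Only arm our hooks if tracing has not been soft-disabled */
	if (!tracing_is_enabled())
		return;

	/* ... register callbacks, reset per-cpu state, ... */
}
#endif
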
224 /*
225  * trace_buf_size is the size in bytes that is allocated
226  * for a buffer. Note, the number of bytes is always rounded
227  * to page size.
228  *
229  * This number is purposely set to a low number of 16384 entries.
230  * If a dump on oops happens, it is much appreciated not to have
231  * to wait for all that output. In any case, this is configurable
232  * at both boot time and run time.
233  */
234 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
235 
236 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
237 
238 /* trace_types holds a link list of available tracers. */
239 static struct tracer		*trace_types __read_mostly;
240 
241 /* current_trace points to the tracer that is currently active */
242 static struct tracer		*current_trace __read_mostly;
243 
244 /*
245  * max_tracer_type_len is used to simplify allocating buffers
246  * when reading tracer names from userspace. We keep track of
247  * the longest tracer name registered.
248  */
249 static int			max_tracer_type_len;
250 
251 /*
252  * trace_types_lock is used to protect the trace_types list.
253  * This lock is also used to keep user access serialized.
254  * Accesses from userspace will grab this lock while userspace
255  * activities happen inside the kernel.
256  */
257 static DEFINE_MUTEX(trace_types_lock);
258 
259 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
260 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
261 
262 /* trace_flags holds trace_options default values */
263 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
264 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
265 	TRACE_ITER_GRAPH_TIME;
266 
267 static int trace_stop_count;
268 static DEFINE_SPINLOCK(tracing_start_lock);
269 
270 /**
271  * trace_wake_up - wake up tasks waiting for trace input
272  *
273  * Simply wakes up any task that is blocked on the trace_wait
274  * queue. This is used with trace_poll for tasks polling the trace.
275  */
276 void trace_wake_up(void)
277 {
278 	/*
279 	 * The runqueue_is_locked() can fail, but this is the best we
280 	 * have for now:
281 	 */
282 	if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
283 		wake_up(&trace_wait);
284 }
285 
286 static int __init set_buf_size(char *str)
287 {
288 	unsigned long buf_size;
289 
290 	if (!str)
291 		return 0;
292 	buf_size = memparse(str, &str);
293 	/* nr_entries cannot be zero */
294 	if (buf_size == 0)
295 		return 0;
296 	trace_buf_size = buf_size;
297 	return 1;
298 }
299 __setup("trace_buf_size=", set_buf_size);
300 
301 unsigned long nsecs_to_usecs(unsigned long nsecs)
302 {
303 	return nsecs / 1000;
304 }
305 
306 /* These must match the bit positions in trace_iterator_flags */
307 static const char *trace_options[] = {
308 	"print-parent",
309 	"sym-offset",
310 	"sym-addr",
311 	"verbose",
312 	"raw",
313 	"hex",
314 	"bin",
315 	"block",
316 	"stacktrace",
317 	"sched-tree",
318 	"trace_printk",
319 	"ftrace_preempt",
320 	"branch",
321 	"annotate",
322 	"userstacktrace",
323 	"sym-userobj",
324 	"printk-msg-only",
325 	"context-info",
326 	"latency-format",
327 	"sleep-time",
328 	"graph-time",
329 	NULL
330 };
331 
332 static struct {
333 	u64 (*func)(void);
334 	const char *name;
335 } trace_clocks[] = {
336 	{ trace_clock_local,	"local" },
337 	{ trace_clock_global,	"global" },
338 };
339 
340 int trace_clock_id;
341 
342 /*
343  * trace_parser_get_init - gets the buffer for trace parser
344  */
345 int trace_parser_get_init(struct trace_parser *parser, int size)
346 {
347 	memset(parser, 0, sizeof(*parser));
348 
349 	parser->buffer = kmalloc(size, GFP_KERNEL);
350 	if (!parser->buffer)
351 		return 1;
352 
353 	parser->size = size;
354 	return 0;
355 }
356 
357 /*
358  * trace_parser_put - frees the buffer for trace parser
359  */
360 void trace_parser_put(struct trace_parser *parser)
361 {
362 	kfree(parser->buffer);
363 }
364 
365 /*
366  * trace_get_user - reads the user input string separated by space
367  * (matched by isspace(ch))
368  *
369  * For each string found the 'struct trace_parser' is updated,
370  * and the function returns.
371  *
372  * Returns number of bytes read.
373  *
374  * See kernel/trace/trace.h for 'struct trace_parser' details.
375  */
376 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
377 	size_t cnt, loff_t *ppos)
378 {
379 	char ch;
380 	size_t read = 0;
381 	ssize_t ret;
382 
383 	if (!*ppos)
384 		trace_parser_clear(parser);
385 
386 	ret = get_user(ch, ubuf++);
387 	if (ret)
388 		goto out;
389 
390 	read++;
391 	cnt--;
392 
393 	/*
394 	 * If the parser has not finished with the last write,
395 	 * continue reading the user input without skipping spaces.
396 	 */
397 	if (!parser->cont) {
398 		/* skip white space */
399 		while (cnt && isspace(ch)) {
400 			ret = get_user(ch, ubuf++);
401 			if (ret)
402 				goto out;
403 			read++;
404 			cnt--;
405 		}
406 
407 		/* only spaces were written */
408 		if (isspace(ch)) {
409 			*ppos += read;
410 			ret = read;
411 			goto out;
412 		}
413 
414 		parser->idx = 0;
415 	}
416 
417 	/* read the non-space input */
418 	while (cnt && !isspace(ch)) {
419 		if (parser->idx < parser->size)
420 			parser->buffer[parser->idx++] = ch;
421 		else {
422 			ret = -EINVAL;
423 			goto out;
424 		}
425 		ret = get_user(ch, ubuf++);
426 		if (ret)
427 			goto out;
428 		read++;
429 		cnt--;
430 	}
431 
432 	/* We either got finished input or we have to wait for another call. */
433 	if (isspace(ch)) {
434 		parser->buffer[parser->idx] = 0;
435 		parser->cont = false;
436 	} else {
437 		parser->cont = true;
438 		parser->buffer[parser->idx++] = ch;
439 	}
440 
441 	*ppos += read;
442 	ret = read;
443 
444 out:
445 	return ret;
446 }
447 
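/*
 * Editor's illustrative sketch (kept out of the build with #if 0): a
 * typical write() handler built on the trace_parser helpers above.  The
 * names my_write() and my_handle_word() are hypothetical.
 */
#if 0
static ssize_t
my_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	/* 64 bytes is an arbitrary per-word buffer size for this example */
	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);

	/* A complete, NUL-terminated word was parsed (cont is false) */
	if (read > 0 && parser.idx && !parser.cont)
		my_handle_word(parser.buffer);

	trace_parser_put(&parser);
	return read;
}
#endif
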
448 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
449 {
450 	int len;
451 	int ret;
452 
453 	if (!cnt)
454 		return 0;
455 
456 	if (s->len <= s->readpos)
457 		return -EBUSY;
458 
459 	len = s->len - s->readpos;
460 	if (cnt > len)
461 		cnt = len;
462 	ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
463 	if (ret == cnt)
464 		return -EFAULT;
465 
466 	cnt -= ret;
467 
468 	s->readpos += cnt;
469 	return cnt;
470 }
471 
472 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
473 {
474 	int len;
475 	void *ret;
476 
477 	if (s->len <= s->readpos)
478 		return -EBUSY;
479 
480 	len = s->len - s->readpos;
481 	if (cnt > len)
482 		cnt = len;
483 	ret = memcpy(buf, s->buffer + s->readpos, cnt);
484 	if (!ret)
485 		return -EFAULT;
486 
487 	s->readpos += cnt;
488 	return cnt;
489 }
490 
491 /*
492  * ftrace_max_lock is used to protect the swapping of buffers
493  * when taking a max snapshot. The buffers themselves are
494  * protected by per_cpu spinlocks. But the action of the swap
495  * needs its own lock.
496  *
497  * This is defined as a raw_spinlock_t in order to help
498  * with performance when lockdep debugging is enabled.
499  *
500  * It is also used in other places outside the update_max_tr
501  * so it needs to be defined outside of the
502  * CONFIG_TRACER_MAX_TRACE.
503  */
504 static raw_spinlock_t ftrace_max_lock =
505 	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
506 
507 #ifdef CONFIG_TRACER_MAX_TRACE
508 unsigned long __read_mostly	tracing_max_latency;
509 unsigned long __read_mostly	tracing_thresh;
510 
511 /*
512  * Copy the new maximum trace into the separate maximum-trace
513  * structure. (this way the maximum trace is permanently saved,
514  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
515  */
516 static void
517 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
518 {
519 	struct trace_array_cpu *data = tr->data[cpu];
520 	struct trace_array_cpu *max_data;
521 
522 	max_tr.cpu = cpu;
523 	max_tr.time_start = data->preempt_timestamp;
524 
525 	max_data = max_tr.data[cpu];
526 	max_data->saved_latency = tracing_max_latency;
527 	max_data->critical_start = data->critical_start;
528 	max_data->critical_end = data->critical_end;
529 
530 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
531 	max_data->pid = tsk->pid;
532 	max_data->uid = task_uid(tsk);
533 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
534 	max_data->policy = tsk->policy;
535 	max_data->rt_priority = tsk->rt_priority;
536 
537 	/* record this task's comm */
538 	tracing_record_cmdline(tsk);
539 }
540 
541 /**
542  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
543  * @tr: tracer
544  * @tsk: the task with the latency
545  * @cpu: The cpu that initiated the trace.
546  *
547  * Flip the buffers between the @tr and the max_tr and record information
548  * about which task was the cause of this latency.
549  */
550 void
551 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
552 {
553 	struct ring_buffer *buf = tr->buffer;
554 
555 	if (trace_stop_count)
556 		return;
557 
558 	WARN_ON_ONCE(!irqs_disabled());
559 	__raw_spin_lock(&ftrace_max_lock);
560 
561 	tr->buffer = max_tr.buffer;
562 	max_tr.buffer = buf;
563 
564 	__update_max_tr(tr, tsk, cpu);
565 	__raw_spin_unlock(&ftrace_max_lock);
566 }
567 
568 /**
569  * update_max_tr_single - only copy one trace over, and reset the rest
570  * @tr: tracer
571  * @tsk: task with the latency
572  * @cpu: the cpu of the buffer to copy.
573  *
574  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
575  */
576 void
577 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
578 {
579 	int ret;
580 
581 	if (trace_stop_count)
582 		return;
583 
584 	WARN_ON_ONCE(!irqs_disabled());
585 	__raw_spin_lock(&ftrace_max_lock);
586 
587 	ftrace_disable_cpu();
588 
589 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
590 
591 	if (ret == -EBUSY) {
592 		/*
593 		 * We failed to swap the buffer due to a commit taking
594 		 * place on this CPU. We fail to record, but we reset
595 		 * the max trace buffer (no one writes directly to it)
596 		 * and flag that it failed.
597 		 */
598 		trace_array_printk(&max_tr, _THIS_IP_,
599 			"Failed to swap buffers due to commit in progress\n");
600 	}
601 
602 	ftrace_enable_cpu();
603 
604 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
605 
606 	__update_max_tr(tr, tsk, cpu);
607 	__raw_spin_unlock(&ftrace_max_lock);
608 }
609 #endif /* CONFIG_TRACER_MAX_TRACE */
610 
611 /**
612  * register_tracer - register a tracer with the ftrace system.
613  * @type: the plugin for the tracer
614  *
615  * Register a new plugin tracer.
616  */
617 int register_tracer(struct tracer *type)
618 __releases(kernel_lock)
619 __acquires(kernel_lock)
620 {
621 	struct tracer *t;
622 	int len;
623 	int ret = 0;
624 
625 	if (!type->name) {
626 		pr_info("Tracer must have a name\n");
627 		return -1;
628 	}
629 
630 	/*
631 	 * When this gets called we hold the BKL which means that
632 	 * preemption is disabled. Various trace selftests however
633 	 * need to disable and enable preemption for successful tests.
634 	 * So we drop the BKL here and grab it after the tests again.
635 	 */
636 	unlock_kernel();
637 	mutex_lock(&trace_types_lock);
638 
639 	tracing_selftest_running = true;
640 
641 	for (t = trace_types; t; t = t->next) {
642 		if (strcmp(type->name, t->name) == 0) {
643 			/* already found */
644 			pr_info("Trace %s already registered\n",
645 				type->name);
646 			ret = -1;
647 			goto out;
648 		}
649 	}
650 
651 	if (!type->set_flag)
652 		type->set_flag = &dummy_set_flag;
653 	if (!type->flags)
654 		type->flags = &dummy_tracer_flags;
655 	else
656 		if (!type->flags->opts)
657 			type->flags->opts = dummy_tracer_opt;
658 	if (!type->wait_pipe)
659 		type->wait_pipe = default_wait_pipe;
660 
661 
662 #ifdef CONFIG_FTRACE_STARTUP_TEST
663 	if (type->selftest && !tracing_selftest_disabled) {
664 		struct tracer *saved_tracer = current_trace;
665 		struct trace_array *tr = &global_trace;
666 
667 		/*
668 		 * Run a selftest on this tracer.
669 		 * Here we reset the trace buffer, and set the current
670 		 * tracer to be this tracer. The tracer can then run some
671 		 * internal tracing to verify that everything is in order.
672 		 * If we fail, we do not register this tracer.
673 		 */
674 		tracing_reset_online_cpus(tr);
675 
676 		current_trace = type;
677 		/* the test is responsible for initializing and enabling */
678 		pr_info("Testing tracer %s: ", type->name);
679 		ret = type->selftest(type, tr);
680 		/* the test is responsible for resetting too */
681 		current_trace = saved_tracer;
682 		if (ret) {
683 			printk(KERN_CONT "FAILED!\n");
684 			goto out;
685 		}
686 		/* Only reset on passing, to avoid touching corrupted buffers */
687 		tracing_reset_online_cpus(tr);
688 
689 		printk(KERN_CONT "PASSED\n");
690 	}
691 #endif
692 
693 	type->next = trace_types;
694 	trace_types = type;
695 	len = strlen(type->name);
696 	if (len > max_tracer_type_len)
697 		max_tracer_type_len = len;
698 
699  out:
700 	tracing_selftest_running = false;
701 	mutex_unlock(&trace_types_lock);
702 
703 	if (ret || !default_bootup_tracer)
704 		goto out_unlock;
705 
706 	if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
707 		goto out_unlock;
708 
709 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
710 	/* Do we want this tracer to start on bootup? */
711 	tracing_set_tracer(type->name);
712 	default_bootup_tracer = NULL;
713 	/* Disable other selftests, since this tracer will break them. */
714 	tracing_selftest_disabled = 1;
715 #ifdef CONFIG_FTRACE_STARTUP_TEST
716 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
717 	       type->name);
718 #endif
719 
720  out_unlock:
721 	lock_kernel();
722 	return ret;
723 }
724 
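/*
 * Editor's illustrative sketch (kept out of the build with #if 0): the
 * minimum a plugin tracer provides before calling register_tracer().
 * The my_tracer_* names are hypothetical; real examples live in
 * kernel/trace/ (e.g. trace_nop.c, trace_sched_switch.c).
 */
#if 0
static int my_tracer_init(struct trace_array *tr)
{
	/* Start from clean buffers and arm whatever hooks are needed */
	tracing_reset_online_cpus(tr);
	return 0;
}

static void my_tracer_reset(struct trace_array *tr)
{
	/* Undo everything done in ->init() */
}

static struct tracer my_tracer __read_mostly = {
	.name	= "my_tracer",
	.init	= my_tracer_init,
	.reset	= my_tracer_reset,
};

static int __init init_my_tracer(void)
{
	return register_tracer(&my_tracer);
}
device_initcall(init_my_tracer);
#endif
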
725 void unregister_tracer(struct tracer *type)
726 {
727 	struct tracer **t;
728 	int len;
729 
730 	mutex_lock(&trace_types_lock);
731 	for (t = &trace_types; *t; t = &(*t)->next) {
732 		if (*t == type)
733 			goto found;
734 	}
735 	pr_info("Trace %s not registered\n", type->name);
736 	goto out;
737 
738  found:
739 	*t = (*t)->next;
740 
741 	if (type == current_trace && tracer_enabled) {
742 		tracer_enabled = 0;
743 		tracing_stop();
744 		if (current_trace->stop)
745 			current_trace->stop(&global_trace);
746 		current_trace = &nop_trace;
747 	}
748 
749 	if (strlen(type->name) != max_tracer_type_len)
750 		goto out;
751 
752 	max_tracer_type_len = 0;
753 	for (t = &trace_types; *t; t = &(*t)->next) {
754 		len = strlen((*t)->name);
755 		if (len > max_tracer_type_len)
756 			max_tracer_type_len = len;
757 	}
758  out:
759 	mutex_unlock(&trace_types_lock);
760 }
761 
762 static void __tracing_reset(struct trace_array *tr, int cpu)
763 {
764 	ftrace_disable_cpu();
765 	ring_buffer_reset_cpu(tr->buffer, cpu);
766 	ftrace_enable_cpu();
767 }
768 
769 void tracing_reset(struct trace_array *tr, int cpu)
770 {
771 	struct ring_buffer *buffer = tr->buffer;
772 
773 	ring_buffer_record_disable(buffer);
774 
775 	/* Make sure all commits have finished */
776 	synchronize_sched();
777 	__tracing_reset(tr, cpu);
778 
779 	ring_buffer_record_enable(buffer);
780 }
781 
782 void tracing_reset_online_cpus(struct trace_array *tr)
783 {
784 	struct ring_buffer *buffer = tr->buffer;
785 	int cpu;
786 
787 	ring_buffer_record_disable(buffer);
788 
789 	/* Make sure all commits have finished */
790 	synchronize_sched();
791 
792 	tr->time_start = ftrace_now(tr->cpu);
793 
794 	for_each_online_cpu(cpu)
795 		__tracing_reset(tr, cpu);
796 
797 	ring_buffer_record_enable(buffer);
798 }
799 
800 void tracing_reset_current(int cpu)
801 {
802 	tracing_reset(&global_trace, cpu);
803 }
804 
805 void tracing_reset_current_online_cpus(void)
806 {
807 	tracing_reset_online_cpus(&global_trace);
808 }
809 
810 #define SAVED_CMDLINES 128
811 #define NO_CMDLINE_MAP UINT_MAX
812 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
813 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
814 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
815 static int cmdline_idx;
816 static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
817 
818 /* temporarily disable recording */
819 static atomic_t trace_record_cmdline_disabled __read_mostly;
820 
821 static void trace_init_cmdlines(void)
822 {
823 	memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
824 	memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
825 	cmdline_idx = 0;
826 }
827 
828 int is_tracing_stopped(void)
829 {
830 	return trace_stop_count;
831 }
832 
833 /**
834  * ftrace_off_permanent - disable all ftrace code permanently
835  *
836  * This should only be called when a serious anomaly has
837  * been detected.  This will turn off function tracing,
838  * ring buffers, and other tracing utilities. It takes no
839  * locks and can be called from any context.
840  */
841 void ftrace_off_permanent(void)
842 {
843 	tracing_disabled = 1;
844 	ftrace_stop();
845 	tracing_off_permanent();
846 }
847 
848 /**
849  * tracing_start - quick start of the tracer
850  *
851  * If tracing is enabled but was stopped by tracing_stop,
852  * this will start the tracer back up.
853  */
854 void tracing_start(void)
855 {
856 	struct ring_buffer *buffer;
857 	unsigned long flags;
858 
859 	if (tracing_disabled)
860 		return;
861 
862 	spin_lock_irqsave(&tracing_start_lock, flags);
863 	if (--trace_stop_count) {
864 		if (trace_stop_count < 0) {
865 			/* Someone screwed up their debugging */
866 			WARN_ON_ONCE(1);
867 			trace_stop_count = 0;
868 		}
869 		goto out;
870 	}
871 
872 
873 	buffer = global_trace.buffer;
874 	if (buffer)
875 		ring_buffer_record_enable(buffer);
876 
877 	buffer = max_tr.buffer;
878 	if (buffer)
879 		ring_buffer_record_enable(buffer);
880 
881 	ftrace_start();
882  out:
883 	spin_unlock_irqrestore(&tracing_start_lock, flags);
884 }
885 
886 /**
887  * tracing_stop - quick stop of the tracer
888  *
889  * Lightweight way to stop tracing. Use in conjunction with
890  * tracing_start.
891  */
892 void tracing_stop(void)
893 {
894 	struct ring_buffer *buffer;
895 	unsigned long flags;
896 
897 	ftrace_stop();
898 	spin_lock_irqsave(&tracing_start_lock, flags);
899 	if (trace_stop_count++)
900 		goto out;
901 
902 	buffer = global_trace.buffer;
903 	if (buffer)
904 		ring_buffer_record_disable(buffer);
905 
906 	buffer = max_tr.buffer;
907 	if (buffer)
908 		ring_buffer_record_disable(buffer);
909 
910  out:
911 	spin_unlock_irqrestore(&tracing_start_lock, flags);
912 }
913 
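/*
 * Editor's illustrative sketch (kept out of the build with #if 0):
 * tracing_stop() and tracing_start() are meant to be used as a balanced
 * pair around a section whose activity should not be recorded; the
 * nesting is handled by trace_stop_count above.  my_inspect_buffers()
 * is a hypothetical helper.
 */
#if 0
static void my_inspect_buffers(void)
{
	tracing_stop();		/* quiesce recording (may nest) */

	/* ... walk or copy the ring buffers here ... */

	tracing_start();	/* balanced restart */
}
#endif
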
914 void trace_stop_cmdline_recording(void);
915 
916 static void trace_save_cmdline(struct task_struct *tsk)
917 {
918 	unsigned pid, idx;
919 
920 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
921 		return;
922 
923 	/*
924 	 * It's not the end of the world if we don't get
925 	 * the lock, but we also don't want to spin
926 	 * nor do we want to disable interrupts,
927 	 * so if we miss here, then better luck next time.
928 	 */
929 	if (!__raw_spin_trylock(&trace_cmdline_lock))
930 		return;
931 
932 	idx = map_pid_to_cmdline[tsk->pid];
933 	if (idx == NO_CMDLINE_MAP) {
934 		idx = (cmdline_idx + 1) % SAVED_CMDLINES;
935 
936 		/*
937 		 * Check whether the cmdline buffer at idx has a pid
938 		 * mapped. We are going to overwrite that entry so we
939 		 * need to clear the map_pid_to_cmdline. Otherwise we
940 		 * would read the new comm for the old pid.
941 		 */
942 		pid = map_cmdline_to_pid[idx];
943 		if (pid != NO_CMDLINE_MAP)
944 			map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
945 
946 		map_cmdline_to_pid[idx] = tsk->pid;
947 		map_pid_to_cmdline[tsk->pid] = idx;
948 
949 		cmdline_idx = idx;
950 	}
951 
952 	memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
953 
954 	__raw_spin_unlock(&trace_cmdline_lock);
955 }
956 
957 void trace_find_cmdline(int pid, char comm[])
958 {
959 	unsigned map;
960 
961 	if (!pid) {
962 		strcpy(comm, "<idle>");
963 		return;
964 	}
965 
966 	if (pid > PID_MAX_DEFAULT) {
967 		strcpy(comm, "<...>");
968 		return;
969 	}
970 
971 	preempt_disable();
972 	__raw_spin_lock(&trace_cmdline_lock);
973 	map = map_pid_to_cmdline[pid];
974 	if (map != NO_CMDLINE_MAP)
975 		strcpy(comm, saved_cmdlines[map]);
976 	else
977 		strcpy(comm, "<...>");
978 
979 	__raw_spin_unlock(&trace_cmdline_lock);
980 	preempt_enable();
981 }
982 
983 void tracing_record_cmdline(struct task_struct *tsk)
984 {
985 	if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
986 	    !tracing_is_on())
987 		return;
988 
989 	trace_save_cmdline(tsk);
990 }
991 
992 void
993 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
994 			     int pc)
995 {
996 	struct task_struct *tsk = current;
997 
998 	entry->preempt_count		= pc & 0xff;
999 	entry->pid			= (tsk) ? tsk->pid : 0;
1000 	entry->lock_depth		= (tsk) ? tsk->lock_depth : 0;
1001 	entry->flags =
1002 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1003 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1004 #else
1005 		TRACE_FLAG_IRQS_NOSUPPORT |
1006 #endif
1007 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1008 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1009 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1012 
1013 struct ring_buffer_event *
1014 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1015 			  int type,
1016 			  unsigned long len,
1017 			  unsigned long flags, int pc)
1018 {
1019 	struct ring_buffer_event *event;
1020 
1021 	event = ring_buffer_lock_reserve(buffer, len);
1022 	if (event != NULL) {
1023 		struct trace_entry *ent = ring_buffer_event_data(event);
1024 
1025 		tracing_generic_entry_update(ent, flags, pc);
1026 		ent->type = type;
1027 	}
1028 
1029 	return event;
1030 }
1031 
1032 static inline void
1033 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1034 			     struct ring_buffer_event *event,
1035 			     unsigned long flags, int pc,
1036 			     int wake)
1037 {
1038 	ring_buffer_unlock_commit(buffer, event);
1039 
1040 	ftrace_trace_stack(buffer, flags, 6, pc);
1041 	ftrace_trace_userstack(buffer, flags, pc);
1042 
1043 	if (wake)
1044 		trace_wake_up();
1045 }
1046 
1047 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1048 				struct ring_buffer_event *event,
1049 				unsigned long flags, int pc)
1050 {
1051 	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1052 }
1053 
1054 struct ring_buffer_event *
1055 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1056 				  int type, unsigned long len,
1057 				  unsigned long flags, int pc)
1058 {
1059 	*current_rb = global_trace.buffer;
1060 	return trace_buffer_lock_reserve(*current_rb,
1061 					 type, len, flags, pc);
1062 }
1063 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1064 
1065 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1066 					struct ring_buffer_event *event,
1067 					unsigned long flags, int pc)
1068 {
1069 	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1070 }
1071 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1072 
1073 void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
1074 				       struct ring_buffer_event *event,
1075 				       unsigned long flags, int pc)
1076 {
1077 	__trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
1078 }
1079 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
1080 
1081 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1082 					 struct ring_buffer_event *event)
1083 {
1084 	ring_buffer_discard_commit(buffer, event);
1085 }
1086 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1087 
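/*
 * Editor's illustrative sketch (kept out of the build with #if 0): the
 * reserve -> fill -> commit pattern that the exported
 * trace_current_buffer_*() helpers above support.  struct my_entry and
 * TRACE_MY_TYPE are hypothetical stand-ins for a real entry type.
 */
#if 0
static void my_probe(unsigned long data)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct my_entry *entry;
	unsigned long flags;
	int pc = preempt_count();

	local_save_flags(flags);
	event = trace_current_buffer_lock_reserve(&buffer, TRACE_MY_TYPE,
						  sizeof(*entry), flags, pc);
	if (!event)
		return;		/* ring buffer full or recording disabled */

	entry = ring_buffer_event_data(event);
	entry->data = data;

	/* Commits the event; also records stack/userstack if enabled */
	trace_current_buffer_unlock_commit(buffer, event, flags, pc);
}
#endif
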
1088 void
1089 trace_function(struct trace_array *tr,
1090 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1091 	       int pc)
1092 {
1093 	struct ftrace_event_call *call = &event_function;
1094 	struct ring_buffer *buffer = tr->buffer;
1095 	struct ring_buffer_event *event;
1096 	struct ftrace_entry *entry;
1097 
1098 	/* If we are reading the ring buffer, don't trace */
1099 	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
1100 		return;
1101 
1102 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1103 					  flags, pc);
1104 	if (!event)
1105 		return;
1106 	entry	= ring_buffer_event_data(event);
1107 	entry->ip			= ip;
1108 	entry->parent_ip		= parent_ip;
1109 
1110 	if (!filter_check_discard(call, entry, buffer, event))
1111 		ring_buffer_unlock_commit(buffer, event);
1112 }
1113 
1114 void
1115 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1116        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1117        int pc)
1118 {
1119 	if (likely(!atomic_read(&data->disabled)))
1120 		trace_function(tr, ip, parent_ip, flags, pc);
1121 }
1122 
1123 #ifdef CONFIG_STACKTRACE
1124 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1125 				 unsigned long flags,
1126 				 int skip, int pc)
1127 {
1128 	struct ftrace_event_call *call = &event_kernel_stack;
1129 	struct ring_buffer_event *event;
1130 	struct stack_entry *entry;
1131 	struct stack_trace trace;
1132 
1133 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1134 					  sizeof(*entry), flags, pc);
1135 	if (!event)
1136 		return;
1137 	entry	= ring_buffer_event_data(event);
1138 	memset(&entry->caller, 0, sizeof(entry->caller));
1139 
1140 	trace.nr_entries	= 0;
1141 	trace.max_entries	= FTRACE_STACK_ENTRIES;
1142 	trace.skip		= skip;
1143 	trace.entries		= entry->caller;
1144 
1145 	save_stack_trace(&trace);
1146 	if (!filter_check_discard(call, entry, buffer, event))
1147 		ring_buffer_unlock_commit(buffer, event);
1148 }
1149 
1150 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1151 			int skip, int pc)
1152 {
1153 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1154 		return;
1155 
1156 	__ftrace_trace_stack(buffer, flags, skip, pc);
1157 }
1158 
1159 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1160 		   int pc)
1161 {
1162 	__ftrace_trace_stack(tr->buffer, flags, skip, pc);
1163 }
1164 
1165 void
1166 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1167 {
1168 	struct ftrace_event_call *call = &event_user_stack;
1169 	struct ring_buffer_event *event;
1170 	struct userstack_entry *entry;
1171 	struct stack_trace trace;
1172 
1173 	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1174 		return;
1175 
1176 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1177 					  sizeof(*entry), flags, pc);
1178 	if (!event)
1179 		return;
1180 	entry	= ring_buffer_event_data(event);
1181 
1182 	entry->tgid		= current->tgid;
1183 	memset(&entry->caller, 0, sizeof(entry->caller));
1184 
1185 	trace.nr_entries	= 0;
1186 	trace.max_entries	= FTRACE_STACK_ENTRIES;
1187 	trace.skip		= 0;
1188 	trace.entries		= entry->caller;
1189 
1190 	save_stack_trace_user(&trace);
1191 	if (!filter_check_discard(call, entry, buffer, event))
1192 		ring_buffer_unlock_commit(buffer, event);
1193 }
1194 
1195 #ifdef UNUSED
1196 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1197 {
1198 	ftrace_trace_userstack(tr, flags, preempt_count());
1199 }
1200 #endif /* UNUSED */
1201 
1202 #endif /* CONFIG_STACKTRACE */
1203 
1204 static void
1205 ftrace_trace_special(void *__tr,
1206 		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
1207 		     int pc)
1208 {
1209 	struct ftrace_event_call *call = &event_special;
1210 	struct ring_buffer_event *event;
1211 	struct trace_array *tr = __tr;
1212 	struct ring_buffer *buffer = tr->buffer;
1213 	struct special_entry *entry;
1214 
1215 	event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1216 					  sizeof(*entry), 0, pc);
1217 	if (!event)
1218 		return;
1219 	entry	= ring_buffer_event_data(event);
1220 	entry->arg1			= arg1;
1221 	entry->arg2			= arg2;
1222 	entry->arg3			= arg3;
1223 
1224 	if (!filter_check_discard(call, entry, buffer, event))
1225 		trace_buffer_unlock_commit(buffer, event, 0, pc);
1226 }
1227 
1228 void
1229 __trace_special(void *__tr, void *__data,
1230 		unsigned long arg1, unsigned long arg2, unsigned long arg3)
1231 {
1232 	ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1233 }
1234 
1235 void
1236 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1237 {
1238 	struct trace_array *tr = &global_trace;
1239 	struct trace_array_cpu *data;
1240 	unsigned long flags;
1241 	int cpu;
1242 	int pc;
1243 
1244 	if (tracing_disabled)
1245 		return;
1246 
1247 	pc = preempt_count();
1248 	local_irq_save(flags);
1249 	cpu = raw_smp_processor_id();
1250 	data = tr->data[cpu];
1251 
1252 	if (likely(atomic_inc_return(&data->disabled) == 1))
1253 		ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1254 
1255 	atomic_dec(&data->disabled);
1256 	local_irq_restore(flags);
1257 }
1258 
1259 /**
1260  * trace_vbprintk - write binary msg to tracing buffer
1261  *
1262  */
1263 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1264 {
1265 	static raw_spinlock_t trace_buf_lock =
1266 		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
1267 	static u32 trace_buf[TRACE_BUF_SIZE];
1268 
1269 	struct ftrace_event_call *call = &event_bprint;
1270 	struct ring_buffer_event *event;
1271 	struct ring_buffer *buffer;
1272 	struct trace_array *tr = &global_trace;
1273 	struct trace_array_cpu *data;
1274 	struct bprint_entry *entry;
1275 	unsigned long flags;
1276 	int disable;
1277 	int resched;
1278 	int cpu, len = 0, size, pc;
1279 
1280 	if (unlikely(tracing_selftest_running || tracing_disabled))
1281 		return 0;
1282 
1283 	/* Don't pollute graph traces with trace_vbprintk internals */
1284 	pause_graph_tracing();
1285 
1286 	pc = preempt_count();
1287 	resched = ftrace_preempt_disable();
1288 	cpu = raw_smp_processor_id();
1289 	data = tr->data[cpu];
1290 
1291 	disable = atomic_inc_return(&data->disabled);
1292 	if (unlikely(disable != 1))
1293 		goto out;
1294 
1295 	/* Lockdep uses trace_printk for lock tracing */
1296 	local_irq_save(flags);
1297 	__raw_spin_lock(&trace_buf_lock);
1298 	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1299 
1300 	if (len > TRACE_BUF_SIZE || len < 0)
1301 		goto out_unlock;
1302 
1303 	size = sizeof(*entry) + sizeof(u32) * len;
1304 	buffer = tr->buffer;
1305 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1306 					  flags, pc);
1307 	if (!event)
1308 		goto out_unlock;
1309 	entry = ring_buffer_event_data(event);
1310 	entry->ip			= ip;
1311 	entry->fmt			= fmt;
1312 
1313 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1314 	if (!filter_check_discard(call, entry, buffer, event))
1315 		ring_buffer_unlock_commit(buffer, event);
1316 
1317 out_unlock:
1318 	__raw_spin_unlock(&trace_buf_lock);
1319 	local_irq_restore(flags);
1320 
1321 out:
1322 	atomic_dec_return(&data->disabled);
1323 	ftrace_preempt_enable(resched);
1324 	unpause_graph_tracing();
1325 
1326 	return len;
1327 }
1328 EXPORT_SYMBOL_GPL(trace_vbprintk);
1329 
1330 int trace_array_printk(struct trace_array *tr,
1331 		       unsigned long ip, const char *fmt, ...)
1332 {
1333 	int ret;
1334 	va_list ap;
1335 
1336 	if (!(trace_flags & TRACE_ITER_PRINTK))
1337 		return 0;
1338 
1339 	va_start(ap, fmt);
1340 	ret = trace_array_vprintk(tr, ip, fmt, ap);
1341 	va_end(ap);
1342 	return ret;
1343 }
1344 
1345 int trace_array_vprintk(struct trace_array *tr,
1346 			unsigned long ip, const char *fmt, va_list args)
1347 {
1348 	static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1349 	static char trace_buf[TRACE_BUF_SIZE];
1350 
1351 	struct ftrace_event_call *call = &event_print;
1352 	struct ring_buffer_event *event;
1353 	struct ring_buffer *buffer;
1354 	struct trace_array_cpu *data;
1355 	int cpu, len = 0, size, pc;
1356 	struct print_entry *entry;
1357 	unsigned long irq_flags;
1358 	int disable;
1359 
1360 	if (tracing_disabled || tracing_selftest_running)
1361 		return 0;
1362 
1363 	pc = preempt_count();
1364 	preempt_disable_notrace();
1365 	cpu = raw_smp_processor_id();
1366 	data = tr->data[cpu];
1367 
1368 	disable = atomic_inc_return(&data->disabled);
1369 	if (unlikely(disable != 1))
1370 		goto out;
1371 
1372 	pause_graph_tracing();
1373 	raw_local_irq_save(irq_flags);
1374 	__raw_spin_lock(&trace_buf_lock);
1375 	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1376 
1377 	len = min(len, TRACE_BUF_SIZE-1);
1378 	trace_buf[len] = 0;
1379 
1380 	size = sizeof(*entry) + len + 1;
1381 	buffer = tr->buffer;
1382 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1383 					  irq_flags, pc);
1384 	if (!event)
1385 		goto out_unlock;
1386 	entry = ring_buffer_event_data(event);
1387 	entry->ip			= ip;
1388 
1389 	memcpy(&entry->buf, trace_buf, len);
1390 	entry->buf[len] = 0;
1391 	if (!filter_check_discard(call, entry, buffer, event))
1392 		ring_buffer_unlock_commit(buffer, event);
1393 
1394  out_unlock:
1395 	__raw_spin_unlock(&trace_buf_lock);
1396 	raw_local_irq_restore(irq_flags);
1397 	unpause_graph_tracing();
1398  out:
1399 	atomic_dec_return(&data->disabled);
1400 	preempt_enable_notrace();
1401 
1402 	return len;
1403 }
1404 
1405 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1406 {
1407 	return trace_array_vprintk(&global_trace, ip, fmt, args);
1408 }
1409 EXPORT_SYMBOL_GPL(trace_vprintk);
1410 
1411 enum trace_file_type {
1412 	TRACE_FILE_LAT_FMT	= 1,
1413 	TRACE_FILE_ANNOTATE	= 2,
1414 };
1415 
1416 static void trace_iterator_increment(struct trace_iterator *iter)
1417 {
1418 	/* Don't allow ftrace to trace into the ring buffers */
1419 	ftrace_disable_cpu();
1420 
1421 	iter->idx++;
1422 	if (iter->buffer_iter[iter->cpu])
1423 		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1424 
1425 	ftrace_enable_cpu();
1426 }
1427 
1428 static struct trace_entry *
1429 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1430 {
1431 	struct ring_buffer_event *event;
1432 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1433 
1434 	/* Don't allow ftrace to trace into the ring buffers */
1435 	ftrace_disable_cpu();
1436 
1437 	if (buf_iter)
1438 		event = ring_buffer_iter_peek(buf_iter, ts);
1439 	else
1440 		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1441 
1442 	ftrace_enable_cpu();
1443 
1444 	return event ? ring_buffer_event_data(event) : NULL;
1445 }
1446 
1447 static struct trace_entry *
1448 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1449 {
1450 	struct ring_buffer *buffer = iter->tr->buffer;
1451 	struct trace_entry *ent, *next = NULL;
1452 	int cpu_file = iter->cpu_file;
1453 	u64 next_ts = 0, ts;
1454 	int next_cpu = -1;
1455 	int cpu;
1456 
1457 	/*
1458 	 * If we are in a per_cpu trace file, don't bother iterating over
1459 	 * all the cpus; peek directly at that cpu's buffer.
1460 	 */
1461 	if (cpu_file > TRACE_PIPE_ALL_CPU) {
1462 		if (ring_buffer_empty_cpu(buffer, cpu_file))
1463 			return NULL;
1464 		ent = peek_next_entry(iter, cpu_file, ent_ts);
1465 		if (ent_cpu)
1466 			*ent_cpu = cpu_file;
1467 
1468 		return ent;
1469 	}
1470 
1471 	for_each_tracing_cpu(cpu) {
1472 
1473 		if (ring_buffer_empty_cpu(buffer, cpu))
1474 			continue;
1475 
1476 		ent = peek_next_entry(iter, cpu, &ts);
1477 
1478 		/*
1479 		 * Pick the entry with the smallest timestamp:
1480 		 */
1481 		if (ent && (!next || ts < next_ts)) {
1482 			next = ent;
1483 			next_cpu = cpu;
1484 			next_ts = ts;
1485 		}
1486 	}
1487 
1488 	if (ent_cpu)
1489 		*ent_cpu = next_cpu;
1490 
1491 	if (ent_ts)
1492 		*ent_ts = next_ts;
1493 
1494 	return next;
1495 }
1496 
1497 /* Find the next real entry, without updating the iterator itself */
1498 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1499 					  int *ent_cpu, u64 *ent_ts)
1500 {
1501 	return __find_next_entry(iter, ent_cpu, ent_ts);
1502 }
1503 
1504 /* Find the next real entry, and increment the iterator to the next entry */
1505 static void *find_next_entry_inc(struct trace_iterator *iter)
1506 {
1507 	iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1508 
1509 	if (iter->ent)
1510 		trace_iterator_increment(iter);
1511 
1512 	return iter->ent ? iter : NULL;
1513 }
1514 
1515 static void trace_consume(struct trace_iterator *iter)
1516 {
1517 	/* Don't allow ftrace to trace into the ring buffers */
1518 	ftrace_disable_cpu();
1519 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1520 	ftrace_enable_cpu();
1521 }
1522 
1523 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1524 {
1525 	struct trace_iterator *iter = m->private;
1526 	int i = (int)*pos;
1527 	void *ent;
1528 
1529 	(*pos)++;
1530 
1531 	/* can't go backwards */
1532 	if (iter->idx > i)
1533 		return NULL;
1534 
1535 	if (iter->idx < 0)
1536 		ent = find_next_entry_inc(iter);
1537 	else
1538 		ent = iter;
1539 
1540 	while (ent && iter->idx < i)
1541 		ent = find_next_entry_inc(iter);
1542 
1543 	iter->pos = *pos;
1544 
1545 	return ent;
1546 }
1547 
1548 static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1549 {
1550 	struct trace_array *tr = iter->tr;
1551 	struct ring_buffer_event *event;
1552 	struct ring_buffer_iter *buf_iter;
1553 	unsigned long entries = 0;
1554 	u64 ts;
1555 
1556 	tr->data[cpu]->skipped_entries = 0;
1557 
1558 	if (!iter->buffer_iter[cpu])
1559 		return;
1560 
1561 	buf_iter = iter->buffer_iter[cpu];
1562 	ring_buffer_iter_reset(buf_iter);
1563 
1564 	/*
1565 	 * With the max latency tracers, a reset may never have taken
1566 	 * place on a cpu. This shows up as entries whose timestamps
1567 	 * are before the start of the trace; skip and count them.
1568 	 */
1569 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1570 		if (ts >= iter->tr->time_start)
1571 			break;
1572 		entries++;
1573 		ring_buffer_read(buf_iter, NULL);
1574 	}
1575 
1576 	tr->data[cpu]->skipped_entries = entries;
1577 }
1578 
1579 /*
1580  * No locking is necessary here. The worst that can
1581  * happen is losing events consumed at the same time
1582  * by a trace_pipe reader.
1583  * Other than that, we don't risk crashing the ring buffer
1584  * because it serializes its readers.
1585  *
1586  * The current tracer is copied to avoid global locking
1587  * all around.
1588  */
1589 static void *s_start(struct seq_file *m, loff_t *pos)
1590 {
1591 	struct trace_iterator *iter = m->private;
1592 	static struct tracer *old_tracer;
1593 	int cpu_file = iter->cpu_file;
1594 	void *p = NULL;
1595 	loff_t l = 0;
1596 	int cpu;
1597 
1598 	/* copy the tracer to avoid using a global lock all around */
1599 	mutex_lock(&trace_types_lock);
1600 	if (unlikely(old_tracer != current_trace && current_trace)) {
1601 		old_tracer = current_trace;
1602 		*iter->trace = *current_trace;
1603 	}
1604 	mutex_unlock(&trace_types_lock);
1605 
1606 	atomic_inc(&trace_record_cmdline_disabled);
1607 
1608 	if (*pos != iter->pos) {
1609 		iter->ent = NULL;
1610 		iter->cpu = 0;
1611 		iter->idx = -1;
1612 
1613 		ftrace_disable_cpu();
1614 
1615 		if (cpu_file == TRACE_PIPE_ALL_CPU) {
1616 			for_each_tracing_cpu(cpu)
1617 				tracing_iter_reset(iter, cpu);
1618 		} else
1619 			tracing_iter_reset(iter, cpu_file);
1620 
1621 		ftrace_enable_cpu();
1622 
1623 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1624 			;
1625 
1626 	} else {
1627 		l = *pos - 1;
1628 		p = s_next(m, p, &l);
1629 	}
1630 
1631 	trace_event_read_lock();
1632 	return p;
1633 }
1634 
1635 static void s_stop(struct seq_file *m, void *p)
1636 {
1637 	atomic_dec(&trace_record_cmdline_disabled);
1638 	trace_event_read_unlock();
1639 }
1640 
1641 static void print_lat_help_header(struct seq_file *m)
1642 {
1643 	seq_puts(m, "#                  _------=> CPU#            \n");
1644 	seq_puts(m, "#                 / _-----=> irqs-off        \n");
1645 	seq_puts(m, "#                | / _----=> need-resched    \n");
1646 	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1647 	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1648 	seq_puts(m, "#                |||| /_--=> lock-depth       \n");
1649 	seq_puts(m, "#                |||||/     delay             \n");
1650 	seq_puts(m, "#  cmd     pid   |||||| time  |   caller      \n");
1651 	seq_puts(m, "#     \\   /      ||||||   \\   |   /           \n");
1652 }
1653 
1654 static void print_func_help_header(struct seq_file *m)
1655 {
1656 	seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1657 	seq_puts(m, "#              | |       |          |         |\n");
1658 }
1659 
1660 
1661 static void
1662 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1663 {
1664 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1665 	struct trace_array *tr = iter->tr;
1666 	struct trace_array_cpu *data = tr->data[tr->cpu];
1667 	struct tracer *type = current_trace;
1668 	unsigned long entries = 0;
1669 	unsigned long total = 0;
1670 	unsigned long count;
1671 	const char *name = "preemption";
1672 	int cpu;
1673 
1674 	if (type)
1675 		name = type->name;
1676 
1677 
1678 	for_each_tracing_cpu(cpu) {
1679 		count = ring_buffer_entries_cpu(tr->buffer, cpu);
1680 		/*
1681 		 * If this buffer has skipped entries, then we hold all
1682 		 * entries for the trace and we need to ignore the
1683 		 * ones before the time stamp.
1684 		 */
1685 		if (tr->data[cpu]->skipped_entries) {
1686 			count -= tr->data[cpu]->skipped_entries;
1687 			/* total is the same as the entries */
1688 			total += count;
1689 		} else
1690 			total += count +
1691 				ring_buffer_overrun_cpu(tr->buffer, cpu);
1692 		entries += count;
1693 	}
1694 
1695 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1696 		   name, UTS_RELEASE);
1697 	seq_puts(m, "# -----------------------------------"
1698 		 "---------------------------------\n");
1699 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
1700 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1701 		   nsecs_to_usecs(data->saved_latency),
1702 		   entries,
1703 		   total,
1704 		   tr->cpu,
1705 #if defined(CONFIG_PREEMPT_NONE)
1706 		   "server",
1707 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1708 		   "desktop",
1709 #elif defined(CONFIG_PREEMPT)
1710 		   "preempt",
1711 #else
1712 		   "unknown",
1713 #endif
1714 		   /* These are reserved for later use */
1715 		   0, 0, 0, 0);
1716 #ifdef CONFIG_SMP
1717 	seq_printf(m, " #P:%d)\n", num_online_cpus());
1718 #else
1719 	seq_puts(m, ")\n");
1720 #endif
1721 	seq_puts(m, "#    -----------------\n");
1722 	seq_printf(m, "#    | task: %.16s-%d "
1723 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1724 		   data->comm, data->pid, data->uid, data->nice,
1725 		   data->policy, data->rt_priority);
1726 	seq_puts(m, "#    -----------------\n");
1727 
1728 	if (data->critical_start) {
1729 		seq_puts(m, "#  => started at: ");
1730 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1731 		trace_print_seq(m, &iter->seq);
1732 		seq_puts(m, "\n#  => ended at:   ");
1733 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1734 		trace_print_seq(m, &iter->seq);
1735 		seq_puts(m, "\n#\n");
1736 	}
1737 
1738 	seq_puts(m, "#\n");
1739 }
1740 
1741 static void test_cpu_buff_start(struct trace_iterator *iter)
1742 {
1743 	struct trace_seq *s = &iter->seq;
1744 
1745 	if (!(trace_flags & TRACE_ITER_ANNOTATE))
1746 		return;
1747 
1748 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1749 		return;
1750 
1751 	if (cpumask_test_cpu(iter->cpu, iter->started))
1752 		return;
1753 
1754 	if (iter->tr->data[iter->cpu]->skipped_entries)
1755 		return;
1756 
1757 	cpumask_set_cpu(iter->cpu, iter->started);
1758 
1759 	/* Don't print started cpu buffer for the first entry of the trace */
1760 	if (iter->idx > 1)
1761 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
1762 				iter->cpu);
1763 }
1764 
1765 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1766 {
1767 	struct trace_seq *s = &iter->seq;
1768 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1769 	struct trace_entry *entry;
1770 	struct trace_event *event;
1771 
1772 	entry = iter->ent;
1773 
1774 	test_cpu_buff_start(iter);
1775 
1776 	event = ftrace_find_event(entry->type);
1777 
1778 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1779 		if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1780 			if (!trace_print_lat_context(iter))
1781 				goto partial;
1782 		} else {
1783 			if (!trace_print_context(iter))
1784 				goto partial;
1785 		}
1786 	}
1787 
1788 	if (event)
1789 		return event->trace(iter, sym_flags);
1790 
1791 	if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1792 		goto partial;
1793 
1794 	return TRACE_TYPE_HANDLED;
1795 partial:
1796 	return TRACE_TYPE_PARTIAL_LINE;
1797 }
1798 
1799 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1800 {
1801 	struct trace_seq *s = &iter->seq;
1802 	struct trace_entry *entry;
1803 	struct trace_event *event;
1804 
1805 	entry = iter->ent;
1806 
1807 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1808 		if (!trace_seq_printf(s, "%d %d %llu ",
1809 				      entry->pid, iter->cpu, iter->ts))
1810 			goto partial;
1811 	}
1812 
1813 	event = ftrace_find_event(entry->type);
1814 	if (event)
1815 		return event->raw(iter, 0);
1816 
1817 	if (!trace_seq_printf(s, "%d ?\n", entry->type))
1818 		goto partial;
1819 
1820 	return TRACE_TYPE_HANDLED;
1821 partial:
1822 	return TRACE_TYPE_PARTIAL_LINE;
1823 }
1824 
1825 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1826 {
1827 	struct trace_seq *s = &iter->seq;
1828 	unsigned char newline = '\n';
1829 	struct trace_entry *entry;
1830 	struct trace_event *event;
1831 
1832 	entry = iter->ent;
1833 
1834 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1835 		SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1836 		SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1837 		SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1838 	}
1839 
1840 	event = ftrace_find_event(entry->type);
1841 	if (event) {
1842 		enum print_line_t ret = event->hex(iter, 0);
1843 		if (ret != TRACE_TYPE_HANDLED)
1844 			return ret;
1845 	}
1846 
1847 	SEQ_PUT_FIELD_RET(s, newline);
1848 
1849 	return TRACE_TYPE_HANDLED;
1850 }
1851 
1852 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1853 {
1854 	struct trace_seq *s = &iter->seq;
1855 	struct trace_entry *entry;
1856 	struct trace_event *event;
1857 
1858 	entry = iter->ent;
1859 
1860 	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1861 		SEQ_PUT_FIELD_RET(s, entry->pid);
1862 		SEQ_PUT_FIELD_RET(s, iter->cpu);
1863 		SEQ_PUT_FIELD_RET(s, iter->ts);
1864 	}
1865 
1866 	event = ftrace_find_event(entry->type);
1867 	return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
1868 }
1869 
1870 static int trace_empty(struct trace_iterator *iter)
1871 {
1872 	int cpu;
1873 
1874 	/* If we are looking at one CPU buffer, only check that one */
1875 	if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
1876 		cpu = iter->cpu_file;
1877 		if (iter->buffer_iter[cpu]) {
1878 			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1879 				return 0;
1880 		} else {
1881 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1882 				return 0;
1883 		}
1884 		return 1;
1885 	}
1886 
1887 	for_each_tracing_cpu(cpu) {
1888 		if (iter->buffer_iter[cpu]) {
1889 			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1890 				return 0;
1891 		} else {
1892 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1893 				return 0;
1894 		}
1895 	}
1896 
1897 	return 1;
1898 }
1899 
1900 /*  Called with trace_event_read_lock() held. */
1901 static enum print_line_t print_trace_line(struct trace_iterator *iter)
1902 {
1903 	enum print_line_t ret;
1904 
1905 	if (iter->trace && iter->trace->print_line) {
1906 		ret = iter->trace->print_line(iter);
1907 		if (ret != TRACE_TYPE_UNHANDLED)
1908 			return ret;
1909 	}
1910 
1911 	if (iter->ent->type == TRACE_BPRINT &&
1912 			trace_flags & TRACE_ITER_PRINTK &&
1913 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1914 		return trace_print_bprintk_msg_only(iter);
1915 
1916 	if (iter->ent->type == TRACE_PRINT &&
1917 			trace_flags & TRACE_ITER_PRINTK &&
1918 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1919 		return trace_print_printk_msg_only(iter);
1920 
1921 	if (trace_flags & TRACE_ITER_BIN)
1922 		return print_bin_fmt(iter);
1923 
1924 	if (trace_flags & TRACE_ITER_HEX)
1925 		return print_hex_fmt(iter);
1926 
1927 	if (trace_flags & TRACE_ITER_RAW)
1928 		return print_raw_fmt(iter);
1929 
1930 	return print_trace_fmt(iter);
1931 }
1932 
1933 static int s_show(struct seq_file *m, void *v)
1934 {
1935 	struct trace_iterator *iter = v;
1936 
1937 	if (iter->ent == NULL) {
1938 		if (iter->tr) {
1939 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
1940 			seq_puts(m, "#\n");
1941 		}
1942 		if (iter->trace && iter->trace->print_header)
1943 			iter->trace->print_header(m);
1944 		else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1945 			/* print nothing if the buffers are empty */
1946 			if (trace_empty(iter))
1947 				return 0;
1948 			print_trace_header(m, iter);
1949 			if (!(trace_flags & TRACE_ITER_VERBOSE))
1950 				print_lat_help_header(m);
1951 		} else {
1952 			if (!(trace_flags & TRACE_ITER_VERBOSE))
1953 				print_func_help_header(m);
1954 		}
1955 	} else {
1956 		print_trace_line(iter);
1957 		trace_print_seq(m, &iter->seq);
1958 	}
1959 
1960 	return 0;
1961 }
1962 
1963 static struct seq_operations tracer_seq_ops = {
1964 	.start		= s_start,
1965 	.next		= s_next,
1966 	.stop		= s_stop,
1967 	.show		= s_show,
1968 };
1969 
1970 static struct trace_iterator *
1971 __tracing_open(struct inode *inode, struct file *file)
1972 {
1973 	long cpu_file = (long) inode->i_private;
1974 	void *fail_ret = ERR_PTR(-ENOMEM);
1975 	struct trace_iterator *iter;
1976 	struct seq_file *m;
1977 	int cpu, ret;
1978 
1979 	if (tracing_disabled)
1980 		return ERR_PTR(-ENODEV);
1981 
1982 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1983 	if (!iter)
1984 		return ERR_PTR(-ENOMEM);
1985 
1986 	/*
1987 	 * We make a copy of the current tracer to avoid concurrent
1988 	 * changes on it while we are reading.
1989 	 */
1990 	mutex_lock(&trace_types_lock);
1991 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1992 	if (!iter->trace)
1993 		goto fail;
1994 
1995 	if (current_trace)
1996 		*iter->trace = *current_trace;
1997 
1998 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
1999 		goto fail;
2000 
2001 	cpumask_clear(iter->started);
2002 
2003 	if (current_trace && current_trace->print_max)
2004 		iter->tr = &max_tr;
2005 	else
2006 		iter->tr = &global_trace;
2007 	iter->pos = -1;
2008 	mutex_init(&iter->mutex);
2009 	iter->cpu_file = cpu_file;
2010 
2011 	/* Notify the tracer early, before we stop tracing. */
2012 	if (iter->trace && iter->trace->open)
2013 		iter->trace->open(iter);
2014 
2015 	/* Annotate start of buffers if we had overruns */
2016 	if (ring_buffer_overruns(iter->tr->buffer))
2017 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
2018 
2019 	/* stop the trace while dumping */
2020 	tracing_stop();
2021 
2022 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2023 		for_each_tracing_cpu(cpu) {
2024 
2025 			iter->buffer_iter[cpu] =
2026 				ring_buffer_read_start(iter->tr->buffer, cpu);
2027 			tracing_iter_reset(iter, cpu);
2028 		}
2029 	} else {
2030 		cpu = iter->cpu_file;
2031 		iter->buffer_iter[cpu] =
2032 				ring_buffer_read_start(iter->tr->buffer, cpu);
2033 		tracing_iter_reset(iter, cpu);
2034 	}
2035 
2036 	ret = seq_open(file, &tracer_seq_ops);
2037 	if (ret < 0) {
2038 		fail_ret = ERR_PTR(ret);
2039 		goto fail_buffer;
2040 	}
2041 
2042 	m = file->private_data;
2043 	m->private = iter;
2044 
2045 	mutex_unlock(&trace_types_lock);
2046 
2047 	return iter;
2048 
2049  fail_buffer:
2050 	for_each_tracing_cpu(cpu) {
2051 		if (iter->buffer_iter[cpu])
2052 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
2053 	}
2054 	free_cpumask_var(iter->started);
2055 	tracing_start();
2056  fail:
2057 	mutex_unlock(&trace_types_lock);
2058 	kfree(iter->trace);
2059 	kfree(iter);
2060 
2061 	return fail_ret;
2062 }
2063 
2064 int tracing_open_generic(struct inode *inode, struct file *filp)
2065 {
2066 	if (tracing_disabled)
2067 		return -ENODEV;
2068 
2069 	filp->private_data = inode->i_private;
2070 	return 0;
2071 }
2072 
2073 static int tracing_release(struct inode *inode, struct file *file)
2074 {
2075 	struct seq_file *m = (struct seq_file *)file->private_data;
2076 	struct trace_iterator *iter;
2077 	int cpu;
2078 
2079 	if (!(file->f_mode & FMODE_READ))
2080 		return 0;
2081 
2082 	iter = m->private;
2083 
2084 	mutex_lock(&trace_types_lock);
2085 	for_each_tracing_cpu(cpu) {
2086 		if (iter->buffer_iter[cpu])
2087 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
2088 	}
2089 
2090 	if (iter->trace && iter->trace->close)
2091 		iter->trace->close(iter);
2092 
2093 	/* reenable tracing if it was previously enabled */
2094 	tracing_start();
2095 	mutex_unlock(&trace_types_lock);
2096 
2097 	seq_release(inode, file);
2098 	mutex_destroy(&iter->mutex);
2099 	free_cpumask_var(iter->started);
2100 	kfree(iter->trace);
2101 	kfree(iter);
2102 	return 0;
2103 }
2104 
2105 static int tracing_open(struct inode *inode, struct file *file)
2106 {
2107 	struct trace_iterator *iter;
2108 	int ret = 0;
2109 
2110 	/* If this file was open for write, then erase contents */
2111 	/* If this file was opened for write, then erase its contents */
2112 	    (file->f_flags & O_TRUNC)) {
2113 		long cpu = (long) inode->i_private;
2114 
2115 		if (cpu == TRACE_PIPE_ALL_CPU)
2116 			tracing_reset_online_cpus(&global_trace);
2117 		else
2118 			tracing_reset(&global_trace, cpu);
2119 	}
2120 
2121 	if (file->f_mode & FMODE_READ) {
2122 		iter = __tracing_open(inode, file);
2123 		if (IS_ERR(iter))
2124 			ret = PTR_ERR(iter);
2125 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2126 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
2127 	}
2128 	return ret;
2129 }
2130 
2131 static void *
2132 t_next(struct seq_file *m, void *v, loff_t *pos)
2133 {
2134 	struct tracer *t = v;
2135 
2136 	(*pos)++;
2137 
2138 	if (t)
2139 		t = t->next;
2140 
2141 	return t;
2142 }
2143 
2144 static void *t_start(struct seq_file *m, loff_t *pos)
2145 {
2146 	struct tracer *t;
2147 	loff_t l = 0;
2148 
2149 	mutex_lock(&trace_types_lock);
2150 	for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2151 		;
2152 
2153 	return t;
2154 }
2155 
2156 static void t_stop(struct seq_file *m, void *p)
2157 {
2158 	mutex_unlock(&trace_types_lock);
2159 }
2160 
2161 static int t_show(struct seq_file *m, void *v)
2162 {
2163 	struct tracer *t = v;
2164 
2165 	if (!t)
2166 		return 0;
2167 
2168 	seq_printf(m, "%s", t->name);
2169 	if (t->next)
2170 		seq_putc(m, ' ');
2171 	else
2172 		seq_putc(m, '\n');
2173 
2174 	return 0;
2175 }
2176 
2177 static struct seq_operations show_traces_seq_ops = {
2178 	.start		= t_start,
2179 	.next		= t_next,
2180 	.stop		= t_stop,
2181 	.show		= t_show,
2182 };
2183 
2184 static int show_traces_open(struct inode *inode, struct file *file)
2185 {
2186 	if (tracing_disabled)
2187 		return -ENODEV;
2188 
2189 	return seq_open(file, &show_traces_seq_ops);
2190 }
2191 
2192 static ssize_t
2193 tracing_write_stub(struct file *filp, const char __user *ubuf,
2194 		   size_t count, loff_t *ppos)
2195 {
2196 	return count;
2197 }
2198 
2199 static const struct file_operations tracing_fops = {
2200 	.open		= tracing_open,
2201 	.read		= seq_read,
2202 	.write		= tracing_write_stub,
2203 	.llseek		= seq_lseek,
2204 	.release	= tracing_release,
2205 };
2206 
2207 static const struct file_operations show_traces_fops = {
2208 	.open		= show_traces_open,
2209 	.read		= seq_read,
2210 	.release	= seq_release,
2211 };
2212 
2213 /*
2214  * Only trace on a CPU if the bitmask is set:
2215  */
2216 static cpumask_var_t tracing_cpumask;
2217 
2218 /*
2219  * The tracer itself will not take this lock, but we still want
2220  * to provide a consistent cpumask to user-space:
2221  */
2222 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2223 
2224 /*
2225  * Temporary storage for the character representation of the
2226  * CPU bitmask (and one more byte for the newline):
2227  */
2228 static char mask_str[NR_CPUS + 1];
2229 
2230 static ssize_t
2231 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2232 		     size_t count, loff_t *ppos)
2233 {
2234 	int len;
2235 
2236 	mutex_lock(&tracing_cpumask_update_lock);
2237 
2238 	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2239 	if (count - len < 2) {
2240 		count = -EINVAL;
2241 		goto out_err;
2242 	}
2243 	len += sprintf(mask_str + len, "\n");
2244 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2245 
2246 out_err:
2247 	mutex_unlock(&tracing_cpumask_update_lock);
2248 
2249 	return count;
2250 }
2251 
2252 static ssize_t
2253 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2254 		      size_t count, loff_t *ppos)
2255 {
2256 	int err, cpu;
2257 	cpumask_var_t tracing_cpumask_new;
2258 
2259 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2260 		return -ENOMEM;
2261 
2262 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2263 	if (err)
2264 		goto err_unlock;
2265 
2266 	mutex_lock(&tracing_cpumask_update_lock);
2267 
2268 	local_irq_disable();
2269 	__raw_spin_lock(&ftrace_max_lock);
2270 	for_each_tracing_cpu(cpu) {
2271 		/*
2272 		 * Increase/decrease the disabled counter if we are
2273 		 * about to flip a bit in the cpumask:
2274 		 */
2275 		if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2276 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2277 			atomic_inc(&global_trace.data[cpu]->disabled);
2278 		}
2279 		if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2280 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2281 			atomic_dec(&global_trace.data[cpu]->disabled);
2282 		}
2283 	}
2284 	__raw_spin_unlock(&ftrace_max_lock);
2285 	local_irq_enable();
2286 
2287 	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2288 
2289 	mutex_unlock(&tracing_cpumask_update_lock);
2290 	free_cpumask_var(tracing_cpumask_new);
2291 
2292 	return count;
2293 
2294 err_unlock:
2295 	free_cpumask_var(tracing_cpumask_new);
2296 
2297 	return err;
2298 }
2299 
2300 static const struct file_operations tracing_cpumask_fops = {
2301 	.open		= tracing_open_generic,
2302 	.read		= tracing_cpumask_read,
2303 	.write		= tracing_cpumask_write,
2304 };
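
/*
 * Illustrative usage (a minimal sketch; the path assumes debugfs is mounted
 * at /sys/kernel/debug as in the mini-HOWTO further down).  The file takes
 * a hex CPU mask; on a 4-CPU box, restricting tracing to CPUs 0-1 and then
 * widening it again could look like:
 *
 *	# echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
 *	# echo f > /sys/kernel/debug/tracing/tracing_cpumask
 *
 * The write handler above bumps the per-cpu "disabled" counter for CPUs
 * being masked out and drops it for CPUs being re-enabled.
 */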
2305 
2306 static ssize_t
2307 tracing_trace_options_read(struct file *filp, char __user *ubuf,
2308 		       size_t cnt, loff_t *ppos)
2309 {
2310 	struct tracer_opt *trace_opts;
2311 	u32 tracer_flags;
2312 	int len = 0;
2313 	char *buf;
2314 	int r = 0;
2315 	int i;
2316 
2317 
2318 	/* calculate max size */
2319 	for (i = 0; trace_options[i]; i++) {
2320 		len += strlen(trace_options[i]);
2321 		len += 3; /* "no" and newline */
2322 	}
2323 
2324 	mutex_lock(&trace_types_lock);
2325 	tracer_flags = current_trace->flags->val;
2326 	trace_opts = current_trace->flags->opts;
2327 
2328 	/*
2329 	 * Increase the size with the names of the options specific
2330 	 * to the current tracer.
2331 	 */
2332 	for (i = 0; trace_opts[i].name; i++) {
2333 		len += strlen(trace_opts[i].name);
2334 		len += 3; /* "no" and newline */
2335 	}
2336 
2337 	/* +1 for \0 */
2338 	buf = kmalloc(len + 1, GFP_KERNEL);
2339 	if (!buf) {
2340 		mutex_unlock(&trace_types_lock);
2341 		return -ENOMEM;
2342 	}
2343 
2344 	for (i = 0; trace_options[i]; i++) {
2345 		if (trace_flags & (1 << i))
2346 			r += sprintf(buf + r, "%s\n", trace_options[i]);
2347 		else
2348 			r += sprintf(buf + r, "no%s\n", trace_options[i]);
2349 	}
2350 
2351 	for (i = 0; trace_opts[i].name; i++) {
2352 		if (tracer_flags & trace_opts[i].bit)
2353 			r += sprintf(buf + r, "%s\n",
2354 				trace_opts[i].name);
2355 		else
2356 			r += sprintf(buf + r, "no%s\n",
2357 				trace_opts[i].name);
2358 	}
2359 	mutex_unlock(&trace_types_lock);
2360 
2361 	WARN_ON(r >= len + 1);
2362 
2363 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2364 
2365 	kfree(buf);
2366 	return r;
2367 }
2368 
2369 /* Try to assign a tracer specific option */
2370 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2371 {
2372 	struct tracer_flags *tracer_flags = trace->flags;
2373 	struct tracer_opt *opts = NULL;
2374 	int ret = 0, i = 0;
2375 	int len;
2376 
2377 	for (i = 0; tracer_flags->opts[i].name; i++) {
2378 		opts = &tracer_flags->opts[i];
2379 		len = strlen(opts->name);
2380 
2381 		if (strncmp(cmp, opts->name, len) == 0) {
2382 			ret = trace->set_flag(tracer_flags->val,
2383 				opts->bit, !neg);
2384 			break;
2385 		}
2386 	}
2387 	/* Not found */
2388 	if (!tracer_flags->opts[i].name)
2389 		return -EINVAL;
2390 
2391 	/* Refused to handle */
2392 	if (ret)
2393 		return ret;
2394 
2395 	if (neg)
2396 		tracer_flags->val &= ~opts->bit;
2397 	else
2398 		tracer_flags->val |= opts->bit;
2399 
2400 	return 0;
2401 }
2402 
2403 static void set_tracer_flags(unsigned int mask, int enabled)
2404 {
2405 	/* do nothing if flag is already set */
2406 	if (!!(trace_flags & mask) == !!enabled)
2407 		return;
2408 
2409 	if (enabled)
2410 		trace_flags |= mask;
2411 	else
2412 		trace_flags &= ~mask;
2413 }
2414 
2415 static ssize_t
2416 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2417 			size_t cnt, loff_t *ppos)
2418 {
2419 	char buf[64];
2420 	char *cmp = buf;
2421 	int neg = 0;
2422 	int ret;
2423 	int i;
2424 
2425 	if (cnt >= sizeof(buf))
2426 		return -EINVAL;
2427 
2428 	if (copy_from_user(&buf, ubuf, cnt))
2429 		return -EFAULT;
2430 
2431 	buf[cnt] = 0;
2432 
2433 	if (strncmp(buf, "no", 2) == 0) {
2434 		neg = 1;
2435 		cmp += 2;
2436 	}
2437 
2438 	for (i = 0; trace_options[i]; i++) {
2439 		int len = strlen(trace_options[i]);
2440 
2441 		if (strncmp(cmp, trace_options[i], len) == 0) {
2442 			set_tracer_flags(1 << i, !neg);
2443 			break;
2444 		}
2445 	}
2446 
2447 	/* If no option could be set, test the specific tracer options */
2448 	if (!trace_options[i]) {
2449 		mutex_lock(&trace_types_lock);
2450 		ret = set_tracer_option(current_trace, cmp, neg);
2451 		mutex_unlock(&trace_types_lock);
2452 		if (ret)
2453 			return ret;
2454 	}
2455 
2456 	filp->f_pos += cnt;
2457 
2458 	return cnt;
2459 }
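
/*
 * Illustrative usage (a minimal sketch; option names come from the
 * trace_options array and from the current tracer's own flags):
 *
 *	# cat /sys/kernel/debug/tracing/trace_options
 *	# echo print-parent > /sys/kernel/debug/tracing/trace_options
 *	# echo noprint-parent > /sys/kernel/debug/tracing/trace_options
 *
 * A leading "no" clears the named flag; an unknown name falls through to
 * set_tracer_option() and ultimately yields -EINVAL.
 */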
2460 
2461 static const struct file_operations tracing_iter_fops = {
2462 	.open		= tracing_open_generic,
2463 	.read		= tracing_trace_options_read,
2464 	.write		= tracing_trace_options_write,
2465 };
2466 
2467 static const char readme_msg[] =
2468 	"tracing mini-HOWTO:\n\n"
2469 	"# mount -t debugfs nodev /sys/kernel/debug\n\n"
2470 	"# cat /sys/kernel/debug/tracing/available_tracers\n"
2471 	"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2472 	"# cat /sys/kernel/debug/tracing/current_tracer\n"
2473 	"nop\n"
2474 	"# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
2475 	"# cat /sys/kernel/debug/tracing/current_tracer\n"
2476 	"sched_switch\n"
2477 	"# cat /sys/kernel/debug/tracing/trace_options\n"
2478 	"noprint-parent nosym-offset nosym-addr noverbose\n"
2479 	"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
2480 	"# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
2481 	"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
2482 	"# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
2483 ;
2484 
2485 static ssize_t
2486 tracing_readme_read(struct file *filp, char __user *ubuf,
2487 		       size_t cnt, loff_t *ppos)
2488 {
2489 	return simple_read_from_buffer(ubuf, cnt, ppos,
2490 					readme_msg, strlen(readme_msg));
2491 }
2492 
2493 static const struct file_operations tracing_readme_fops = {
2494 	.open		= tracing_open_generic,
2495 	.read		= tracing_readme_read,
2496 };
2497 
2498 static ssize_t
2499 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2500 				size_t cnt, loff_t *ppos)
2501 {
2502 	char *buf_comm;
2503 	char *file_buf;
2504 	char *buf;
2505 	int len = 0;
2506 	int pid;
2507 	int i;
2508 
2509 	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2510 	if (!file_buf)
2511 		return -ENOMEM;
2512 
2513 	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2514 	if (!buf_comm) {
2515 		kfree(file_buf);
2516 		return -ENOMEM;
2517 	}
2518 
2519 	buf = file_buf;
2520 
2521 	for (i = 0; i < SAVED_CMDLINES; i++) {
2522 		int r;
2523 
2524 		pid = map_cmdline_to_pid[i];
2525 		if (pid == -1 || pid == NO_CMDLINE_MAP)
2526 			continue;
2527 
2528 		trace_find_cmdline(pid, buf_comm);
2529 		r = sprintf(buf, "%d %s\n", pid, buf_comm);
2530 		buf += r;
2531 		len += r;
2532 	}
2533 
2534 	len = simple_read_from_buffer(ubuf, cnt, ppos,
2535 				      file_buf, len);
2536 
2537 	kfree(file_buf);
2538 	kfree(buf_comm);
2539 
2540 	return len;
2541 }
2542 
2543 static const struct file_operations tracing_saved_cmdlines_fops = {
2544 	.open		= tracing_open_generic,
2545 	.read		= tracing_saved_cmdlines_read,
2546 };
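
/*
 * Illustrative output of the saved_cmdlines file (pid/comm pairs, one per
 * line, as formatted above; the values here are made up):
 *
 *	# cat /sys/kernel/debug/tracing/saved_cmdlines
 *	1 init
 *	1234 bash
 */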
2547 
2548 static ssize_t
2549 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2550 		  size_t cnt, loff_t *ppos)
2551 {
2552 	char buf[64];
2553 	int r;
2554 
2555 	r = sprintf(buf, "%u\n", tracer_enabled);
2556 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2557 }
2558 
2559 static ssize_t
2560 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2561 		   size_t cnt, loff_t *ppos)
2562 {
2563 	struct trace_array *tr = filp->private_data;
2564 	char buf[64];
2565 	unsigned long val;
2566 	int ret;
2567 
2568 	if (cnt >= sizeof(buf))
2569 		return -EINVAL;
2570 
2571 	if (copy_from_user(&buf, ubuf, cnt))
2572 		return -EFAULT;
2573 
2574 	buf[cnt] = 0;
2575 
2576 	ret = strict_strtoul(buf, 10, &val);
2577 	if (ret < 0)
2578 		return ret;
2579 
2580 	val = !!val;
2581 
2582 	mutex_lock(&trace_types_lock);
2583 	if (tracer_enabled ^ val) {
2584 		if (val) {
2585 			tracer_enabled = 1;
2586 			if (current_trace->start)
2587 				current_trace->start(tr);
2588 			tracing_start();
2589 		} else {
2590 			tracer_enabled = 0;
2591 			tracing_stop();
2592 			if (current_trace->stop)
2593 				current_trace->stop(tr);
2594 		}
2595 	}
2596 	mutex_unlock(&trace_types_lock);
2597 
2598 	filp->f_pos += cnt;
2599 
2600 	return cnt;
2601 }
2602 
2603 static ssize_t
2604 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2605 		       size_t cnt, loff_t *ppos)
2606 {
2607 	char buf[max_tracer_type_len+2];
2608 	int r;
2609 
2610 	mutex_lock(&trace_types_lock);
2611 	if (current_trace)
2612 		r = sprintf(buf, "%s\n", current_trace->name);
2613 	else
2614 		r = sprintf(buf, "\n");
2615 	mutex_unlock(&trace_types_lock);
2616 
2617 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2618 }
2619 
2620 int tracer_init(struct tracer *t, struct trace_array *tr)
2621 {
2622 	tracing_reset_online_cpus(tr);
2623 	return t->init(tr);
2624 }
2625 
2626 static int tracing_resize_ring_buffer(unsigned long size)
2627 {
2628 	int ret;
2629 
2630 	/*
2631 	 * If the kernel or the user changes the size of the ring buffer,
2632 	 * we use the size that was given, and we can forget about
2633 	 * expanding it later.
2634 	 */
2635 	ring_buffer_expanded = 1;
2636 
2637 	ret = ring_buffer_resize(global_trace.buffer, size);
2638 	if (ret < 0)
2639 		return ret;
2640 
2641 	ret = ring_buffer_resize(max_tr.buffer, size);
2642 	if (ret < 0) {
2643 		int r;
2644 
2645 		r = ring_buffer_resize(global_trace.buffer,
2646 				       global_trace.entries);
2647 		if (r < 0) {
2648 			/*
2649 			 * AARGH! We are left with different
2650 			 * size max buffer!!!!
2651 			 * The max buffer is our "snapshot" buffer.
2652 			 * When a tracer needs a snapshot (one of the
2653 			 * latency tracers), it swaps the max buffer
2654 			 * with the saved snapshot. We succeeded in updating
2655 			 * the size of the main buffer, but failed to
2656 			 * update the size of the max buffer. But when we tried
2657 			 * to reset the main buffer to the original size, we
2658 			 * failed there too. This is very unlikely to
2659 			 * happen, but if it does, warn and kill all
2660 			 * tracing.
2661 			 */
2662 			WARN_ON(1);
2663 			tracing_disabled = 1;
2664 		}
2665 		return ret;
2666 	}
2667 
2668 	global_trace.entries = size;
2669 
2670 	return ret;
2671 }
2672 
2673 /**
2674  * tracing_update_buffers - used by tracing facility to expand ring buffers
2675  *
2676  * To save memory when tracing is never used on a system that has it
2677  * configured in, the ring buffers are set to a minimum size. But once
2678  * a user starts to use the tracing facility, they need to grow
2679  * to their default size.
2680  *
2681  * This function is to be called when a tracer is about to be used.
2682  */
2683 int tracing_update_buffers(void)
2684 {
2685 	int ret = 0;
2686 
2687 	mutex_lock(&trace_types_lock);
2688 	if (!ring_buffer_expanded)
2689 		ret = tracing_resize_ring_buffer(trace_buf_size);
2690 	mutex_unlock(&trace_types_lock);
2691 
2692 	return ret;
2693 }
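
/*
 * A minimal caller sketch (the caller is hypothetical, the helper is the
 * one above): code that is about to enable tracing can make sure the ring
 * buffers have been expanded to their full size first:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */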
2694 
2695 struct trace_option_dentry;
2696 
2697 static struct trace_option_dentry *
2698 create_trace_option_files(struct tracer *tracer);
2699 
2700 static void
2701 destroy_trace_option_files(struct trace_option_dentry *topts);
2702 
2703 static int tracing_set_tracer(const char *buf)
2704 {
2705 	static struct trace_option_dentry *topts;
2706 	struct trace_array *tr = &global_trace;
2707 	struct tracer *t;
2708 	int ret = 0;
2709 
2710 	mutex_lock(&trace_types_lock);
2711 
2712 	if (!ring_buffer_expanded) {
2713 		ret = tracing_resize_ring_buffer(trace_buf_size);
2714 		if (ret < 0)
2715 			goto out;
2716 		ret = 0;
2717 	}
2718 
2719 	for (t = trace_types; t; t = t->next) {
2720 		if (strcmp(t->name, buf) == 0)
2721 			break;
2722 	}
2723 	if (!t) {
2724 		ret = -EINVAL;
2725 		goto out;
2726 	}
2727 	if (t == current_trace)
2728 		goto out;
2729 
2730 	trace_branch_disable();
2731 	if (current_trace && current_trace->reset)
2732 		current_trace->reset(tr);
2733 
2734 	destroy_trace_option_files(topts);
2735 
2736 	current_trace = t;
2737 
2738 	topts = create_trace_option_files(current_trace);
2739 
2740 	if (t->init) {
2741 		ret = tracer_init(t, tr);
2742 		if (ret)
2743 			goto out;
2744 	}
2745 
2746 	trace_branch_enable(tr);
2747  out:
2748 	mutex_unlock(&trace_types_lock);
2749 
2750 	return ret;
2751 }
2752 
2753 static ssize_t
2754 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2755 			size_t cnt, loff_t *ppos)
2756 {
2757 	char buf[max_tracer_type_len+1];
2758 	int i;
2759 	size_t ret;
2760 	int err;
2761 
2762 	ret = cnt;
2763 
2764 	if (cnt > max_tracer_type_len)
2765 		cnt = max_tracer_type_len;
2766 
2767 	if (copy_from_user(&buf, ubuf, cnt))
2768 		return -EFAULT;
2769 
2770 	buf[cnt] = 0;
2771 
2772 	/* strip trailing whitespace. */
2773 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2774 		buf[i] = 0;
2775 
2776 	err = tracing_set_tracer(buf);
2777 	if (err)
2778 		return err;
2779 
2780 	filp->f_pos += ret;
2781 
2782 	return ret;
2783 }
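
/*
 * Illustrative usage (a minimal sketch; available tracer names depend on
 * the kernel configuration, see available_tracers):
 *
 *	# cat /sys/kernel/debug/tracing/available_tracers
 *	# echo sched_switch > /sys/kernel/debug/tracing/current_tracer
 *	# echo nop > /sys/kernel/debug/tracing/current_tracer
 *
 * The write lands in tracing_set_tracer(), which also expands the ring
 * buffer on first use and swaps the tracer-specific option files.
 */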
2784 
2785 static ssize_t
2786 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2787 		     size_t cnt, loff_t *ppos)
2788 {
2789 	unsigned long *ptr = filp->private_data;
2790 	char buf[64];
2791 	int r;
2792 
2793 	r = snprintf(buf, sizeof(buf), "%ld\n",
2794 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2795 	if (r > sizeof(buf))
2796 		r = sizeof(buf);
2797 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2798 }
2799 
2800 static ssize_t
2801 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2802 		      size_t cnt, loff_t *ppos)
2803 {
2804 	unsigned long *ptr = filp->private_data;
2805 	char buf[64];
2806 	unsigned long val;
2807 	int ret;
2808 
2809 	if (cnt >= sizeof(buf))
2810 		return -EINVAL;
2811 
2812 	if (copy_from_user(&buf, ubuf, cnt))
2813 		return -EFAULT;
2814 
2815 	buf[cnt] = 0;
2816 
2817 	ret = strict_strtoul(buf, 10, &val);
2818 	if (ret < 0)
2819 		return ret;
2820 
2821 	*ptr = val * 1000;
2822 
2823 	return cnt;
2824 }
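
/*
 * Illustrative usage (the value is in microseconds; the write handler
 * above multiplies by 1000 to store nanoseconds):
 *
 *	# cat /sys/kernel/debug/tracing/tracing_max_latency
 *	# echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 */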
2825 
2826 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2827 {
2828 	long cpu_file = (long) inode->i_private;
2829 	struct trace_iterator *iter;
2830 	int ret = 0;
2831 
2832 	if (tracing_disabled)
2833 		return -ENODEV;
2834 
2835 	mutex_lock(&trace_types_lock);
2836 
2837 	/* We only allow one reader per cpu */
2838 	if (cpu_file == TRACE_PIPE_ALL_CPU) {
2839 		if (!cpumask_empty(tracing_reader_cpumask)) {
2840 			ret = -EBUSY;
2841 			goto out;
2842 		}
2843 		cpumask_setall(tracing_reader_cpumask);
2844 	} else {
2845 		if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2846 			cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2847 		else {
2848 			ret = -EBUSY;
2849 			goto out;
2850 		}
2851 	}
2852 
2853 	/* create a buffer to store the information to pass to userspace */
2854 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2855 	if (!iter) {
2856 		ret = -ENOMEM;
2857 		goto out;
2858 	}
2859 
2860 	/*
2861 	 * We make a copy of the current tracer to avoid concurrent
2862 	 * changes on it while we are reading.
2863 	 */
2864 	iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2865 	if (!iter->trace) {
2866 		ret = -ENOMEM;
2867 		goto fail;
2868 	}
2869 	if (current_trace)
2870 		*iter->trace = *current_trace;
2871 
2872 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2873 		ret = -ENOMEM;
2874 		goto fail;
2875 	}
2876 
2877 	/* trace pipe does not show start of buffer */
2878 	cpumask_setall(iter->started);
2879 
2880 	if (trace_flags & TRACE_ITER_LATENCY_FMT)
2881 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
2882 
2883 	iter->cpu_file = cpu_file;
2884 	iter->tr = &global_trace;
2885 	mutex_init(&iter->mutex);
2886 	filp->private_data = iter;
2887 
2888 	if (iter->trace->pipe_open)
2889 		iter->trace->pipe_open(iter);
2890 
2891 out:
2892 	mutex_unlock(&trace_types_lock);
2893 	return ret;
2894 
2895 fail:
2896 	kfree(iter->trace);
2897 	kfree(iter);
2898 	mutex_unlock(&trace_types_lock);
2899 	return ret;
2900 }
2901 
2902 static int tracing_release_pipe(struct inode *inode, struct file *file)
2903 {
2904 	struct trace_iterator *iter = file->private_data;
2905 
2906 	mutex_lock(&trace_types_lock);
2907 
2908 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2909 		cpumask_clear(tracing_reader_cpumask);
2910 	else
2911 		cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2912 
2913 	mutex_unlock(&trace_types_lock);
2914 
2915 	free_cpumask_var(iter->started);
2916 	mutex_destroy(&iter->mutex);
2917 	kfree(iter->trace);
2918 	kfree(iter);
2919 
2920 	return 0;
2921 }
2922 
2923 static unsigned int
2924 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2925 {
2926 	struct trace_iterator *iter = filp->private_data;
2927 
2928 	if (trace_flags & TRACE_ITER_BLOCK) {
2929 		/*
2930 		 * Always select as readable when in blocking mode
2931 		 */
2932 		return POLLIN | POLLRDNORM;
2933 	} else {
2934 		if (!trace_empty(iter))
2935 			return POLLIN | POLLRDNORM;
2936 		poll_wait(filp, &trace_wait, poll_table);
2937 		if (!trace_empty(iter))
2938 			return POLLIN | POLLRDNORM;
2939 
2940 		return 0;
2941 	}
2942 }
2943 
2944 
2945 void default_wait_pipe(struct trace_iterator *iter)
2946 {
2947 	DEFINE_WAIT(wait);
2948 
2949 	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
2950 
2951 	if (trace_empty(iter))
2952 		schedule();
2953 
2954 	finish_wait(&trace_wait, &wait);
2955 }
2956 
2957 /*
2958  * This is a makeshift waitqueue.
2959  * A tracer might use this callback in some rare cases:
2960  *
2961  *  1) the current tracer might hold the runqueue lock when it wakes up
2962  *     a reader, hence a deadlock (sched, function, and function graph tracers)
2963  *  2) the function tracers trace all functions, and we don't want
2964  *     the overhead of calling wake_up and friends
2965  *     (and tracing them too)
2966  *
2967  *     Anyway, this is a really primitive wakeup.
2968  */
2969 void poll_wait_pipe(struct trace_iterator *iter)
2970 {
2971 	set_current_state(TASK_INTERRUPTIBLE);
2972 	/* sleep for 100 msecs, and try again. */
2973 	schedule_timeout(HZ / 10);
2974 }
2975 
2976 /* Must be called with trace_types_lock mutex held. */
2977 static int tracing_wait_pipe(struct file *filp)
2978 {
2979 	struct trace_iterator *iter = filp->private_data;
2980 
2981 	while (trace_empty(iter)) {
2982 
2983 		if ((filp->f_flags & O_NONBLOCK)) {
2984 			return -EAGAIN;
2985 		}
2986 
2987 		mutex_unlock(&iter->mutex);
2988 
2989 		iter->trace->wait_pipe(iter);
2990 
2991 		mutex_lock(&iter->mutex);
2992 
2993 		if (signal_pending(current))
2994 			return -EINTR;
2995 
2996 		/*
2997 		 * We block until we have read something and tracing is
2998 		 * disabled. We still block if tracing is disabled but we
2999 		 * have never read anything. This allows a user to cat this
3000 		 * file, and then enable tracing. But after we have read
3001 		 * something, we give an EOF when tracing is disabled again.
3002 		 *
3003 		 * iter->pos will be 0 if we haven't read anything.
3004 		 */
3005 		if (!tracer_enabled && iter->pos)
3006 			break;
3007 	}
3008 
3009 	return 1;
3010 }
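
/*
 * Behaviour sketch (illustrative): a blocking reader of trace_pipe may be
 * started before tracing is enabled and will sleep in tracing_wait_pipe()
 * until entries show up:
 *
 *	# cat /sys/kernel/debug/tracing/trace_pipe &
 *	# echo 1 > /sys/kernel/debug/tracing/tracing_enabled
 *
 * After the reader has consumed something, disabling tracing again lets the
 * loop above break out, and the reader gets an EOF once the buffer drains.
 */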
3011 
3012 /*
3013  * Consumer reader.
3014  */
3015 static ssize_t
3016 tracing_read_pipe(struct file *filp, char __user *ubuf,
3017 		  size_t cnt, loff_t *ppos)
3018 {
3019 	struct trace_iterator *iter = filp->private_data;
3020 	static struct tracer *old_tracer;
3021 	ssize_t sret;
3022 
3023 	/* return any leftover data */
3024 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3025 	if (sret != -EBUSY)
3026 		return sret;
3027 
3028 	trace_seq_init(&iter->seq);
3029 
3030 	/* copy the tracer to avoid using a global lock all around */
3031 	mutex_lock(&trace_types_lock);
3032 	if (unlikely(old_tracer != current_trace && current_trace)) {
3033 		old_tracer = current_trace;
3034 		*iter->trace = *current_trace;
3035 	}
3036 	mutex_unlock(&trace_types_lock);
3037 
3038 	/*
3039 	 * Avoid more than one consumer on a single file descriptor
3040 	 * This is just a matter of trace coherency; the ring buffer itself
3041 	 * is protected.
3042 	 */
3043 	mutex_lock(&iter->mutex);
3044 	if (iter->trace->read) {
3045 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
3046 		if (sret)
3047 			goto out;
3048 	}
3049 
3050 waitagain:
3051 	sret = tracing_wait_pipe(filp);
3052 	if (sret <= 0)
3053 		goto out;
3054 
3055 	/* stop when tracing is finished */
3056 	if (trace_empty(iter)) {
3057 		sret = 0;
3058 		goto out;
3059 	}
3060 
3061 	if (cnt >= PAGE_SIZE)
3062 		cnt = PAGE_SIZE - 1;
3063 
3064 	/* reset all but tr, trace, and overruns */
3065 	memset(&iter->seq, 0,
3066 	       sizeof(struct trace_iterator) -
3067 	       offsetof(struct trace_iterator, seq));
3068 	iter->pos = -1;
3069 
3070 	trace_event_read_lock();
3071 	while (find_next_entry_inc(iter) != NULL) {
3072 		enum print_line_t ret;
3073 		int len = iter->seq.len;
3074 
3075 		ret = print_trace_line(iter);
3076 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3077 			/* don't print partial lines */
3078 			iter->seq.len = len;
3079 			break;
3080 		}
3081 		if (ret != TRACE_TYPE_NO_CONSUME)
3082 			trace_consume(iter);
3083 
3084 		if (iter->seq.len >= cnt)
3085 			break;
3086 	}
3087 	trace_event_read_unlock();
3088 
3089 	/* Now copy what we have to the user */
3090 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3091 	if (iter->seq.readpos >= iter->seq.len)
3092 		trace_seq_init(&iter->seq);
3093 
3094 	/*
3095 	 * If there was nothing to send to the user, in spite of consuming trace
3096 	 * entries, go back to wait for more entries.
3097 	 */
3098 	if (sret == -EBUSY)
3099 		goto waitagain;
3100 
3101 out:
3102 	mutex_unlock(&iter->mutex);
3103 
3104 	return sret;
3105 }
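
/*
 * A minimal user-space consumer sketch (a hypothetical program using only
 * standard POSIX calls), matching the consuming-read semantics above:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	char buf[4096];
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, n);
 *	close(fd);
 */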
3106 
3107 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
3108 				     struct pipe_buffer *buf)
3109 {
3110 	__free_page(buf->page);
3111 }
3112 
3113 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3114 				     unsigned int idx)
3115 {
3116 	__free_page(spd->pages[idx]);
3117 }
3118 
3119 static struct pipe_buf_operations tracing_pipe_buf_ops = {
3120 	.can_merge		= 0,
3121 	.map			= generic_pipe_buf_map,
3122 	.unmap			= generic_pipe_buf_unmap,
3123 	.confirm		= generic_pipe_buf_confirm,
3124 	.release		= tracing_pipe_buf_release,
3125 	.steal			= generic_pipe_buf_steal,
3126 	.get			= generic_pipe_buf_get,
3127 };
3128 
3129 static size_t
3130 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3131 {
3132 	size_t count;
3133 	int ret;
3134 
3135 	/* Seq buffer is page-sized, exactly what we need. */
3136 	for (;;) {
3137 		count = iter->seq.len;
3138 		ret = print_trace_line(iter);
3139 		count = iter->seq.len - count;
3140 		if (rem < count) {
3141 			rem = 0;
3142 			iter->seq.len -= count;
3143 			break;
3144 		}
3145 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3146 			iter->seq.len -= count;
3147 			break;
3148 		}
3149 
3150 		if (ret != TRACE_TYPE_NO_CONSUME)
3151 			trace_consume(iter);
3152 		rem -= count;
3153 		if (!find_next_entry_inc(iter))	{
3154 			rem = 0;
3155 			iter->ent = NULL;
3156 			break;
3157 		}
3158 	}
3159 
3160 	return rem;
3161 }
3162 
3163 static ssize_t tracing_splice_read_pipe(struct file *filp,
3164 					loff_t *ppos,
3165 					struct pipe_inode_info *pipe,
3166 					size_t len,
3167 					unsigned int flags)
3168 {
3169 	struct page *pages[PIPE_BUFFERS];
3170 	struct partial_page partial[PIPE_BUFFERS];
3171 	struct trace_iterator *iter = filp->private_data;
3172 	struct splice_pipe_desc spd = {
3173 		.pages		= pages,
3174 		.partial	= partial,
3175 		.nr_pages	= 0, /* This gets updated below. */
3176 		.flags		= flags,
3177 		.ops		= &tracing_pipe_buf_ops,
3178 		.spd_release	= tracing_spd_release_pipe,
3179 	};
3180 	static struct tracer *old_tracer;
3181 	ssize_t ret;
3182 	size_t rem;
3183 	unsigned int i;
3184 
3185 	/* copy the tracer to avoid using a global lock all around */
3186 	mutex_lock(&trace_types_lock);
3187 	if (unlikely(old_tracer != current_trace && current_trace)) {
3188 		old_tracer = current_trace;
3189 		*iter->trace = *current_trace;
3190 	}
3191 	mutex_unlock(&trace_types_lock);
3192 
3193 	mutex_lock(&iter->mutex);
3194 
3195 	if (iter->trace->splice_read) {
3196 		ret = iter->trace->splice_read(iter, filp,
3197 					       ppos, pipe, len, flags);
3198 		if (ret)
3199 			goto out_err;
3200 	}
3201 
3202 	ret = tracing_wait_pipe(filp);
3203 	if (ret <= 0)
3204 		goto out_err;
3205 
3206 	if (!iter->ent && !find_next_entry_inc(iter)) {
3207 		ret = -EFAULT;
3208 		goto out_err;
3209 	}
3210 
3211 	trace_event_read_lock();
3212 
3213 	/* Fill as many pages as possible. */
3214 	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
3215 		pages[i] = alloc_page(GFP_KERNEL);
3216 		if (!pages[i])
3217 			break;
3218 
3219 		rem = tracing_fill_pipe_page(rem, iter);
3220 
3221 		/* Copy the data into the page, so we can start over. */
3222 		ret = trace_seq_to_buffer(&iter->seq,
3223 					  page_address(pages[i]),
3224 					  iter->seq.len);
3225 		if (ret < 0) {
3226 			__free_page(pages[i]);
3227 			break;
3228 		}
3229 		partial[i].offset = 0;
3230 		partial[i].len = iter->seq.len;
3231 
3232 		trace_seq_init(&iter->seq);
3233 	}
3234 
3235 	trace_event_read_unlock();
3236 	mutex_unlock(&iter->mutex);
3237 
3238 	spd.nr_pages = i;
3239 
3240 	return splice_to_pipe(pipe, &spd);
3241 
3242 out_err:
3243 	mutex_unlock(&iter->mutex);
3244 
3245 	return ret;
3246 }
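
/*
 * A minimal splice() consumer sketch (a hypothetical user-space program):
 * formatted trace text is spliced page by page into a pipe and then drained
 * to stdout, avoiding an intermediate user buffer:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 4096, 0);
 *	splice(p[0], NULL, STDOUT_FILENO, NULL, 4096, 0);
 */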
3247 
3248 static ssize_t
3249 tracing_entries_read(struct file *filp, char __user *ubuf,
3250 		     size_t cnt, loff_t *ppos)
3251 {
3252 	struct trace_array *tr = filp->private_data;
3253 	char buf[96];
3254 	int r;
3255 
3256 	mutex_lock(&trace_types_lock);
3257 	if (!ring_buffer_expanded)
3258 		r = sprintf(buf, "%lu (expanded: %lu)\n",
3259 			    tr->entries >> 10,
3260 			    trace_buf_size >> 10);
3261 	else
3262 		r = sprintf(buf, "%lu\n", tr->entries >> 10);
3263 	mutex_unlock(&trace_types_lock);
3264 
3265 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3266 }
3267 
3268 static ssize_t
3269 tracing_entries_write(struct file *filp, const char __user *ubuf,
3270 		      size_t cnt, loff_t *ppos)
3271 {
3272 	unsigned long val;
3273 	char buf[64];
3274 	int ret, cpu;
3275 
3276 	if (cnt >= sizeof(buf))
3277 		return -EINVAL;
3278 
3279 	if (copy_from_user(&buf, ubuf, cnt))
3280 		return -EFAULT;
3281 
3282 	buf[cnt] = 0;
3283 
3284 	ret = strict_strtoul(buf, 10, &val);
3285 	if (ret < 0)
3286 		return ret;
3287 
3288 	/* must have at least 1 entry */
3289 	if (!val)
3290 		return -EINVAL;
3291 
3292 	mutex_lock(&trace_types_lock);
3293 
3294 	tracing_stop();
3295 
3296 	/* disable all cpu buffers */
3297 	for_each_tracing_cpu(cpu) {
3298 		if (global_trace.data[cpu])
3299 			atomic_inc(&global_trace.data[cpu]->disabled);
3300 		if (max_tr.data[cpu])
3301 			atomic_inc(&max_tr.data[cpu]->disabled);
3302 	}
3303 
3304 	/* value is in KB */
3305 	val <<= 10;
3306 
3307 	if (val != global_trace.entries) {
3308 		ret = tracing_resize_ring_buffer(val);
3309 		if (ret < 0) {
3310 			cnt = ret;
3311 			goto out;
3312 		}
3313 	}
3314 
3315 	filp->f_pos += cnt;
3316 
3317 	/* If check pages failed, return ENOMEM */
3318 	if (tracing_disabled)
3319 		cnt = -ENOMEM;
3320  out:
3321 	for_each_tracing_cpu(cpu) {
3322 		if (global_trace.data[cpu])
3323 			atomic_dec(&global_trace.data[cpu]->disabled);
3324 		if (max_tr.data[cpu])
3325 			atomic_dec(&max_tr.data[cpu]->disabled);
3326 	}
3327 
3328 	tracing_start();
3329 	max_tr.entries = global_trace.entries;
3330 	mutex_unlock(&trace_types_lock);
3331 
3332 	return cnt;
3333 }
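
/*
 * Illustrative usage (the value is in KB, as the handlers above shift it
 * by 10 in both directions):
 *
 *	# cat /sys/kernel/debug/tracing/buffer_size_kb
 *	# echo 2048 > /sys/kernel/debug/tracing/buffer_size_kb
 */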
3334 
3335 static int mark_printk(const char *fmt, ...)
3336 {
3337 	int ret;
3338 	va_list args;
3339 	va_start(args, fmt);
3340 	ret = trace_vprintk(0, fmt, args);
3341 	va_end(args);
3342 	return ret;
3343 }
3344 
3345 static ssize_t
3346 tracing_mark_write(struct file *filp, const char __user *ubuf,
3347 					size_t cnt, loff_t *fpos)
3348 {
3349 	char *buf;
3350 	char *end;
3351 
3352 	if (tracing_disabled)
3353 		return -EINVAL;
3354 
3355 	if (cnt > TRACE_BUF_SIZE)
3356 		cnt = TRACE_BUF_SIZE;
3357 
3358 	buf = kmalloc(cnt + 1, GFP_KERNEL);
3359 	if (buf == NULL)
3360 		return -ENOMEM;
3361 
3362 	if (copy_from_user(buf, ubuf, cnt)) {
3363 		kfree(buf);
3364 		return -EFAULT;
3365 	}
3366 
3367 	/* Cut at the first NUL or newline. */
3368 	buf[cnt] = '\0';
3369 	end = strchr(buf, '\n');
3370 	if (end)
3371 		*end = '\0';
3372 
3373 	cnt = mark_printk("%s\n", buf);
3374 	kfree(buf);
3375 	*fpos += cnt;
3376 
3377 	return cnt;
3378 }
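
/*
 * Illustrative usage: user space can inject an annotation into the trace
 * through the trace_marker file created in tracer_init_debugfs() below:
 *
 *	# echo "hit the slow path" > /sys/kernel/debug/tracing/trace_marker
 */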
3379 
3380 static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
3381 				  size_t cnt, loff_t *ppos)
3382 {
3383 	char buf[64];
3384 	int bufiter = 0;
3385 	int i;
3386 
3387 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3388 		bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
3389 			"%s%s%s%s", i ? " " : "",
3390 			i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3391 			i == trace_clock_id ? "]" : "");
3392 	bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
3393 
3394 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
3395 }
3396 
3397 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3398 				   size_t cnt, loff_t *fpos)
3399 {
3400 	char buf[64];
3401 	const char *clockstr;
3402 	int i;
3403 
3404 	if (cnt >= sizeof(buf))
3405 		return -EINVAL;
3406 
3407 	if (copy_from_user(&buf, ubuf, cnt))
3408 		return -EFAULT;
3409 
3410 	buf[cnt] = 0;
3411 
3412 	clockstr = strstrip(buf);
3413 
3414 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3415 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
3416 			break;
3417 	}
3418 	if (i == ARRAY_SIZE(trace_clocks))
3419 		return -EINVAL;
3420 
3421 	trace_clock_id = i;
3422 
3423 	mutex_lock(&trace_types_lock);
3424 
3425 	ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3426 	if (max_tr.buffer)
3427 		ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3428 
3429 	mutex_unlock(&trace_types_lock);
3430 
3431 	*fpos += cnt;
3432 
3433 	return cnt;
3434 }
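
/*
 * Illustrative usage (valid names come from the trace_clocks array; the
 * read handler above marks the current one with brackets):
 *
 *	# cat /sys/kernel/debug/tracing/trace_clock
 *	# echo <clock-name> > /sys/kernel/debug/tracing/trace_clock
 */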
3435 
3436 static const struct file_operations tracing_max_lat_fops = {
3437 	.open		= tracing_open_generic,
3438 	.read		= tracing_max_lat_read,
3439 	.write		= tracing_max_lat_write,
3440 };
3441 
3442 static const struct file_operations tracing_ctrl_fops = {
3443 	.open		= tracing_open_generic,
3444 	.read		= tracing_ctrl_read,
3445 	.write		= tracing_ctrl_write,
3446 };
3447 
3448 static const struct file_operations set_tracer_fops = {
3449 	.open		= tracing_open_generic,
3450 	.read		= tracing_set_trace_read,
3451 	.write		= tracing_set_trace_write,
3452 };
3453 
3454 static const struct file_operations tracing_pipe_fops = {
3455 	.open		= tracing_open_pipe,
3456 	.poll		= tracing_poll_pipe,
3457 	.read		= tracing_read_pipe,
3458 	.splice_read	= tracing_splice_read_pipe,
3459 	.release	= tracing_release_pipe,
3460 };
3461 
3462 static const struct file_operations tracing_entries_fops = {
3463 	.open		= tracing_open_generic,
3464 	.read		= tracing_entries_read,
3465 	.write		= tracing_entries_write,
3466 };
3467 
3468 static const struct file_operations tracing_mark_fops = {
3469 	.open		= tracing_open_generic,
3470 	.write		= tracing_mark_write,
3471 };
3472 
3473 static const struct file_operations trace_clock_fops = {
3474 	.open		= tracing_open_generic,
3475 	.read		= tracing_clock_read,
3476 	.write		= tracing_clock_write,
3477 };
3478 
3479 struct ftrace_buffer_info {
3480 	struct trace_array	*tr;
3481 	void			*spare;
3482 	int			cpu;
3483 	unsigned int		read;
3484 };
3485 
3486 static int tracing_buffers_open(struct inode *inode, struct file *filp)
3487 {
3488 	int cpu = (int)(long)inode->i_private;
3489 	struct ftrace_buffer_info *info;
3490 
3491 	if (tracing_disabled)
3492 		return -ENODEV;
3493 
3494 	info = kzalloc(sizeof(*info), GFP_KERNEL);
3495 	if (!info)
3496 		return -ENOMEM;
3497 
3498 	info->tr	= &global_trace;
3499 	info->cpu	= cpu;
3500 	info->spare	= NULL;
3501 	/* Force reading ring buffer for first read */
3502 	info->read	= (unsigned int)-1;
3503 
3504 	filp->private_data = info;
3505 
3506 	return nonseekable_open(inode, filp);
3507 }
3508 
3509 static ssize_t
3510 tracing_buffers_read(struct file *filp, char __user *ubuf,
3511 		     size_t count, loff_t *ppos)
3512 {
3513 	struct ftrace_buffer_info *info = filp->private_data;
3514 	unsigned int pos;
3515 	ssize_t ret;
3516 	size_t size;
3517 
3518 	if (!count)
3519 		return 0;
3520 
3521 	if (!info->spare)
3522 		info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3523 	if (!info->spare)
3524 		return -ENOMEM;
3525 
3526 	/* Do we have previous read data to read? */
3527 	if (info->read < PAGE_SIZE)
3528 		goto read;
3529 
3530 	info->read = 0;
3531 
3532 	ret = ring_buffer_read_page(info->tr->buffer,
3533 				    &info->spare,
3534 				    count,
3535 				    info->cpu, 0);
3536 	if (ret < 0)
3537 		return 0;
3538 
3539 	pos = ring_buffer_page_len(info->spare);
3540 
3541 	if (pos < PAGE_SIZE)
3542 		memset(info->spare + pos, 0, PAGE_SIZE - pos);
3543 
3544 read:
3545 	size = PAGE_SIZE - info->read;
3546 	if (size > count)
3547 		size = count;
3548 
3549 	ret = copy_to_user(ubuf, info->spare + info->read, size);
3550 	if (ret == size)
3551 		return -EFAULT;
3552 	size -= ret;
3553 
3554 	*ppos += size;
3555 	info->read += size;
3556 
3557 	return size;
3558 }
3559 
3560 static int tracing_buffers_release(struct inode *inode, struct file *file)
3561 {
3562 	struct ftrace_buffer_info *info = file->private_data;
3563 
3564 	if (info->spare)
3565 		ring_buffer_free_read_page(info->tr->buffer, info->spare);
3566 	kfree(info);
3567 
3568 	return 0;
3569 }
3570 
3571 struct buffer_ref {
3572 	struct ring_buffer	*buffer;
3573 	void			*page;
3574 	int			ref;
3575 };
3576 
3577 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3578 				    struct pipe_buffer *buf)
3579 {
3580 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3581 
3582 	if (--ref->ref)
3583 		return;
3584 
3585 	ring_buffer_free_read_page(ref->buffer, ref->page);
3586 	kfree(ref);
3587 	buf->private = 0;
3588 }
3589 
3590 static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3591 				 struct pipe_buffer *buf)
3592 {
3593 	return 1;
3594 }
3595 
3596 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3597 				struct pipe_buffer *buf)
3598 {
3599 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3600 
3601 	ref->ref++;
3602 }
3603 
3604 /* Pipe buffer operations for a buffer. */
3605 static struct pipe_buf_operations buffer_pipe_buf_ops = {
3606 	.can_merge		= 0,
3607 	.map			= generic_pipe_buf_map,
3608 	.unmap			= generic_pipe_buf_unmap,
3609 	.confirm		= generic_pipe_buf_confirm,
3610 	.release		= buffer_pipe_buf_release,
3611 	.steal			= buffer_pipe_buf_steal,
3612 	.get			= buffer_pipe_buf_get,
3613 };
3614 
3615 /*
3616  * Callback from splice_to_pipe(), if we need to release some pages
3617  * at the end of the spd in case we errored out in filling the pipe.
3618  */
3619 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3620 {
3621 	struct buffer_ref *ref =
3622 		(struct buffer_ref *)spd->partial[i].private;
3623 
3624 	if (--ref->ref)
3625 		return;
3626 
3627 	ring_buffer_free_read_page(ref->buffer, ref->page);
3628 	kfree(ref);
3629 	spd->partial[i].private = 0;
3630 }
3631 
3632 static ssize_t
3633 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3634 			    struct pipe_inode_info *pipe, size_t len,
3635 			    unsigned int flags)
3636 {
3637 	struct ftrace_buffer_info *info = file->private_data;
3638 	struct partial_page partial[PIPE_BUFFERS];
3639 	struct page *pages[PIPE_BUFFERS];
3640 	struct splice_pipe_desc spd = {
3641 		.pages		= pages,
3642 		.partial	= partial,
3643 		.flags		= flags,
3644 		.ops		= &buffer_pipe_buf_ops,
3645 		.spd_release	= buffer_spd_release,
3646 	};
3647 	struct buffer_ref *ref;
3648 	int entries, size, i;
3649 	size_t ret;
3650 
3651 	if (*ppos & (PAGE_SIZE - 1)) {
3652 		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3653 		return -EINVAL;
3654 	}
3655 
3656 	if (len & (PAGE_SIZE - 1)) {
3657 		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3658 		if (len < PAGE_SIZE)
3659 			return -EINVAL;
3660 		len &= PAGE_MASK;
3661 	}
3662 
3663 	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3664 
3665 	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
3666 		struct page *page;
3667 		int r;
3668 
3669 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3670 		if (!ref)
3671 			break;
3672 
3673 		ref->ref = 1;
3674 		ref->buffer = info->tr->buffer;
3675 		ref->page = ring_buffer_alloc_read_page(ref->buffer);
3676 		if (!ref->page) {
3677 			kfree(ref);
3678 			break;
3679 		}
3680 
3681 		r = ring_buffer_read_page(ref->buffer, &ref->page,
3682 					  len, info->cpu, 1);
3683 		if (r < 0) {
3684 			ring_buffer_free_read_page(ref->buffer,
3685 						   ref->page);
3686 			kfree(ref);
3687 			break;
3688 		}
3689 
3690 		/*
3691 		 * Zero out any leftover data; this is going to
3692 		 * user land.
3693 		 */
3694 		size = ring_buffer_page_len(ref->page);
3695 		if (size < PAGE_SIZE)
3696 			memset(ref->page + size, 0, PAGE_SIZE - size);
3697 
3698 		page = virt_to_page(ref->page);
3699 
3700 		spd.pages[i] = page;
3701 		spd.partial[i].len = PAGE_SIZE;
3702 		spd.partial[i].offset = 0;
3703 		spd.partial[i].private = (unsigned long)ref;
3704 		spd.nr_pages++;
3705 		*ppos += PAGE_SIZE;
3706 
3707 		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3708 	}
3709 
3710 	spd.nr_pages = i;
3711 
3712 	/* did we read anything? */
3713 	if (!spd.nr_pages) {
3714 		if (flags & SPLICE_F_NONBLOCK)
3715 			ret = -EAGAIN;
3716 		else
3717 			ret = 0;
3718 		/* TODO: block */
3719 		return ret;
3720 	}
3721 
3722 	ret = splice_to_pipe(pipe, &spd);
3723 
3724 	return ret;
3725 }
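
/*
 * A minimal raw-page reader sketch (a hypothetical user-space program,
 * assuming a 4096-byte page): trace_pipe_raw hands out whole ring-buffer
 * pages, so both the offset and the length must be page aligned, as
 * enforced above:
 *
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *	splice(p[0], NULL, STDOUT_FILENO, NULL, 4096, 0);
 */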
3726 
3727 static const struct file_operations tracing_buffers_fops = {
3728 	.open		= tracing_buffers_open,
3729 	.read		= tracing_buffers_read,
3730 	.release	= tracing_buffers_release,
3731 	.splice_read	= tracing_buffers_splice_read,
3732 	.llseek		= no_llseek,
3733 };
3734 
3735 static ssize_t
3736 tracing_stats_read(struct file *filp, char __user *ubuf,
3737 		   size_t count, loff_t *ppos)
3738 {
3739 	unsigned long cpu = (unsigned long)filp->private_data;
3740 	struct trace_array *tr = &global_trace;
3741 	struct trace_seq *s;
3742 	unsigned long cnt;
3743 
3744 	s = kmalloc(sizeof(*s), GFP_KERNEL);
3745 	if (!s)
3746 		return -ENOMEM;
3747 
3748 	trace_seq_init(s);
3749 
3750 	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
3751 	trace_seq_printf(s, "entries: %ld\n", cnt);
3752 
3753 	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
3754 	trace_seq_printf(s, "overrun: %ld\n", cnt);
3755 
3756 	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3757 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3758 
3759 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3760 
3761 	kfree(s);
3762 
3763 	return count;
3764 }
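
/*
 * Illustrative output (field names match the trace_seq_printf() calls
 * above; the numbers are made up):
 *
 *	# cat /sys/kernel/debug/tracing/per_cpu/cpu0/stats
 *	entries: 128
 *	overrun: 0
 *	commit overrun: 0
 */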
3765 
3766 static const struct file_operations tracing_stats_fops = {
3767 	.open		= tracing_open_generic,
3768 	.read		= tracing_stats_read,
3769 };
3770 
3771 #ifdef CONFIG_DYNAMIC_FTRACE
3772 
3773 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3774 {
3775 	return 0;
3776 }
3777 
3778 static ssize_t
3779 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3780 		  size_t cnt, loff_t *ppos)
3781 {
3782 	static char ftrace_dyn_info_buffer[1024];
3783 	static DEFINE_MUTEX(dyn_info_mutex);
3784 	unsigned long *p = filp->private_data;
3785 	char *buf = ftrace_dyn_info_buffer;
3786 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
3787 	int r;
3788 
3789 	mutex_lock(&dyn_info_mutex);
3790 	r = sprintf(buf, "%ld ", *p);
3791 
3792 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3793 	buf[r++] = '\n';
3794 
3795 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3796 
3797 	mutex_unlock(&dyn_info_mutex);
3798 
3799 	return r;
3800 }
3801 
3802 static const struct file_operations tracing_dyn_info_fops = {
3803 	.open		= tracing_open_generic,
3804 	.read		= tracing_read_dyn_info,
3805 };
3806 #endif
3807 
3808 static struct dentry *d_tracer;
3809 
3810 struct dentry *tracing_init_dentry(void)
3811 {
3812 	static int once;
3813 
3814 	if (d_tracer)
3815 		return d_tracer;
3816 
3817 	if (!debugfs_initialized())
3818 		return NULL;
3819 
3820 	d_tracer = debugfs_create_dir("tracing", NULL);
3821 
3822 	if (!d_tracer && !once) {
3823 		once = 1;
3824 		pr_warning("Could not create debugfs directory 'tracing'\n");
3825 		return NULL;
3826 	}
3827 
3828 	return d_tracer;
3829 }
3830 
3831 static struct dentry *d_percpu;
3832 
3833 struct dentry *tracing_dentry_percpu(void)
3834 {
3835 	static int once;
3836 	struct dentry *d_tracer;
3837 
3838 	if (d_percpu)
3839 		return d_percpu;
3840 
3841 	d_tracer = tracing_init_dentry();
3842 
3843 	if (!d_tracer)
3844 		return NULL;
3845 
3846 	d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3847 
3848 	if (!d_percpu && !once) {
3849 		once = 1;
3850 		pr_warning("Could not create debugfs directory 'per_cpu'\n");
3851 		return NULL;
3852 	}
3853 
3854 	return d_percpu;
3855 }
3856 
3857 static void tracing_init_debugfs_percpu(long cpu)
3858 {
3859 	struct dentry *d_percpu = tracing_dentry_percpu();
3860 	struct dentry *d_cpu;
3861 	/* strlen("cpu") + up to 3 digits (cpu <= 999) + '\0' */
3862 	char cpu_dir[7];
3863 
3864 	if (cpu > 999 || cpu < 0)
3865 		return;
3866 
3867 	sprintf(cpu_dir, "cpu%ld", cpu);
3868 	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3869 	if (!d_cpu) {
3870 		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3871 		return;
3872 	}
3873 
3874 	/* per cpu trace_pipe */
3875 	trace_create_file("trace_pipe", 0444, d_cpu,
3876 			(void *) cpu, &tracing_pipe_fops);
3877 
3878 	/* per cpu trace */
3879 	trace_create_file("trace", 0644, d_cpu,
3880 			(void *) cpu, &tracing_fops);
3881 
3882 	trace_create_file("trace_pipe_raw", 0444, d_cpu,
3883 			(void *) cpu, &tracing_buffers_fops);
3884 
3885 	trace_create_file("stats", 0444, d_cpu,
3886 			(void *) cpu, &tracing_stats_fops);
3887 }
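
/*
 * Resulting layout sketch (one directory per tracing CPU):
 *
 *	/sys/kernel/debug/tracing/per_cpu/cpu0/trace
 *	/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe
 *	/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw
 *	/sys/kernel/debug/tracing/per_cpu/cpu0/stats
 */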
3888 
3889 #ifdef CONFIG_FTRACE_SELFTEST
3890 /* Let selftest have access to static functions in this file */
3891 #include "trace_selftest.c"
3892 #endif
3893 
3894 struct trace_option_dentry {
3895 	struct tracer_opt		*opt;
3896 	struct tracer_flags		*flags;
3897 	struct dentry			*entry;
3898 };
3899 
3900 static ssize_t
3901 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3902 			loff_t *ppos)
3903 {
3904 	struct trace_option_dentry *topt = filp->private_data;
3905 	char *buf;
3906 
3907 	if (topt->flags->val & topt->opt->bit)
3908 		buf = "1\n";
3909 	else
3910 		buf = "0\n";
3911 
3912 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3913 }
3914 
3915 static ssize_t
3916 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3917 			 loff_t *ppos)
3918 {
3919 	struct trace_option_dentry *topt = filp->private_data;
3920 	unsigned long val;
3921 	char buf[64];
3922 	int ret;
3923 
3924 	if (cnt >= sizeof(buf))
3925 		return -EINVAL;
3926 
3927 	if (copy_from_user(&buf, ubuf, cnt))
3928 		return -EFAULT;
3929 
3930 	buf[cnt] = 0;
3931 
3932 	ret = strict_strtoul(buf, 10, &val);
3933 	if (ret < 0)
3934 		return ret;
3935 
3936 	ret = 0;
3937 	switch (val) {
3938 	case 0:
3939 		/* do nothing if already cleared */
3940 		if (!(topt->flags->val & topt->opt->bit))
3941 			break;
3942 
3943 		mutex_lock(&trace_types_lock);
3944 		if (current_trace->set_flag)
3945 			ret = current_trace->set_flag(topt->flags->val,
3946 						      topt->opt->bit, 0);
3947 		mutex_unlock(&trace_types_lock);
3948 		if (ret)
3949 			return ret;
3950 		topt->flags->val &= ~topt->opt->bit;
3951 		break;
3952 	case 1:
3953 		/* do nothing if already set */
3954 		if (topt->flags->val & topt->opt->bit)
3955 			break;
3956 
3957 		mutex_lock(&trace_types_lock);
3958 		if (current_trace->set_flag)
3959 			ret = current_trace->set_flag(topt->flags->val,
3960 						      topt->opt->bit, 1);
3961 		mutex_unlock(&trace_types_lock);
3962 		if (ret)
3963 			return ret;
3964 		topt->flags->val |= topt->opt->bit;
3965 		break;
3966 
3967 	default:
3968 		return -EINVAL;
3969 	}
3970 
3971 	*ppos += cnt;
3972 
3973 	return cnt;
3974 }
3975 
3976 
3977 static const struct file_operations trace_options_fops = {
3978 	.open = tracing_open_generic,
3979 	.read = trace_options_read,
3980 	.write = trace_options_write,
3981 };
3982 
3983 static ssize_t
3984 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3985 			loff_t *ppos)
3986 {
3987 	long index = (long)filp->private_data;
3988 	char *buf;
3989 
3990 	if (trace_flags & (1 << index))
3991 		buf = "1\n";
3992 	else
3993 		buf = "0\n";
3994 
3995 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3996 }
3997 
3998 static ssize_t
3999 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4000 			 loff_t *ppos)
4001 {
4002 	long index = (long)filp->private_data;
4003 	char buf[64];
4004 	unsigned long val;
4005 	int ret;
4006 
4007 	if (cnt >= sizeof(buf))
4008 		return -EINVAL;
4009 
4010 	if (copy_from_user(&buf, ubuf, cnt))
4011 		return -EFAULT;
4012 
4013 	buf[cnt] = 0;
4014 
4015 	ret = strict_strtoul(buf, 10, &val);
4016 	if (ret < 0)
4017 		return ret;
4018 
4019 	if (val != 0 && val != 1)
4020 		return -EINVAL;
4021 	set_tracer_flags(1 << index, val);
4022 
4023 	*ppos += cnt;
4024 
4025 	return cnt;
4026 }
4027 
4028 static const struct file_operations trace_options_core_fops = {
4029 	.open = tracing_open_generic,
4030 	.read = trace_options_core_read,
4031 	.write = trace_options_core_write,
4032 };
4033 
4034 struct dentry *trace_create_file(const char *name,
4035 				 mode_t mode,
4036 				 struct dentry *parent,
4037 				 void *data,
4038 				 const struct file_operations *fops)
4039 {
4040 	struct dentry *ret;
4041 
4042 	ret = debugfs_create_file(name, mode, parent, data, fops);
4043 	if (!ret)
4044 		pr_warning("Could not create debugfs '%s' entry\n", name);
4045 
4046 	return ret;
4047 }
4048 
4049 
4050 static struct dentry *trace_options_init_dentry(void)
4051 {
4052 	struct dentry *d_tracer;
4053 	static struct dentry *t_options;
4054 
4055 	if (t_options)
4056 		return t_options;
4057 
4058 	d_tracer = tracing_init_dentry();
4059 	if (!d_tracer)
4060 		return NULL;
4061 
4062 	t_options = debugfs_create_dir("options", d_tracer);
4063 	if (!t_options) {
4064 		pr_warning("Could not create debugfs directory 'options'\n");
4065 		return NULL;
4066 	}
4067 
4068 	return t_options;
4069 }
4070 
4071 static void
4072 create_trace_option_file(struct trace_option_dentry *topt,
4073 			 struct tracer_flags *flags,
4074 			 struct tracer_opt *opt)
4075 {
4076 	struct dentry *t_options;
4077 
4078 	t_options = trace_options_init_dentry();
4079 	if (!t_options)
4080 		return;
4081 
4082 	topt->flags = flags;
4083 	topt->opt = opt;
4084 
4085 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
4086 				    &trace_options_fops);
4087 
4088 }
4089 
4090 static struct trace_option_dentry *
4091 create_trace_option_files(struct tracer *tracer)
4092 {
4093 	struct trace_option_dentry *topts;
4094 	struct tracer_flags *flags;
4095 	struct tracer_opt *opts;
4096 	int cnt;
4097 
4098 	if (!tracer)
4099 		return NULL;
4100 
4101 	flags = tracer->flags;
4102 
4103 	if (!flags || !flags->opts)
4104 		return NULL;
4105 
4106 	opts = flags->opts;
4107 
4108 	for (cnt = 0; opts[cnt].name; cnt++)
4109 		;
4110 
4111 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
4112 	if (!topts)
4113 		return NULL;
4114 
4115 	for (cnt = 0; opts[cnt].name; cnt++)
4116 		create_trace_option_file(&topts[cnt], flags,
4117 					 &opts[cnt]);
4118 
4119 	return topts;
4120 }
4121 
4122 static void
4123 destroy_trace_option_files(struct trace_option_dentry *topts)
4124 {
4125 	int cnt;
4126 
4127 	if (!topts)
4128 		return;
4129 
4130 	for (cnt = 0; topts[cnt].opt; cnt++) {
4131 		if (topts[cnt].entry)
4132 			debugfs_remove(topts[cnt].entry);
4133 	}
4134 
4135 	kfree(topts);
4136 }
4137 
4138 static struct dentry *
4139 create_trace_option_core_file(const char *option, long index)
4140 {
4141 	struct dentry *t_options;
4142 
4143 	t_options = trace_options_init_dentry();
4144 	if (!t_options)
4145 		return NULL;
4146 
4147 	return trace_create_file(option, 0644, t_options, (void *)index,
4148 				    &trace_options_core_fops);
4149 }
4150 
4151 static __init void create_trace_options_dir(void)
4152 {
4153 	struct dentry *t_options;
4154 	int i;
4155 
4156 	t_options = trace_options_init_dentry();
4157 	if (!t_options)
4158 		return;
4159 
4160 	for (i = 0; trace_options[i]; i++)
4161 		create_trace_option_core_file(trace_options[i], i);
4162 }
4163 
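/*
 * Populate the tracing debugfs directory (typically visible as
 * /sys/kernel/debug/tracing once debugfs is mounted) with the core
 * control and output files.  For example, from that directory:
 *
 *	echo 1 > tracing_enabled
 *	echo nop > current_tracer
 *	cat trace
 */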
4164 static __init int tracer_init_debugfs(void)
4165 {
4166 	struct dentry *d_tracer;
4167 	int cpu;
4168 
4169 	d_tracer = tracing_init_dentry();
4170 
4171 	trace_create_file("tracing_enabled", 0644, d_tracer,
4172 			&global_trace, &tracing_ctrl_fops);
4173 
4174 	trace_create_file("trace_options", 0644, d_tracer,
4175 			NULL, &tracing_iter_fops);
4176 
4177 	trace_create_file("tracing_cpumask", 0644, d_tracer,
4178 			NULL, &tracing_cpumask_fops);
4179 
4180 	trace_create_file("trace", 0644, d_tracer,
4181 			(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
4182 
4183 	trace_create_file("available_tracers", 0444, d_tracer,
4184 			&global_trace, &show_traces_fops);
4185 
4186 	trace_create_file("current_tracer", 0644, d_tracer,
4187 			&global_trace, &set_tracer_fops);
4188 
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190 	trace_create_file("tracing_max_latency", 0644, d_tracer,
4191 			&tracing_max_latency, &tracing_max_lat_fops);
4192 
4193 	trace_create_file("tracing_thresh", 0644, d_tracer,
4194 			&tracing_thresh, &tracing_max_lat_fops);
4195 #endif
4196 
4197 	trace_create_file("README", 0444, d_tracer,
4198 			NULL, &tracing_readme_fops);
4199 
4200 	trace_create_file("trace_pipe", 0444, d_tracer,
4201 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4202 
4203 	trace_create_file("buffer_size_kb", 0644, d_tracer,
4204 			&global_trace, &tracing_entries_fops);
4205 
4206 	trace_create_file("trace_marker", 0220, d_tracer,
4207 			NULL, &tracing_mark_fops);
4208 
4209 	trace_create_file("saved_cmdlines", 0444, d_tracer,
4210 			NULL, &tracing_saved_cmdlines_fops);
4211 
4212 	trace_create_file("trace_clock", 0644, d_tracer, NULL,
4213 			  &trace_clock_fops);
4214 
4215 #ifdef CONFIG_DYNAMIC_FTRACE
4216 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4217 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4218 #endif
4219 #ifdef CONFIG_SYSPROF_TRACER
4220 	init_tracer_sysprof_debugfs(d_tracer);
4221 #endif
4222 
4223 	create_trace_options_dir();
4224 
4225 	for_each_tracing_cpu(cpu)
4226 		tracing_init_debugfs_percpu(cpu);
4227 
4228 	return 0;
4229 }
4230 
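/*
 * Panic and die notifiers: when ftrace_dump_on_oops is set, dump the
 * ftrace ring buffer to the console on a panic or an oops.
 */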
4231 static int trace_panic_handler(struct notifier_block *this,
4232 			       unsigned long event, void *unused)
4233 {
4234 	if (ftrace_dump_on_oops)
4235 		ftrace_dump();
4236 	return NOTIFY_OK;
4237 }
4238 
4239 static struct notifier_block trace_panic_notifier = {
4240 	.notifier_call  = trace_panic_handler,
4241 	.next           = NULL,
4242 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
4243 };
4244 
4245 static int trace_die_handler(struct notifier_block *self,
4246 			     unsigned long val,
4247 			     void *data)
4248 {
4249 	switch (val) {
4250 	case DIE_OOPS:
4251 		if (ftrace_dump_on_oops)
4252 			ftrace_dump();
4253 		break;
4254 	default:
4255 		break;
4256 	}
4257 	return NOTIFY_OK;
4258 }
4259 
4260 static struct notifier_block trace_die_notifier = {
4261 	.notifier_call = trace_die_handler,
4262 	.priority = 200
4263 };
4264 
4265 /*
4266  * printk is limited to a maximum of 1024 characters and we really don't
4267  * need it that big. Nothing should be printing 1000 characters anyway.
4268  */
4269 #define TRACE_MAX_PRINT		1000
4270 
4271 /*
4272  * Define here KERN_TRACE so that we have one place to modify
4273  * it if we decide to change what log level the ftrace dump
4274  * should be at.
4275  */
4276 #define KERN_TRACE		KERN_EMERG
4277 
4278 static void
4279 trace_printk_seq(struct trace_seq *s)
4280 {
4281 	/* Probably should print a warning here. */
4282 	/* Probably should print a warning here. */
4283 	if (s->len >= TRACE_MAX_PRINT)
4284 		s->len = TRACE_MAX_PRINT;
4285 
4286 	/* Should be zero terminated, but we are paranoid. */
4287 
4288 	printk(KERN_TRACE "%s", s->buffer);
4289 
4290 	trace_seq_init(s);
4291 }
4292 
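/*
 * Dump the contents of the ftrace ring buffer to the console at
 * KERN_TRACE (KERN_EMERG) level.  The dump runs at most once (guarded
 * by dump_ran) with interrupts disabled and a raw spinlock held, so it
 * is usable from panic/die context.  When disable_tracing is true,
 * tracing is killed for good instead of being re-enabled afterwards.
 */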
4293 static void __ftrace_dump(bool disable_tracing)
4294 {
4295 	static raw_spinlock_t ftrace_dump_lock =
4296 		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
4297 	/* use static because iter can be a bit big for the stack */
4298 	static struct trace_iterator iter;
4299 	unsigned int old_userobj;
4300 	static int dump_ran;
4301 	unsigned long flags;
4302 	int cnt = 0, cpu;
4303 
4304 	/* only one dump */
4305 	local_irq_save(flags);
4306 	__raw_spin_lock(&ftrace_dump_lock);
4307 	if (dump_ran)
4308 		goto out;
4309 
4310 	dump_ran = 1;
4311 
4312 	tracing_off();
4313 
4314 	if (disable_tracing)
4315 		ftrace_kill();
4316 
4317 	for_each_tracing_cpu(cpu) {
4318 		atomic_inc(&global_trace.data[cpu]->disabled);
4319 	}
4320 
4321 	old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
4322 
4323 	/* don't look at user memory in panic mode */
4324 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4325 
4326 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
4327 
4328 	/* Simulate the iterator */
4329 	iter.tr = &global_trace;
4330 	iter.trace = current_trace;
4331 	iter.cpu_file = TRACE_PIPE_ALL_CPU;
4332 
4333 	/*
4334 	 * We need to stop all tracing on all CPUs to be able to
4335 	 * read the next buffer entry. This is a bit expensive, but
4336 	 * is not done often. We read everything we can and then
4337 	 * release the locks again.
4338 	 */
4339 
4340 	while (!trace_empty(&iter)) {
4341 
4342 		if (!cnt)
4343 			printk(KERN_TRACE "---------------------------------\n");
4344 
4345 		cnt++;
4346 
4347 		/* reset all but tr, trace, and overruns */
4348 		memset(&iter.seq, 0,
4349 		       sizeof(struct trace_iterator) -
4350 		       offsetof(struct trace_iterator, seq));
4351 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
4352 		iter.pos = -1;
4353 
4354 		if (find_next_entry_inc(&iter) != NULL) {
4355 			int ret;
4356 
4357 			ret = print_trace_line(&iter);
4358 			if (ret != TRACE_TYPE_NO_CONSUME)
4359 				trace_consume(&iter);
4360 		}
4361 
4362 		trace_printk_seq(&iter.seq);
4363 	}
4364 
4365 	if (!cnt)
4366 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
4367 	else
4368 		printk(KERN_TRACE "---------------------------------\n");
4369 
4370 	/* Re-enable tracing if requested */
4371 	if (!disable_tracing) {
4372 		trace_flags |= old_userobj;
4373 
4374 		for_each_tracing_cpu(cpu) {
4375 			atomic_dec(&global_trace.data[cpu]->disabled);
4376 		}
4377 		tracing_on();
4378 	}
4379 
4380  out:
4381 	__raw_spin_unlock(&ftrace_dump_lock);
4382 	local_irq_restore(flags);
4383 }
4384 
4385 /* By default: disable tracing after the dump */
4386 void ftrace_dump(void)
4387 {
4388 	__ftrace_dump(true);
4389 }
4390 
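/*
 * Early boot initialization: allocate the tracing cpumasks and the
 * global ring buffer (plus the max-latency buffer when
 * CONFIG_TRACER_MAX_TRACE is set), wire up the per-CPU trace data,
 * register the nop tracer as the default and hook the panic/die
 * notifiers.  Called via early_initcall().
 */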
4391 __init static int tracer_alloc_buffers(void)
4392 {
4393 	int ring_buf_size;
4394 	int i;
4395 	int ret = -ENOMEM;
4396 
4397 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4398 		goto out;
4399 
4400 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4401 		goto out_free_buffer_mask;
4402 
4403 	if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4404 		goto out_free_tracing_cpumask;
4405 
4406 	/* To save memory, keep the ring buffer size to its minimum */
4407 	if (ring_buffer_expanded)
4408 		ring_buf_size = trace_buf_size;
4409 	else
4410 		ring_buf_size = 1;
4411 
4412 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4413 	cpumask_copy(tracing_cpumask, cpu_all_mask);
4414 	cpumask_clear(tracing_reader_cpumask);
4415 
4416 	/* TODO: make the number of buffers hot pluggable with CPUS */
4417 	global_trace.buffer = ring_buffer_alloc(ring_buf_size,
4418 						   TRACE_BUFFER_FLAGS);
4419 	if (!global_trace.buffer) {
4420 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
4421 		WARN_ON(1);
4422 		goto out_free_cpumask;
4423 	}
4424 	global_trace.entries = ring_buffer_size(global_trace.buffer);
4425 
4426 
4427 #ifdef CONFIG_TRACER_MAX_TRACE
4428 	max_tr.buffer = ring_buffer_alloc(ring_buf_size,
4429 					     TRACE_BUFFER_FLAGS);
4430 	if (!max_tr.buffer) {
4431 		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4432 		WARN_ON(1);
4433 		ring_buffer_free(global_trace.buffer);
4434 		goto out_free_cpumask;
4435 	}
4436 	max_tr.entries = ring_buffer_size(max_tr.buffer);
4437 	WARN_ON(max_tr.entries != global_trace.entries);
4438 #endif
4439 
4440 	/* Hook up the per-CPU trace data for each buffer */
4441 	for_each_tracing_cpu(i) {
4442 		global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4443 		max_tr.data[i] = &per_cpu(max_data, i);
4444 	}
4445 
4446 	trace_init_cmdlines();
4447 
4448 	register_tracer(&nop_trace);
4449 	current_trace = &nop_trace;
4450 #ifdef CONFIG_BOOT_TRACER
4451 	register_tracer(&boot_tracer);
4452 #endif
4453 	/* All seems OK, enable tracing */
4454 	tracing_disabled = 0;
4455 
4456 	atomic_notifier_chain_register(&panic_notifier_list,
4457 				       &trace_panic_notifier);
4458 
4459 	register_die_notifier(&trace_die_notifier);
4460 
4461 	return 0;
4462 
4463 out_free_cpumask:
4464 	free_cpumask_var(tracing_reader_cpumask);
4465 out_free_tracing_cpumask:
4466 	free_cpumask_var(tracing_cpumask);
4467 out_free_buffer_mask:
4468 	free_cpumask_var(tracing_buffer_mask);
4469 out:
4470 	return ret;
4471 }
4472 
4473 __init static int clear_boot_tracer(void)
4474 {
4475 	/*
4476 	 * The default bootup tracer name points into an init section
4477 	 * that is about to be freed. This function is called from a
4478 	 * late initcall; if the boot tracer was never found, clear the
4479 	 * pointer so that a later tracer registration does not access
4480 	 * the buffer that is about to be freed.
4481 	 */
4482 	if (!default_bootup_tracer)
4483 		return 0;
4484 
4485 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
4486 	       default_bootup_tracer);
4487 	default_bootup_tracer = NULL;
4488 
4489 	return 0;
4490 }
4491 
4492 early_initcall(tracer_alloc_buffers);
4493 fs_initcall(tracer_init_debugfs);
4494 late_initcall(clear_boot_tracer);
4495