1 /*
2  * trace task wakeup timings
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Based on code from the latency_tracer, that is:
8  *
9  *  Copyright (C) 2004-2006 Ingo Molnar
10  *  Copyright (C) 2004 William Lee Irwin III
11  */
12 #include <linux/module.h>
13 #include <linux/fs.h>
14 #include <linux/debugfs.h>
15 #include <linux/kallsyms.h>
16 #include <linux/uaccess.h>
17 #include <linux/ftrace.h>
18 #include <trace/events/sched.h>
19 
20 #include "trace.h"
21 
22 static struct trace_array	*wakeup_trace;
23 static int __read_mostly	tracer_enabled;
24 
25 static struct task_struct	*wakeup_task;
26 static int			wakeup_cpu;
27 static int			wakeup_current_cpu;
28 static unsigned			wakeup_prio = -1;
29 static int			wakeup_rt;
30 
31 static arch_spinlock_t wakeup_lock =
32 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 
34 static void wakeup_reset(struct trace_array *tr);
35 static void __wakeup_reset(struct trace_array *tr);
36 static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37 static void wakeup_graph_return(struct ftrace_graph_ret *trace);
38 
39 static int save_lat_flag;
40 
41 #define TRACE_DISPLAY_GRAPH     1
42 
43 static struct tracer_opt trace_opts[] = {
44 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 	/* display latency trace as call graph */
46 	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47 #endif
48 	{ } /* Empty entry */
49 };
50 
51 static struct tracer_flags tracer_flags = {
52 	.val  = 0,
53 	.opts = trace_opts,
54 };
55 
56 #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57 
58 #ifdef CONFIG_FUNCTION_TRACER
59 
60 /*
61  * Prologue for the wakeup function tracers.
62  *
63  * Returns 1 if it is OK to continue, and preemption
64  *            is disabled and data->disabled is incremented.
65  *         0 if the trace is to be ignored, and preemption
66  *            is not disabled and data->disabled is
67  *            kept the same.
68  *
69  * Note, this function is also used outside this ifdef but
70  *  inside the #ifdef of the function graph tracer below.
71  *  This is OK, since the function graph tracer is
72  *  dependent on the function tracer.
73  */
74 static int
75 func_prolog_preempt_disable(struct trace_array *tr,
76 			    struct trace_array_cpu **data,
77 			    int *pc)
78 {
79 	long disabled;
80 	int cpu;
81 
82 	if (likely(!wakeup_task))
83 		return 0;
84 
85 	*pc = preempt_count();
86 	preempt_disable_notrace();
87 
88 	cpu = raw_smp_processor_id();
89 	if (cpu != wakeup_current_cpu)
90 		goto out_enable;
91 
92 	*data = tr->data[cpu];
93 	disabled = atomic_inc_return(&(*data)->disabled);
94 	if (unlikely(disabled != 1))
95 		goto out;
96 
97 	return 1;
98 
99 out:
100 	atomic_dec(&(*data)->disabled);
101 
102 out_enable:
103 	preempt_enable_notrace();
104 	return 0;
105 }
106 
107 /*
108  * wakeup uses its own tracer function to keep the overhead down:
109  */
110 static void
111 wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112 {
113 	struct trace_array *tr = wakeup_trace;
114 	struct trace_array_cpu *data;
115 	unsigned long flags;
116 	int pc;
117 
118 	if (!func_prolog_preempt_disable(tr, &data, &pc))
119 		return;
120 
121 	local_irq_save(flags);
122 	trace_function(tr, ip, parent_ip, flags, pc);
123 	local_irq_restore(flags);
124 
125 	atomic_dec(&data->disabled);
126 	preempt_enable_notrace();
127 }
128 
129 static struct ftrace_ops trace_ops __read_mostly =
130 {
131 	.func = wakeup_tracer_call,
132 	.flags = FTRACE_OPS_FL_GLOBAL,
133 };
134 #endif /* CONFIG_FUNCTION_TRACER */
135 
136 static int start_func_tracer(int graph)
137 {
138 	int ret;
139 
140 	if (!graph)
141 		ret = register_ftrace_function(&trace_ops);
142 	else
143 		ret = register_ftrace_graph(&wakeup_graph_return,
144 					    &wakeup_graph_entry);
145 
146 	if (!ret && tracing_is_enabled())
147 		tracer_enabled = 1;
148 	else
149 		tracer_enabled = 0;
150 
151 	return ret;
152 }
153 
154 static void stop_func_tracer(int graph)
155 {
156 	tracer_enabled = 0;
157 
158 	if (!graph)
159 		unregister_ftrace_function(&trace_ops);
160 	else
161 		unregister_ftrace_graph();
162 }
163 
164 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
165 static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
166 {
167 
168 	if (!(bit & TRACE_DISPLAY_GRAPH))
169 		return -EINVAL;
170 
171 	if (!(is_graph() ^ set))
172 		return 0;
173 
174 	stop_func_tracer(!set);
175 
176 	wakeup_reset(wakeup_trace);
177 	tracing_max_latency = 0;
178 
179 	return start_func_tracer(set);
180 }
181 
182 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
183 {
184 	struct trace_array *tr = wakeup_trace;
185 	struct trace_array_cpu *data;
186 	unsigned long flags;
187 	int pc, ret = 0;
188 
189 	if (!func_prolog_preempt_disable(tr, &data, &pc))
190 		return 0;
191 
192 	local_save_flags(flags);
193 	ret = __trace_graph_entry(tr, trace, flags, pc);
194 	atomic_dec(&data->disabled);
195 	preempt_enable_notrace();
196 
197 	return ret;
198 }
199 
200 static void wakeup_graph_return(struct ftrace_graph_ret *trace)
201 {
202 	struct trace_array *tr = wakeup_trace;
203 	struct trace_array_cpu *data;
204 	unsigned long flags;
205 	int pc;
206 
207 	if (!func_prolog_preempt_disable(tr, &data, &pc))
208 		return;
209 
210 	local_save_flags(flags);
211 	__trace_graph_return(tr, trace, flags, pc);
212 	atomic_dec(&data->disabled);
213 
214 	preempt_enable_notrace();
215 	return;
216 }
217 
218 static void wakeup_trace_open(struct trace_iterator *iter)
219 {
220 	if (is_graph())
221 		graph_trace_open(iter);
222 }
223 
224 static void wakeup_trace_close(struct trace_iterator *iter)
225 {
226 	if (iter->private)
227 		graph_trace_close(iter);
228 }
229 
230 #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
231 			    TRACE_GRAPH_PRINT_ABS_TIME | \
232 			    TRACE_GRAPH_PRINT_DURATION)
233 
234 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
235 {
236 	/*
237 	 * In graph mode call the graph tracer output function,
238 	 * otherwise go with the TRACE_FN event handler
239 	 */
240 	if (is_graph())
241 		return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
242 
243 	return TRACE_TYPE_UNHANDLED;
244 }
245 
246 static void wakeup_print_header(struct seq_file *s)
247 {
248 	if (is_graph())
249 		print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
250 	else
251 		trace_default_header(s);
252 }
253 
254 static void
255 __trace_function(struct trace_array *tr,
256 		 unsigned long ip, unsigned long parent_ip,
257 		 unsigned long flags, int pc)
258 {
259 	if (is_graph())
260 		trace_graph_function(tr, ip, parent_ip, flags, pc);
261 	else
262 		trace_function(tr, ip, parent_ip, flags, pc);
263 }
264 #else
265 #define __trace_function trace_function
266 
267 static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
268 {
269 	return -EINVAL;
270 }
271 
272 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
273 {
274 	return -1;
275 }
276 
277 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
278 {
279 	return TRACE_TYPE_UNHANDLED;
280 }
281 
282 static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
283 static void wakeup_trace_open(struct trace_iterator *iter) { }
284 static void wakeup_trace_close(struct trace_iterator *iter) { }
285 
286 #ifdef CONFIG_FUNCTION_TRACER
287 static void wakeup_print_header(struct seq_file *s)
288 {
289 	trace_default_header(s);
290 }
291 #else
292 static void wakeup_print_header(struct seq_file *s)
293 {
294 	trace_latency_header(s);
295 }
296 #endif /* CONFIG_FUNCTION_TRACER */
297 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
298 
299 /*
300  * Should this new latency be reported/recorded?
301  */
302 static int report_latency(cycle_t delta)
303 {
304 	if (tracing_thresh) {
305 		if (delta < tracing_thresh)
306 			return 0;
307 	} else {
308 		if (delta <= tracing_max_latency)
309 			return 0;
310 	}
311 	return 1;
312 }
313 
314 static void
315 probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
316 {
317 	if (task != wakeup_task)
318 		return;
319 
320 	wakeup_current_cpu = cpu;
321 }
322 
323 static void notrace
324 probe_wakeup_sched_switch(void *ignore,
325 			  struct task_struct *prev, struct task_struct *next)
326 {
327 	struct trace_array_cpu *data;
328 	cycle_t T0, T1, delta;
329 	unsigned long flags;
330 	long disabled;
331 	int cpu;
332 	int pc;
333 
334 	tracing_record_cmdline(prev);
335 
336 	if (unlikely(!tracer_enabled))
337 		return;
338 
339 	/*
340 	 * When we start a new trace, we set wakeup_task to NULL
341 	 * and then set tracer_enabled = 1. We want to make sure
342 	 * that another CPU does not see the tracer_enabled = 1
343 	 * and the wakeup_task with an older task, that might
344 	 * actually be the same as next.
345 	 */
346 	smp_rmb();
347 
348 	if (next != wakeup_task)
349 		return;
350 
351 	pc = preempt_count();
352 
353 	/* disable local data, not wakeup_cpu data */
354 	cpu = raw_smp_processor_id();
355 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
356 	if (likely(disabled != 1))
357 		goto out;
358 
359 	local_irq_save(flags);
360 	arch_spin_lock(&wakeup_lock);
361 
362 	/* We could race with grabbing wakeup_lock */
363 	if (unlikely(!tracer_enabled || next != wakeup_task))
364 		goto out_unlock;
365 
366 	/* The task we are waiting for is waking up */
367 	data = wakeup_trace->data[wakeup_cpu];
368 
369 	__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
370 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
371 
372 	T0 = data->preempt_timestamp;
373 	T1 = ftrace_now(cpu);
374 	delta = T1-T0;
375 
376 	if (!report_latency(delta))
377 		goto out_unlock;
378 
379 	if (likely(!is_tracing_stopped())) {
380 		tracing_max_latency = delta;
381 		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
382 	}
383 
384 out_unlock:
385 	__wakeup_reset(wakeup_trace);
386 	arch_spin_unlock(&wakeup_lock);
387 	local_irq_restore(flags);
388 out:
389 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
390 }
391 
392 static void __wakeup_reset(struct trace_array *tr)
393 {
394 	wakeup_cpu = -1;
395 	wakeup_prio = -1;
396 
397 	if (wakeup_task)
398 		put_task_struct(wakeup_task);
399 
400 	wakeup_task = NULL;
401 }
402 
403 static void wakeup_reset(struct trace_array *tr)
404 {
405 	unsigned long flags;
406 
407 	tracing_reset_online_cpus(tr);
408 
409 	local_irq_save(flags);
410 	arch_spin_lock(&wakeup_lock);
411 	__wakeup_reset(tr);
412 	arch_spin_unlock(&wakeup_lock);
413 	local_irq_restore(flags);
414 }
415 
416 static void
417 probe_wakeup(void *ignore, struct task_struct *p, int success)
418 {
419 	struct trace_array_cpu *data;
420 	int cpu = smp_processor_id();
421 	unsigned long flags;
422 	long disabled;
423 	int pc;
424 
425 	if (likely(!tracer_enabled))
426 		return;
427 
428 	tracing_record_cmdline(p);
429 	tracing_record_cmdline(current);
430 
431 	if ((wakeup_rt && !rt_task(p)) ||
432 			p->prio >= wakeup_prio ||
433 			p->prio >= current->prio)
434 		return;
435 
436 	pc = preempt_count();
437 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
438 	if (unlikely(disabled != 1))
439 		goto out;
440 
441 	/* interrupts should be off from try_to_wake_up */
442 	arch_spin_lock(&wakeup_lock);
443 
444 	/* check for races. */
445 	if (!tracer_enabled || p->prio >= wakeup_prio)
446 		goto out_locked;
447 
448 	/* reset the trace */
449 	__wakeup_reset(wakeup_trace);
450 
451 	wakeup_cpu = task_cpu(p);
452 	wakeup_current_cpu = wakeup_cpu;
453 	wakeup_prio = p->prio;
454 
455 	wakeup_task = p;
456 	get_task_struct(wakeup_task);
457 
458 	local_save_flags(flags);
459 
460 	data = wakeup_trace->data[wakeup_cpu];
461 	data->preempt_timestamp = ftrace_now(cpu);
462 	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
463 
464 	/*
465 	 * We must be careful in using CALLER_ADDR2. But since wake_up
466 	 * is not called by an assembly function  (where as schedule is)
467 	 * it should be safe to use it here.
468 	 */
469 	__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
470 
471 out_locked:
472 	arch_spin_unlock(&wakeup_lock);
473 out:
474 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
475 }
476 
477 static void start_wakeup_tracer(struct trace_array *tr)
478 {
479 	int ret;
480 
481 	ret = register_trace_sched_wakeup(probe_wakeup, NULL);
482 	if (ret) {
483 		pr_info("wakeup trace: Couldn't activate tracepoint"
484 			" probe to kernel_sched_wakeup\n");
485 		return;
486 	}
487 
488 	ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
489 	if (ret) {
490 		pr_info("wakeup trace: Couldn't activate tracepoint"
491 			" probe to kernel_sched_wakeup_new\n");
492 		goto fail_deprobe;
493 	}
494 
495 	ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
496 	if (ret) {
497 		pr_info("sched trace: Couldn't activate tracepoint"
498 			" probe to kernel_sched_switch\n");
499 		goto fail_deprobe_wake_new;
500 	}
501 
502 	ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
503 	if (ret) {
504 		pr_info("wakeup trace: Couldn't activate tracepoint"
505 			" probe to kernel_sched_migrate_task\n");
506 		return;
507 	}
508 
509 	wakeup_reset(tr);
510 
511 	/*
512 	 * Don't let the tracer_enabled = 1 show up before
513 	 * the wakeup_task is reset. This may be overkill since
514 	 * wakeup_reset does a spin_unlock after setting the
515 	 * wakeup_task to NULL, but I want to be safe.
516 	 * This is a slow path anyway.
517 	 */
518 	smp_wmb();
519 
520 	if (start_func_tracer(is_graph()))
521 		printk(KERN_ERR "failed to start wakeup tracer\n");
522 
523 	return;
524 fail_deprobe_wake_new:
525 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
526 fail_deprobe:
527 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
528 }
529 
530 static void stop_wakeup_tracer(struct trace_array *tr)
531 {
532 	tracer_enabled = 0;
533 	stop_func_tracer(is_graph());
534 	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
535 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
536 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
537 	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
538 }
539 
540 static int __wakeup_tracer_init(struct trace_array *tr)
541 {
542 	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
543 	trace_flags |= TRACE_ITER_LATENCY_FMT;
544 
545 	tracing_max_latency = 0;
546 	wakeup_trace = tr;
547 	start_wakeup_tracer(tr);
548 	return 0;
549 }
550 
551 static int wakeup_tracer_init(struct trace_array *tr)
552 {
553 	wakeup_rt = 0;
554 	return __wakeup_tracer_init(tr);
555 }
556 
557 static int wakeup_rt_tracer_init(struct trace_array *tr)
558 {
559 	wakeup_rt = 1;
560 	return __wakeup_tracer_init(tr);
561 }
562 
563 static void wakeup_tracer_reset(struct trace_array *tr)
564 {
565 	stop_wakeup_tracer(tr);
566 	/* make sure we put back any tasks we are tracing */
567 	wakeup_reset(tr);
568 
569 	if (!save_lat_flag)
570 		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
571 }
572 
573 static void wakeup_tracer_start(struct trace_array *tr)
574 {
575 	wakeup_reset(tr);
576 	tracer_enabled = 1;
577 }
578 
579 static void wakeup_tracer_stop(struct trace_array *tr)
580 {
581 	tracer_enabled = 0;
582 }
583 
584 static struct tracer wakeup_tracer __read_mostly =
585 {
586 	.name		= "wakeup",
587 	.init		= wakeup_tracer_init,
588 	.reset		= wakeup_tracer_reset,
589 	.start		= wakeup_tracer_start,
590 	.stop		= wakeup_tracer_stop,
591 	.print_max	= 1,
592 	.print_header	= wakeup_print_header,
593 	.print_line	= wakeup_print_line,
594 	.flags		= &tracer_flags,
595 	.set_flag	= wakeup_set_flag,
596 #ifdef CONFIG_FTRACE_SELFTEST
597 	.selftest    = trace_selftest_startup_wakeup,
598 #endif
599 	.open		= wakeup_trace_open,
600 	.close		= wakeup_trace_close,
601 	.use_max_tr	= 1,
602 };
603 
604 static struct tracer wakeup_rt_tracer __read_mostly =
605 {
606 	.name		= "wakeup_rt",
607 	.init		= wakeup_rt_tracer_init,
608 	.reset		= wakeup_tracer_reset,
609 	.start		= wakeup_tracer_start,
610 	.stop		= wakeup_tracer_stop,
611 	.wait_pipe	= poll_wait_pipe,
612 	.print_max	= 1,
613 	.print_header	= wakeup_print_header,
614 	.print_line	= wakeup_print_line,
615 	.flags		= &tracer_flags,
616 	.set_flag	= wakeup_set_flag,
617 #ifdef CONFIG_FTRACE_SELFTEST
618 	.selftest    = trace_selftest_startup_wakeup,
619 #endif
620 	.open		= wakeup_trace_open,
621 	.close		= wakeup_trace_close,
622 	.use_max_tr	= 1,
623 };
624 
625 __init static int init_wakeup_tracer(void)
626 {
627 	int ret;
628 
629 	ret = register_tracer(&wakeup_tracer);
630 	if (ret)
631 		return ret;
632 
633 	ret = register_tracer(&wakeup_rt_tracer);
634 	if (ret)
635 		return ret;
636 
637 	return 0;
638 }
639 device_initcall(init_wakeup_tracer);
640