1 /*
2  * trace task wakeup timings
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Based on code from the latency_tracer, that is:
8  *
9  *  Copyright (C) 2004-2006 Ingo Molnar
10  *  Copyright (C) 2004 Nadia Yvette Chambers
11  */
12 #include <linux/module.h>
13 #include <linux/fs.h>
14 #include <linux/debugfs.h>
15 #include <linux/kallsyms.h>
16 #include <linux/uaccess.h>
17 #include <linux/ftrace.h>
18 #include <trace/events/sched.h>
19 
20 #include "trace.h"
21 
22 static struct trace_array	*wakeup_trace;
23 static int __read_mostly	tracer_enabled;
24 
25 static struct task_struct	*wakeup_task;
26 static int			wakeup_cpu;
27 static int			wakeup_current_cpu;
28 static unsigned			wakeup_prio = -1;
29 static int			wakeup_rt;
30 
31 static arch_spinlock_t wakeup_lock =
32 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 
34 static void wakeup_reset(struct trace_array *tr);
35 static void __wakeup_reset(struct trace_array *tr);
36 static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37 static void wakeup_graph_return(struct ftrace_graph_ret *trace);
38 
39 static int save_lat_flag;
40 
41 #define TRACE_DISPLAY_GRAPH     1
42 
43 static struct tracer_opt trace_opts[] = {
44 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 	/* display latency trace as call graph */
46 	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47 #endif
48 	{ } /* Empty entry */
49 };
50 
51 static struct tracer_flags tracer_flags = {
52 	.val  = 0,
53 	.opts = trace_opts,
54 };
55 
56 #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57 
58 #ifdef CONFIG_FUNCTION_TRACER
59 
60 /*
61  * Prologue for the wakeup function tracers.
62  *
63  * Returns 1 if it is OK to continue, and preemption
64  *            is disabled and data->disabled is incremented.
65  *         0 if the trace is to be ignored, and preemption
66  *            is not disabled and data->disabled is
67  *            kept the same.
68  *
69  * Note, this function is also used outside this ifdef but
70  *  inside the #ifdef of the function graph tracer below.
71  *  This is OK, since the function graph tracer is
72  *  dependent on the function tracer.
73  */
74 static int
75 func_prolog_preempt_disable(struct trace_array *tr,
76 			    struct trace_array_cpu **data,
77 			    int *pc)
78 {
79 	long disabled;
80 	int cpu;
81 
82 	if (likely(!wakeup_task))
83 		return 0;
84 
85 	*pc = preempt_count();
86 	preempt_disable_notrace();
87 
88 	cpu = raw_smp_processor_id();
89 	if (cpu != wakeup_current_cpu)
90 		goto out_enable;
91 
92 	*data = tr->data[cpu];
93 	disabled = atomic_inc_return(&(*data)->disabled);
94 	if (unlikely(disabled != 1))
95 		goto out;
96 
97 	return 1;
98 
99 out:
100 	atomic_dec(&(*data)->disabled);
101 
102 out_enable:
103 	preempt_enable_notrace();
104 	return 0;
105 }
106 
107 /*
108  * wakeup uses its own tracer function to keep the overhead down:
109  */
110 static void
111 wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
112 		   struct ftrace_ops *op, struct pt_regs *pt_regs)
113 {
114 	struct trace_array *tr = wakeup_trace;
115 	struct trace_array_cpu *data;
116 	unsigned long flags;
117 	int pc;
118 
119 	if (!func_prolog_preempt_disable(tr, &data, &pc))
120 		return;
121 
122 	local_irq_save(flags);
123 	trace_function(tr, ip, parent_ip, flags, pc);
124 	local_irq_restore(flags);
125 
126 	atomic_dec(&data->disabled);
127 	preempt_enable_notrace();
128 }
129 
130 static struct ftrace_ops trace_ops __read_mostly =
131 {
132 	.func = wakeup_tracer_call,
133 	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
134 };
135 #endif /* CONFIG_FUNCTION_TRACER */
136 
137 static int start_func_tracer(int graph)
138 {
139 	int ret;
140 
141 	if (!graph)
142 		ret = register_ftrace_function(&trace_ops);
143 	else
144 		ret = register_ftrace_graph(&wakeup_graph_return,
145 					    &wakeup_graph_entry);
146 
147 	if (!ret && tracing_is_enabled())
148 		tracer_enabled = 1;
149 	else
150 		tracer_enabled = 0;
151 
152 	return ret;
153 }
154 
155 static void stop_func_tracer(int graph)
156 {
157 	tracer_enabled = 0;
158 
159 	if (!graph)
160 		unregister_ftrace_function(&trace_ops);
161 	else
162 		unregister_ftrace_graph();
163 }
164 
165 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
166 static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
167 {
168 
169 	if (!(bit & TRACE_DISPLAY_GRAPH))
170 		return -EINVAL;
171 
172 	if (!(is_graph() ^ set))
173 		return 0;
174 
175 	stop_func_tracer(!set);
176 
177 	wakeup_reset(wakeup_trace);
178 	tracing_max_latency = 0;
179 
180 	return start_func_tracer(set);
181 }
182 
183 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
184 {
185 	struct trace_array *tr = wakeup_trace;
186 	struct trace_array_cpu *data;
187 	unsigned long flags;
188 	int pc, ret = 0;
189 
190 	if (!func_prolog_preempt_disable(tr, &data, &pc))
191 		return 0;
192 
193 	local_save_flags(flags);
194 	ret = __trace_graph_entry(tr, trace, flags, pc);
195 	atomic_dec(&data->disabled);
196 	preempt_enable_notrace();
197 
198 	return ret;
199 }
200 
201 static void wakeup_graph_return(struct ftrace_graph_ret *trace)
202 {
203 	struct trace_array *tr = wakeup_trace;
204 	struct trace_array_cpu *data;
205 	unsigned long flags;
206 	int pc;
207 
208 	if (!func_prolog_preempt_disable(tr, &data, &pc))
209 		return;
210 
211 	local_save_flags(flags);
212 	__trace_graph_return(tr, trace, flags, pc);
213 	atomic_dec(&data->disabled);
214 
215 	preempt_enable_notrace();
216 	return;
217 }
218 
219 static void wakeup_trace_open(struct trace_iterator *iter)
220 {
221 	if (is_graph())
222 		graph_trace_open(iter);
223 }
224 
225 static void wakeup_trace_close(struct trace_iterator *iter)
226 {
227 	if (iter->private)
228 		graph_trace_close(iter);
229 }
230 
231 #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
232 			    TRACE_GRAPH_PRINT_ABS_TIME | \
233 			    TRACE_GRAPH_PRINT_DURATION)
234 
235 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
236 {
237 	/*
238 	 * In graph mode call the graph tracer output function,
239 	 * otherwise go with the TRACE_FN event handler
240 	 */
241 	if (is_graph())
242 		return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
243 
244 	return TRACE_TYPE_UNHANDLED;
245 }
246 
247 static void wakeup_print_header(struct seq_file *s)
248 {
249 	if (is_graph())
250 		print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
251 	else
252 		trace_default_header(s);
253 }
254 
255 static void
256 __trace_function(struct trace_array *tr,
257 		 unsigned long ip, unsigned long parent_ip,
258 		 unsigned long flags, int pc)
259 {
260 	if (is_graph())
261 		trace_graph_function(tr, ip, parent_ip, flags, pc);
262 	else
263 		trace_function(tr, ip, parent_ip, flags, pc);
264 }
265 #else
266 #define __trace_function trace_function
267 
268 static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
269 {
270 	return -EINVAL;
271 }
272 
273 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
274 {
275 	return -1;
276 }
277 
278 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
279 {
280 	return TRACE_TYPE_UNHANDLED;
281 }
282 
283 static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
284 static void wakeup_trace_open(struct trace_iterator *iter) { }
285 static void wakeup_trace_close(struct trace_iterator *iter) { }
286 
287 #ifdef CONFIG_FUNCTION_TRACER
288 static void wakeup_print_header(struct seq_file *s)
289 {
290 	trace_default_header(s);
291 }
292 #else
293 static void wakeup_print_header(struct seq_file *s)
294 {
295 	trace_latency_header(s);
296 }
297 #endif /* CONFIG_FUNCTION_TRACER */
298 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
299 
300 /*
301  * Should this new latency be reported/recorded?
302  */
303 static int report_latency(cycle_t delta)
304 {
305 	if (tracing_thresh) {
306 		if (delta < tracing_thresh)
307 			return 0;
308 	} else {
309 		if (delta <= tracing_max_latency)
310 			return 0;
311 	}
312 	return 1;
313 }
314 
315 static void
316 probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
317 {
318 	if (task != wakeup_task)
319 		return;
320 
321 	wakeup_current_cpu = cpu;
322 }
323 
324 static void notrace
325 probe_wakeup_sched_switch(void *ignore,
326 			  struct task_struct *prev, struct task_struct *next)
327 {
328 	struct trace_array_cpu *data;
329 	cycle_t T0, T1, delta;
330 	unsigned long flags;
331 	long disabled;
332 	int cpu;
333 	int pc;
334 
335 	tracing_record_cmdline(prev);
336 
337 	if (unlikely(!tracer_enabled))
338 		return;
339 
340 	/*
341 	 * When we start a new trace, we set wakeup_task to NULL
342 	 * and then set tracer_enabled = 1. We want to make sure
343 	 * that another CPU does not see the tracer_enabled = 1
344 	 * and the wakeup_task with an older task, that might
345 	 * actually be the same as next.
346 	 */
347 	smp_rmb();
348 
349 	if (next != wakeup_task)
350 		return;
351 
352 	pc = preempt_count();
353 
354 	/* disable local data, not wakeup_cpu data */
355 	cpu = raw_smp_processor_id();
356 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
357 	if (likely(disabled != 1))
358 		goto out;
359 
360 	local_irq_save(flags);
361 	arch_spin_lock(&wakeup_lock);
362 
363 	/* We could race with grabbing wakeup_lock */
364 	if (unlikely(!tracer_enabled || next != wakeup_task))
365 		goto out_unlock;
366 
367 	/* The task we are waiting for is waking up */
368 	data = wakeup_trace->data[wakeup_cpu];
369 
370 	__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
371 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
372 
373 	T0 = data->preempt_timestamp;
374 	T1 = ftrace_now(cpu);
375 	delta = T1-T0;
376 
377 	if (!report_latency(delta))
378 		goto out_unlock;
379 
380 	if (likely(!is_tracing_stopped())) {
381 		tracing_max_latency = delta;
382 		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
383 	}
384 
385 out_unlock:
386 	__wakeup_reset(wakeup_trace);
387 	arch_spin_unlock(&wakeup_lock);
388 	local_irq_restore(flags);
389 out:
390 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
391 }
392 
393 static void __wakeup_reset(struct trace_array *tr)
394 {
395 	wakeup_cpu = -1;
396 	wakeup_prio = -1;
397 
398 	if (wakeup_task)
399 		put_task_struct(wakeup_task);
400 
401 	wakeup_task = NULL;
402 }
403 
404 static void wakeup_reset(struct trace_array *tr)
405 {
406 	unsigned long flags;
407 
408 	tracing_reset_online_cpus(tr);
409 
410 	local_irq_save(flags);
411 	arch_spin_lock(&wakeup_lock);
412 	__wakeup_reset(tr);
413 	arch_spin_unlock(&wakeup_lock);
414 	local_irq_restore(flags);
415 }
416 
417 static void
418 probe_wakeup(void *ignore, struct task_struct *p, int success)
419 {
420 	struct trace_array_cpu *data;
421 	int cpu = smp_processor_id();
422 	unsigned long flags;
423 	long disabled;
424 	int pc;
425 
426 	if (likely(!tracer_enabled))
427 		return;
428 
429 	tracing_record_cmdline(p);
430 	tracing_record_cmdline(current);
431 
432 	if ((wakeup_rt && !rt_task(p)) ||
433 			p->prio >= wakeup_prio ||
434 			p->prio >= current->prio)
435 		return;
436 
437 	pc = preempt_count();
438 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
439 	if (unlikely(disabled != 1))
440 		goto out;
441 
442 	/* interrupts should be off from try_to_wake_up */
443 	arch_spin_lock(&wakeup_lock);
444 
445 	/* check for races. */
446 	if (!tracer_enabled || p->prio >= wakeup_prio)
447 		goto out_locked;
448 
449 	/* reset the trace */
450 	__wakeup_reset(wakeup_trace);
451 
452 	wakeup_cpu = task_cpu(p);
453 	wakeup_current_cpu = wakeup_cpu;
454 	wakeup_prio = p->prio;
455 
456 	wakeup_task = p;
457 	get_task_struct(wakeup_task);
458 
459 	local_save_flags(flags);
460 
461 	data = wakeup_trace->data[wakeup_cpu];
462 	data->preempt_timestamp = ftrace_now(cpu);
463 	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
464 
465 	/*
466 	 * We must be careful in using CALLER_ADDR2. But since wake_up
467 	 * is not called by an assembly function  (where as schedule is)
468 	 * it should be safe to use it here.
469 	 */
470 	__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
471 
472 out_locked:
473 	arch_spin_unlock(&wakeup_lock);
474 out:
475 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
476 }
477 
478 static void start_wakeup_tracer(struct trace_array *tr)
479 {
480 	int ret;
481 
482 	ret = register_trace_sched_wakeup(probe_wakeup, NULL);
483 	if (ret) {
484 		pr_info("wakeup trace: Couldn't activate tracepoint"
485 			" probe to kernel_sched_wakeup\n");
486 		return;
487 	}
488 
489 	ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
490 	if (ret) {
491 		pr_info("wakeup trace: Couldn't activate tracepoint"
492 			" probe to kernel_sched_wakeup_new\n");
493 		goto fail_deprobe;
494 	}
495 
496 	ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
497 	if (ret) {
498 		pr_info("sched trace: Couldn't activate tracepoint"
499 			" probe to kernel_sched_switch\n");
500 		goto fail_deprobe_wake_new;
501 	}
502 
503 	ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
504 	if (ret) {
505 		pr_info("wakeup trace: Couldn't activate tracepoint"
506 			" probe to kernel_sched_migrate_task\n");
507 		return;
508 	}
509 
510 	wakeup_reset(tr);
511 
512 	/*
513 	 * Don't let the tracer_enabled = 1 show up before
514 	 * the wakeup_task is reset. This may be overkill since
515 	 * wakeup_reset does a spin_unlock after setting the
516 	 * wakeup_task to NULL, but I want to be safe.
517 	 * This is a slow path anyway.
518 	 */
519 	smp_wmb();
520 
521 	if (start_func_tracer(is_graph()))
522 		printk(KERN_ERR "failed to start wakeup tracer\n");
523 
524 	return;
525 fail_deprobe_wake_new:
526 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
527 fail_deprobe:
528 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
529 }
530 
531 static void stop_wakeup_tracer(struct trace_array *tr)
532 {
533 	tracer_enabled = 0;
534 	stop_func_tracer(is_graph());
535 	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
536 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
537 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
538 	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
539 }
540 
541 static int __wakeup_tracer_init(struct trace_array *tr)
542 {
543 	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
544 	trace_flags |= TRACE_ITER_LATENCY_FMT;
545 
546 	tracing_max_latency = 0;
547 	wakeup_trace = tr;
548 	start_wakeup_tracer(tr);
549 	return 0;
550 }
551 
552 static int wakeup_tracer_init(struct trace_array *tr)
553 {
554 	wakeup_rt = 0;
555 	return __wakeup_tracer_init(tr);
556 }
557 
558 static int wakeup_rt_tracer_init(struct trace_array *tr)
559 {
560 	wakeup_rt = 1;
561 	return __wakeup_tracer_init(tr);
562 }
563 
564 static void wakeup_tracer_reset(struct trace_array *tr)
565 {
566 	stop_wakeup_tracer(tr);
567 	/* make sure we put back any tasks we are tracing */
568 	wakeup_reset(tr);
569 
570 	if (!save_lat_flag)
571 		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
572 }
573 
574 static void wakeup_tracer_start(struct trace_array *tr)
575 {
576 	wakeup_reset(tr);
577 	tracer_enabled = 1;
578 }
579 
580 static void wakeup_tracer_stop(struct trace_array *tr)
581 {
582 	tracer_enabled = 0;
583 }
584 
585 static struct tracer wakeup_tracer __read_mostly =
586 {
587 	.name		= "wakeup",
588 	.init		= wakeup_tracer_init,
589 	.reset		= wakeup_tracer_reset,
590 	.start		= wakeup_tracer_start,
591 	.stop		= wakeup_tracer_stop,
592 	.print_max	= true,
593 	.print_header	= wakeup_print_header,
594 	.print_line	= wakeup_print_line,
595 	.flags		= &tracer_flags,
596 	.set_flag	= wakeup_set_flag,
597 #ifdef CONFIG_FTRACE_SELFTEST
598 	.selftest    = trace_selftest_startup_wakeup,
599 #endif
600 	.open		= wakeup_trace_open,
601 	.close		= wakeup_trace_close,
602 	.use_max_tr	= true,
603 };
604 
605 static struct tracer wakeup_rt_tracer __read_mostly =
606 {
607 	.name		= "wakeup_rt",
608 	.init		= wakeup_rt_tracer_init,
609 	.reset		= wakeup_tracer_reset,
610 	.start		= wakeup_tracer_start,
611 	.stop		= wakeup_tracer_stop,
612 	.wait_pipe	= poll_wait_pipe,
613 	.print_max	= true,
614 	.print_header	= wakeup_print_header,
615 	.print_line	= wakeup_print_line,
616 	.flags		= &tracer_flags,
617 	.set_flag	= wakeup_set_flag,
618 #ifdef CONFIG_FTRACE_SELFTEST
619 	.selftest    = trace_selftest_startup_wakeup,
620 #endif
621 	.open		= wakeup_trace_open,
622 	.close		= wakeup_trace_close,
623 	.use_max_tr	= true,
624 };
625 
626 __init static int init_wakeup_tracer(void)
627 {
628 	int ret;
629 
630 	ret = register_tracer(&wakeup_tracer);
631 	if (ret)
632 		return ret;
633 
634 	ret = register_tracer(&wakeup_rt_tracer);
635 	if (ret)
636 		return ret;
637 
638 	return 0;
639 }
640 core_initcall(init_wakeup_tracer);
641