1 /*
2  * trace task wakeup timings
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Based on code from the latency_tracer, that is:
8  *
9  *  Copyright (C) 2004-2006 Ingo Molnar
10  *  Copyright (C) 2004 William Lee Irwin III
11  */
12 #include <linux/module.h>
13 #include <linux/fs.h>
14 #include <linux/debugfs.h>
15 #include <linux/kallsyms.h>
16 #include <linux/uaccess.h>
17 #include <linux/ftrace.h>
18 #include <trace/events/sched.h>
19 
20 #include "trace.h"
21 
22 static struct trace_array	*wakeup_trace;
23 static int __read_mostly	tracer_enabled;
24 
25 static struct task_struct	*wakeup_task;
26 static int			wakeup_cpu;
27 static int			wakeup_current_cpu;
28 static unsigned			wakeup_prio = -1;
29 static int			wakeup_rt;
30 
31 static arch_spinlock_t wakeup_lock =
32 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 
34 static void wakeup_reset(struct trace_array *tr);
35 static void __wakeup_reset(struct trace_array *tr);
36 static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37 static void wakeup_graph_return(struct ftrace_graph_ret *trace);
38 
39 static int save_lat_flag;
40 
41 #define TRACE_DISPLAY_GRAPH     1
42 
43 static struct tracer_opt trace_opts[] = {
44 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 	/* display latency trace as call graph */
46 	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47 #endif
48 	{ } /* Empty entry */
49 };
50 
51 static struct tracer_flags tracer_flags = {
52 	.val  = 0,
53 	.opts = trace_opts,
54 };
55 
56 #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57 
58 #ifdef CONFIG_FUNCTION_TRACER
59 
60 /*
61  * Prologue for the wakeup function tracers.
62  *
63  * Returns 1 if it is OK to continue, and preemption
64  *            is disabled and data->disabled is incremented.
65  *         0 if the trace is to be ignored, and preemption
66  *            is not disabled and data->disabled is
67  *            kept the same.
68  *
69  * Note, this function is also used outside this ifdef but
70  *  inside the #ifdef of the function graph tracer below.
71  *  This is OK, since the function graph tracer is
72  *  dependent on the function tracer.
73  */
74 static int
75 func_prolog_preempt_disable(struct trace_array *tr,
76 			    struct trace_array_cpu **data,
77 			    int *pc)
78 {
79 	long disabled;
80 	int cpu;
81 
82 	if (likely(!wakeup_task))
83 		return 0;
84 
85 	*pc = preempt_count();
86 	preempt_disable_notrace();
87 
88 	cpu = raw_smp_processor_id();
89 	if (cpu != wakeup_current_cpu)
90 		goto out_enable;
91 
92 	*data = tr->data[cpu];
93 	disabled = atomic_inc_return(&(*data)->disabled);
94 	if (unlikely(disabled != 1))
95 		goto out;
96 
97 	return 1;
98 
99 out:
100 	atomic_dec(&(*data)->disabled);
101 
102 out_enable:
103 	preempt_enable_notrace();
104 	return 0;
105 }
106 
107 /*
108  * wakeup uses its own tracer function to keep the overhead down:
109  */
110 static void
111 wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112 {
113 	struct trace_array *tr = wakeup_trace;
114 	struct trace_array_cpu *data;
115 	unsigned long flags;
116 	int pc;
117 
118 	if (!func_prolog_preempt_disable(tr, &data, &pc))
119 		return;
120 
121 	local_irq_save(flags);
122 	trace_function(tr, ip, parent_ip, flags, pc);
123 	local_irq_restore(flags);
124 
125 	atomic_dec(&data->disabled);
126 	preempt_enable_notrace();
127 }
128 
129 static struct ftrace_ops trace_ops __read_mostly =
130 {
131 	.func = wakeup_tracer_call,
132 	.flags = FTRACE_OPS_FL_GLOBAL,
133 };
134 #endif /* CONFIG_FUNCTION_TRACER */
135 
136 static int start_func_tracer(int graph)
137 {
138 	int ret;
139 
140 	if (!graph)
141 		ret = register_ftrace_function(&trace_ops);
142 	else
143 		ret = register_ftrace_graph(&wakeup_graph_return,
144 					    &wakeup_graph_entry);
145 
146 	if (!ret && tracing_is_enabled())
147 		tracer_enabled = 1;
148 	else
149 		tracer_enabled = 0;
150 
151 	return ret;
152 }
153 
154 static void stop_func_tracer(int graph)
155 {
156 	tracer_enabled = 0;
157 
158 	if (!graph)
159 		unregister_ftrace_function(&trace_ops);
160 	else
161 		unregister_ftrace_graph();
162 }
163 
164 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
165 static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
166 {
167 
168 	if (!(bit & TRACE_DISPLAY_GRAPH))
169 		return -EINVAL;
170 
171 	if (!(is_graph() ^ set))
172 		return 0;
173 
174 	stop_func_tracer(!set);
175 
176 	wakeup_reset(wakeup_trace);
177 	tracing_max_latency = 0;
178 
179 	return start_func_tracer(set);
180 }
181 
182 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
183 {
184 	struct trace_array *tr = wakeup_trace;
185 	struct trace_array_cpu *data;
186 	unsigned long flags;
187 	int pc, ret = 0;
188 
189 	if (!func_prolog_preempt_disable(tr, &data, &pc))
190 		return 0;
191 
192 	local_save_flags(flags);
193 	ret = __trace_graph_entry(tr, trace, flags, pc);
194 	atomic_dec(&data->disabled);
195 	preempt_enable_notrace();
196 
197 	return ret;
198 }
199 
200 static void wakeup_graph_return(struct ftrace_graph_ret *trace)
201 {
202 	struct trace_array *tr = wakeup_trace;
203 	struct trace_array_cpu *data;
204 	unsigned long flags;
205 	int pc;
206 
207 	if (!func_prolog_preempt_disable(tr, &data, &pc))
208 		return;
209 
210 	local_save_flags(flags);
211 	__trace_graph_return(tr, trace, flags, pc);
212 	atomic_dec(&data->disabled);
213 
214 	preempt_enable_notrace();
215 	return;
216 }
217 
218 static void wakeup_trace_open(struct trace_iterator *iter)
219 {
220 	if (is_graph())
221 		graph_trace_open(iter);
222 }
223 
224 static void wakeup_trace_close(struct trace_iterator *iter)
225 {
226 	if (iter->private)
227 		graph_trace_close(iter);
228 }
229 
230 #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
231 			    TRACE_GRAPH_PRINT_ABS_TIME | \
232 			    TRACE_GRAPH_PRINT_DURATION)
233 
234 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
235 {
236 	/*
237 	 * In graph mode call the graph tracer output function,
238 	 * otherwise go with the TRACE_FN event handler
239 	 */
240 	if (is_graph())
241 		return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
242 
243 	return TRACE_TYPE_UNHANDLED;
244 }
245 
246 static void wakeup_print_header(struct seq_file *s)
247 {
248 	if (is_graph())
249 		print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
250 	else
251 		trace_default_header(s);
252 }
253 
254 static void
255 __trace_function(struct trace_array *tr,
256 		 unsigned long ip, unsigned long parent_ip,
257 		 unsigned long flags, int pc)
258 {
259 	if (is_graph())
260 		trace_graph_function(tr, ip, parent_ip, flags, pc);
261 	else
262 		trace_function(tr, ip, parent_ip, flags, pc);
263 }
264 #else
265 #define __trace_function trace_function
266 
267 static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
268 {
269 	return -EINVAL;
270 }
271 
272 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
273 {
274 	return -1;
275 }
276 
277 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
278 {
279 	return TRACE_TYPE_UNHANDLED;
280 }
281 
282 static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
283 static void wakeup_print_header(struct seq_file *s) { }
284 static void wakeup_trace_open(struct trace_iterator *iter) { }
285 static void wakeup_trace_close(struct trace_iterator *iter) { }
286 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
287 
288 /*
289  * Should this new latency be reported/recorded?
290  */
291 static int report_latency(cycle_t delta)
292 {
293 	if (tracing_thresh) {
294 		if (delta < tracing_thresh)
295 			return 0;
296 	} else {
297 		if (delta <= tracing_max_latency)
298 			return 0;
299 	}
300 	return 1;
301 }
302 
303 static void
304 probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
305 {
306 	if (task != wakeup_task)
307 		return;
308 
309 	wakeup_current_cpu = cpu;
310 }
311 
312 static void notrace
313 probe_wakeup_sched_switch(void *ignore,
314 			  struct task_struct *prev, struct task_struct *next)
315 {
316 	struct trace_array_cpu *data;
317 	cycle_t T0, T1, delta;
318 	unsigned long flags;
319 	long disabled;
320 	int cpu;
321 	int pc;
322 
323 	tracing_record_cmdline(prev);
324 
325 	if (unlikely(!tracer_enabled))
326 		return;
327 
328 	/*
329 	 * When we start a new trace, we set wakeup_task to NULL
330 	 * and then set tracer_enabled = 1. We want to make sure
331 	 * that another CPU does not see the tracer_enabled = 1
332 	 * and the wakeup_task with an older task, that might
333 	 * actually be the same as next.
334 	 */
335 	smp_rmb();
336 
337 	if (next != wakeup_task)
338 		return;
339 
340 	pc = preempt_count();
341 
342 	/* disable local data, not wakeup_cpu data */
343 	cpu = raw_smp_processor_id();
344 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
345 	if (likely(disabled != 1))
346 		goto out;
347 
348 	local_irq_save(flags);
349 	arch_spin_lock(&wakeup_lock);
350 
351 	/* We could race with grabbing wakeup_lock */
352 	if (unlikely(!tracer_enabled || next != wakeup_task))
353 		goto out_unlock;
354 
355 	/* The task we are waiting for is waking up */
356 	data = wakeup_trace->data[wakeup_cpu];
357 
358 	__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
359 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
360 
361 	T0 = data->preempt_timestamp;
362 	T1 = ftrace_now(cpu);
363 	delta = T1-T0;
364 
365 	if (!report_latency(delta))
366 		goto out_unlock;
367 
368 	if (likely(!is_tracing_stopped())) {
369 		tracing_max_latency = delta;
370 		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
371 	}
372 
373 out_unlock:
374 	__wakeup_reset(wakeup_trace);
375 	arch_spin_unlock(&wakeup_lock);
376 	local_irq_restore(flags);
377 out:
378 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
379 }
380 
381 static void __wakeup_reset(struct trace_array *tr)
382 {
383 	wakeup_cpu = -1;
384 	wakeup_prio = -1;
385 
386 	if (wakeup_task)
387 		put_task_struct(wakeup_task);
388 
389 	wakeup_task = NULL;
390 }
391 
392 static void wakeup_reset(struct trace_array *tr)
393 {
394 	unsigned long flags;
395 
396 	tracing_reset_online_cpus(tr);
397 
398 	local_irq_save(flags);
399 	arch_spin_lock(&wakeup_lock);
400 	__wakeup_reset(tr);
401 	arch_spin_unlock(&wakeup_lock);
402 	local_irq_restore(flags);
403 }
404 
405 static void
406 probe_wakeup(void *ignore, struct task_struct *p, int success)
407 {
408 	struct trace_array_cpu *data;
409 	int cpu = smp_processor_id();
410 	unsigned long flags;
411 	long disabled;
412 	int pc;
413 
414 	if (likely(!tracer_enabled))
415 		return;
416 
417 	tracing_record_cmdline(p);
418 	tracing_record_cmdline(current);
419 
420 	if ((wakeup_rt && !rt_task(p)) ||
421 			p->prio >= wakeup_prio ||
422 			p->prio >= current->prio)
423 		return;
424 
425 	pc = preempt_count();
426 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
427 	if (unlikely(disabled != 1))
428 		goto out;
429 
430 	/* interrupts should be off from try_to_wake_up */
431 	arch_spin_lock(&wakeup_lock);
432 
433 	/* check for races. */
434 	if (!tracer_enabled || p->prio >= wakeup_prio)
435 		goto out_locked;
436 
437 	/* reset the trace */
438 	__wakeup_reset(wakeup_trace);
439 
440 	wakeup_cpu = task_cpu(p);
441 	wakeup_current_cpu = wakeup_cpu;
442 	wakeup_prio = p->prio;
443 
444 	wakeup_task = p;
445 	get_task_struct(wakeup_task);
446 
447 	local_save_flags(flags);
448 
449 	data = wakeup_trace->data[wakeup_cpu];
450 	data->preempt_timestamp = ftrace_now(cpu);
451 	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
452 
453 	/*
454 	 * We must be careful in using CALLER_ADDR2. But since wake_up
455 	 * is not called by an assembly function  (where as schedule is)
456 	 * it should be safe to use it here.
457 	 */
458 	__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
459 
460 out_locked:
461 	arch_spin_unlock(&wakeup_lock);
462 out:
463 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
464 }
465 
466 static void start_wakeup_tracer(struct trace_array *tr)
467 {
468 	int ret;
469 
470 	ret = register_trace_sched_wakeup(probe_wakeup, NULL);
471 	if (ret) {
472 		pr_info("wakeup trace: Couldn't activate tracepoint"
473 			" probe to kernel_sched_wakeup\n");
474 		return;
475 	}
476 
477 	ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
478 	if (ret) {
479 		pr_info("wakeup trace: Couldn't activate tracepoint"
480 			" probe to kernel_sched_wakeup_new\n");
481 		goto fail_deprobe;
482 	}
483 
484 	ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
485 	if (ret) {
486 		pr_info("sched trace: Couldn't activate tracepoint"
487 			" probe to kernel_sched_switch\n");
488 		goto fail_deprobe_wake_new;
489 	}
490 
491 	ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
492 	if (ret) {
493 		pr_info("wakeup trace: Couldn't activate tracepoint"
494 			" probe to kernel_sched_migrate_task\n");
495 		return;
496 	}
497 
498 	wakeup_reset(tr);
499 
500 	/*
501 	 * Don't let the tracer_enabled = 1 show up before
502 	 * the wakeup_task is reset. This may be overkill since
503 	 * wakeup_reset does a spin_unlock after setting the
504 	 * wakeup_task to NULL, but I want to be safe.
505 	 * This is a slow path anyway.
506 	 */
507 	smp_wmb();
508 
509 	if (start_func_tracer(is_graph()))
510 		printk(KERN_ERR "failed to start wakeup tracer\n");
511 
512 	return;
513 fail_deprobe_wake_new:
514 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
515 fail_deprobe:
516 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
517 }
518 
519 static void stop_wakeup_tracer(struct trace_array *tr)
520 {
521 	tracer_enabled = 0;
522 	stop_func_tracer(is_graph());
523 	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
524 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
525 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
526 	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
527 }
528 
529 static int __wakeup_tracer_init(struct trace_array *tr)
530 {
531 	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
532 	trace_flags |= TRACE_ITER_LATENCY_FMT;
533 
534 	tracing_max_latency = 0;
535 	wakeup_trace = tr;
536 	start_wakeup_tracer(tr);
537 	return 0;
538 }
539 
540 static int wakeup_tracer_init(struct trace_array *tr)
541 {
542 	wakeup_rt = 0;
543 	return __wakeup_tracer_init(tr);
544 }
545 
546 static int wakeup_rt_tracer_init(struct trace_array *tr)
547 {
548 	wakeup_rt = 1;
549 	return __wakeup_tracer_init(tr);
550 }
551 
552 static void wakeup_tracer_reset(struct trace_array *tr)
553 {
554 	stop_wakeup_tracer(tr);
555 	/* make sure we put back any tasks we are tracing */
556 	wakeup_reset(tr);
557 
558 	if (!save_lat_flag)
559 		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
560 }
561 
562 static void wakeup_tracer_start(struct trace_array *tr)
563 {
564 	wakeup_reset(tr);
565 	tracer_enabled = 1;
566 }
567 
568 static void wakeup_tracer_stop(struct trace_array *tr)
569 {
570 	tracer_enabled = 0;
571 }
572 
573 static struct tracer wakeup_tracer __read_mostly =
574 {
575 	.name		= "wakeup",
576 	.init		= wakeup_tracer_init,
577 	.reset		= wakeup_tracer_reset,
578 	.start		= wakeup_tracer_start,
579 	.stop		= wakeup_tracer_stop,
580 	.print_max	= 1,
581 	.print_header	= wakeup_print_header,
582 	.print_line	= wakeup_print_line,
583 	.flags		= &tracer_flags,
584 	.set_flag	= wakeup_set_flag,
585 #ifdef CONFIG_FTRACE_SELFTEST
586 	.selftest    = trace_selftest_startup_wakeup,
587 #endif
588 	.open		= wakeup_trace_open,
589 	.close		= wakeup_trace_close,
590 	.use_max_tr	= 1,
591 };
592 
593 static struct tracer wakeup_rt_tracer __read_mostly =
594 {
595 	.name		= "wakeup_rt",
596 	.init		= wakeup_rt_tracer_init,
597 	.reset		= wakeup_tracer_reset,
598 	.start		= wakeup_tracer_start,
599 	.stop		= wakeup_tracer_stop,
600 	.wait_pipe	= poll_wait_pipe,
601 	.print_max	= 1,
602 	.print_header	= wakeup_print_header,
603 	.print_line	= wakeup_print_line,
604 	.flags		= &tracer_flags,
605 	.set_flag	= wakeup_set_flag,
606 #ifdef CONFIG_FTRACE_SELFTEST
607 	.selftest    = trace_selftest_startup_wakeup,
608 #endif
609 	.open		= wakeup_trace_open,
610 	.close		= wakeup_trace_close,
611 	.use_max_tr	= 1,
612 };
613 
614 __init static int init_wakeup_tracer(void)
615 {
616 	int ret;
617 
618 	ret = register_tracer(&wakeup_tracer);
619 	if (ret)
620 		return ret;
621 
622 	ret = register_tracer(&wakeup_rt_tracer);
623 	if (ret)
624 		return ret;
625 
626 	return 0;
627 }
628 device_initcall(init_wakeup_tracer);
629