/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>

#include "trace.h"

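/*
 * The wakeup tracer records how long the highest-priority RT task that
 * has been woken up has to wait before it is actually scheduled in.
 * wakeup_task, wakeup_cpu and wakeup_prio describe the task currently
 * being traced; wakeup_lock serializes all updates to that state.
 */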
static struct trace_array	*wakeup_trace;
static int __read_mostly	tracer_enabled;

static struct task_struct	*wakeup_task;
static int			wakeup_cpu;
static unsigned			wakeup_prio = -1;

static raw_spinlock_t wakeup_lock =
	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

static void __wakeup_reset(struct trace_array *tr);

#ifdef CONFIG_FTRACE
/*
 * The wakeup tracer uses its own tracer function to keep the overhead down:
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int resched;
	int cpu;

	if (likely(!wakeup_task))
		return;

	resched = need_resched();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	data = tr->data[cpu];
	disabled = atomic_inc_return(&data->disabled);
	if (unlikely(disabled != 1))
		goto out;

	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);

	if (unlikely(!wakeup_task))
		goto unlock;

	/*
	 * The task can't disappear because it needs to
	 * wake up first, and we have the wakeup_lock.
	 */
	if (task_cpu(wakeup_task) != cpu)
		goto unlock;

	trace_function(tr, data, ip, parent_ip, flags);

 unlock:
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);

 out:
	atomic_dec(&data->disabled);

	/*
	 * To prevent recursion from the scheduler, if the
	 * resched flag was set before we entered, then
	 * don't reschedule.
	 */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
};
#endif /* CONFIG_FTRACE */

/*
 * Should this new latency be reported/recorded?
 */
static int report_latency(cycle_t delta)
{
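	/*
	 * A non-zero tracing_thresh records every latency above that
	 * threshold; a zero threshold records only latencies that beat
	 * the current tracing_max_latency.
	 */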
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}

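/*
 * Called via sched_switch_callback() on every context switch.  When the
 * task being traced is switched in, compute the time since its wakeup
 * and, if report_latency() accepts it, record the new maximum with
 * update_max_tr().
 */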
static void notrace
wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
	struct task_struct *next)
{
	unsigned long latency = 0, t0 = 0, t1 = 0;
	struct trace_array **ptr = private;
	struct trace_array *tr = *ptr;
	struct trace_array_cpu *data;
	cycle_t T0, T1, delta;
	unsigned long flags;
	long disabled;
	int cpu;

	if (unlikely(!tracer_enabled))
		return;

	/*
	 * When we start a new trace, we set wakeup_task to NULL
	 * and then set tracer_enabled = 1. We want to make sure
	 * that another CPU does not see the tracer_enabled = 1
	 * and the wakeup_task with an older task, that might
	 * actually be the same as next.
	 */
	smp_rmb();

	if (next != wakeup_task)
		return;

	/* The task we have been waiting for is being scheduled in */
	data = tr->data[wakeup_cpu];

	/* disable local data, not wakeup_cpu data */
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;
	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);

	/* We could race with grabbing wakeup_lock */
	if (unlikely(!tracer_enabled || next != wakeup_task))
		goto out_unlock;

	trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);

	/*
	 * usecs conversion is slow so we try to delay the conversion
	 * as long as possible:
	 */
	T0 = data->preempt_timestamp;
	T1 = ftrace_now(cpu);
	delta = T1 - T0;

	if (!report_latency(delta))
		goto out_unlock;

	latency = nsecs_to_usecs(delta);

	tracing_max_latency = delta;
	t0 = nsecs_to_usecs(T0);
	t1 = nsecs_to_usecs(T1);

	update_max_tr(tr, wakeup_task, wakeup_cpu);

out_unlock:
	__wakeup_reset(tr);
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
out:
	atomic_dec(&tr->data[cpu]->disabled);
}

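/*
 * Probe attached to the kernel_sched_schedule marker.  It unpacks the
 * vararg list according to the registered format string and hands
 * rq/prev/next to wakeup_sched_switch().
 */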
static notrace void
sched_switch_callback(void *probe_data, void *call_data,
		      const char *format, va_list *args)
{
	struct task_struct *prev;
	struct task_struct *next;
	struct rq *__rq;

	/* skip prev_pid %d next_pid %d prev_state %ld */
	(void)va_arg(*args, int);
	(void)va_arg(*args, int);
	(void)va_arg(*args, long);
	__rq = va_arg(*args, typeof(__rq));
	prev = va_arg(*args, typeof(prev));
	next = va_arg(*args, typeof(next));

	tracing_record_cmdline(prev);

	/*
	 * probe_data is the &wakeup_trace pointer that was passed to
	 * marker_probe_register(); wakeup_sched_switch() dereferences
	 * it to find the trace_array.
	 */
	wakeup_sched_switch(probe_data, __rq, prev, next);
}

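/*
 * Reset the per-CPU trace buffers and forget the task currently being
 * traced.  Callers must hold wakeup_lock with interrupts disabled;
 * wakeup_reset() below is the locking wrapper.
 */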
static void __wakeup_reset(struct trace_array *tr)
{
	struct trace_array_cpu *data;
	int cpu;

	for_each_possible_cpu(cpu) {
		data = tr->data[cpu];
		tracing_reset(data);
	}

	wakeup_cpu = -1;
	wakeup_prio = -1;

	if (wakeup_task)
		put_task_struct(wakeup_task);

	wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
	unsigned long flags;

	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);
	__wakeup_reset(tr);
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
}

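/*
 * Called from the wakeup probes.  Start a new trace only when an RT
 * task is woken whose priority is higher than both the task we are
 * already tracing and rq->curr at the time of the wakeup; then record
 * the wakeup timestamp and the first trace entry.
 */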
static void
wakeup_check_start(struct trace_array *tr, struct task_struct *p,
		   struct task_struct *curr)
{
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;

	if (likely(!rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= curr->prio)
		return;

	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	__raw_spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(tr);

	wakeup_cpu = task_cpu(p);
	wakeup_prio = p->prio;

	wakeup_task = p;
	get_task_struct(wakeup_task);

	local_save_flags(flags);

	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
	trace_function(tr, tr->data[wakeup_cpu],
		       CALLER_ADDR1, CALLER_ADDR2, flags);

out_locked:
	__raw_spin_unlock(&wakeup_lock);
out:
	atomic_dec(&tr->data[cpu]->disabled);
}

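/*
 * Probe attached to the kernel_sched_wakeup and kernel_sched_wakeup_new
 * markers.  It unpacks the vararg list, records the command lines of the
 * woken task and of rq->curr, and lets wakeup_check_start() decide
 * whether to begin a new trace.
 */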
static notrace void
wake_up_callback(void *probe_data, void *call_data,
		 const char *format, va_list *args)
{
	struct trace_array **ptr = probe_data;
	struct trace_array *tr = *ptr;
	struct task_struct *curr;
	struct task_struct *task;
	struct rq *__rq;

	if (likely(!tracer_enabled))
		return;

	/* Skip pid %d state %ld */
	(void)va_arg(*args, int);
	(void)va_arg(*args, long);
	/* now get the meat: "rq %p task %p rq->curr %p" */
	__rq = va_arg(*args, typeof(__rq));
	task = va_arg(*args, typeof(task));
	curr = va_arg(*args, typeof(curr));

	tracing_record_cmdline(task);
	tracing_record_cmdline(curr);

	wakeup_check_start(tr, task, curr);
}

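/*
 * Register the scheduler marker probes and the ftrace function tracer,
 * reset the trace, and only then set tracer_enabled so the probes start
 * doing real work.
 */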
static void start_wakeup_tracer(struct trace_array *tr)
{
	int ret;

	ret = marker_probe_register("kernel_sched_wakeup",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			wake_up_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("wakeup trace: Couldn't add marker"
			" probe to kernel_sched_wakeup\n");
		return;
	}

	ret = marker_probe_register("kernel_sched_wakeup_new",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			wake_up_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("wakeup trace: Couldn't add marker"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = marker_probe_register("kernel_sched_schedule",
		"prev_pid %d next_pid %d prev_state %ld "
		"## rq %p prev %p next %p",
		sched_switch_callback,
		&wakeup_trace);
	if (ret) {
		pr_info("sched trace: Couldn't add marker"
			" probe to kernel_sched_schedule\n");
		goto fail_deprobe_wake_new;
	}

	wakeup_reset(tr);

	/*
	 * Don't let the tracer_enabled = 1 show up before
	 * the wakeup_task is reset. This may be overkill since
	 * wakeup_reset does a spin_unlock after setting the
	 * wakeup_task to NULL, but I want to be safe.
	 * This is a slow path anyway.
	 */
	smp_wmb();

	register_ftrace_function(&trace_ops);

	tracer_enabled = 1;

	return;
fail_deprobe_wake_new:
	marker_probe_unregister("kernel_sched_wakeup_new",
				wake_up_callback,
				&wakeup_trace);
fail_deprobe:
	marker_probe_unregister("kernel_sched_wakeup",
				wake_up_callback,
				&wakeup_trace);
}

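/*
 * Clear tracer_enabled first so the probes bail out early, then tear
 * down the function tracer and the marker probes.
 */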
static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	unregister_ftrace_function(&trace_ops);
	marker_probe_unregister("kernel_sched_schedule",
				sched_switch_callback,
				&wakeup_trace);
	marker_probe_unregister("kernel_sched_wakeup_new",
				wake_up_callback,
				&wakeup_trace);
	marker_probe_unregister("kernel_sched_wakeup",
				wake_up_callback,
				&wakeup_trace);
}

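/*
 * tr->ctrl tracks whether tracing is currently switched on; the
 * callbacks below only start or stop the tracer when that state
 * allows it.
 */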
static void wakeup_tracer_init(struct trace_array *tr)
{
	wakeup_trace = tr;

	if (tr->ctrl)
		start_wakeup_tracer(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
	if (tr->ctrl) {
		stop_wakeup_tracer(tr);
		/* make sure we put back any tasks we are tracing */
		wakeup_reset(tr);
	}
}

static void wakeup_tracer_ctrl_update(struct trace_array *tr)
{
	if (tr->ctrl)
		start_wakeup_tracer(tr);
	else
		stop_wakeup_tracer(tr);
}

static void wakeup_tracer_open(struct trace_iterator *iter)
{
	/* stop the trace while dumping */
	if (iter->tr->ctrl)
		stop_wakeup_tracer(iter->tr);
}

static void wakeup_tracer_close(struct trace_iterator *iter)
{
	/* forget about any processes we were recording */
	if (iter->tr->ctrl)
		start_wakeup_tracer(iter->tr);
}

static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.open		= wakeup_tracer_open,
	.close		= wakeup_tracer_close,
	.ctrl_update	= wakeup_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_wakeup,
#endif
};

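/*
 * Register the "wakeup" tracer with the tracing core at boot.
 */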
__init static int init_wakeup_tracer(void)
{
	int ret;

	ret = register_tracer(&wakeup_tracer);
	if (ret)
		return ret;

	return 0;
}
device_initcall(init_wakeup_tracer);