xref: /openbmc/linux/kernel/trace/trace_sched_wakeup.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  * trace task wakeup timings
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Based on code from the latency_tracer, that is:
8  *
9  *  Copyright (C) 2004-2006 Ingo Molnar
10  *  Copyright (C) 2004 William Lee Irwin III
11  */
12 #include <linux/module.h>
13 #include <linux/fs.h>
14 #include <linux/debugfs.h>
15 #include <linux/kallsyms.h>
16 #include <linux/uaccess.h>
17 #include <linux/ftrace.h>
18 #include <trace/sched.h>
19 
20 #include "trace.h"
21 
/* Trace array used by this tracer; assigned in wakeup_tracer_init(). */
static struct trace_array	*wakeup_trace;
/* Non-zero while the probes below are allowed to record. */
static int __read_mostly	tracer_enabled;

/*
 * Task whose wakeup is currently being timed, or NULL when none.
 * A task reference is taken in probe_wakeup() and dropped in
 * __wakeup_reset().
 */
static struct task_struct	*wakeup_task;
/* task_cpu() of wakeup_task when the wakeup was seen; -1 after a reset. */
static int			wakeup_cpu;
/*
 * Priority of wakeup_task. Being unsigned, the -1 "no task" value
 * converts to the largest unsigned value, so the first candidate task
 * always passes the "p->prio >= wakeup_prio" filter in probe_wakeup().
 */
static unsigned			wakeup_prio = -1;

/* Held around every update of wakeup_task, wakeup_cpu and wakeup_prio. */
static raw_spinlock_t wakeup_lock =
	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

/* Caller must hold wakeup_lock; see wakeup_reset() for the locked wrapper. */
static void __wakeup_reset(struct trace_array *tr);
33 
34 #ifdef CONFIG_FUNCTION_TRACER
/*
 * wakeup uses its own tracer function to keep the overhead down:
 */
38 static void
39 wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
40 {
41 	struct trace_array *tr = wakeup_trace;
42 	struct trace_array_cpu *data;
43 	unsigned long flags;
44 	long disabled;
45 	int resched;
46 	int cpu;
47 	int pc;
48 
49 	if (likely(!wakeup_task))
50 		return;
51 
52 	pc = preempt_count();
53 	resched = ftrace_preempt_disable();
54 
55 	cpu = raw_smp_processor_id();
56 	data = tr->data[cpu];
57 	disabled = atomic_inc_return(&data->disabled);
58 	if (unlikely(disabled != 1))
59 		goto out;
60 
61 	local_irq_save(flags);
62 	__raw_spin_lock(&wakeup_lock);
63 
64 	if (unlikely(!wakeup_task))
65 		goto unlock;
66 
67 	/*
68 	 * The task can't disappear because it needs to
69 	 * wake up first, and we have the wakeup_lock.
70 	 */
71 	if (task_cpu(wakeup_task) != cpu)
72 		goto unlock;
73 
74 	trace_function(tr, data, ip, parent_ip, flags, pc);
75 
76  unlock:
77 	__raw_spin_unlock(&wakeup_lock);
78 	local_irq_restore(flags);
79 
80  out:
81 	atomic_dec(&data->disabled);
82 
83 	ftrace_preempt_enable(resched);
84 }
85 
/*
 * ftrace hook for this tracer; registered in start_wakeup_tracer() and
 * unregistered in stop_wakeup_tracer().
 */
static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
};
90 #endif /* CONFIG_FUNCTION_TRACER */
91 
92 /*
93  * Should this new latency be reported/recorded?
94  */
95 static int report_latency(cycle_t delta)
96 {
97 	if (tracing_thresh) {
98 		if (delta < tracing_thresh)
99 			return 0;
100 	} else {
101 		if (delta <= tracing_max_latency)
102 			return 0;
103 	}
104 	return 1;
105 }
106 
107 static void notrace
108 probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
109 	struct task_struct *next)
110 {
111 	unsigned long latency = 0, t0 = 0, t1 = 0;
112 	struct trace_array_cpu *data;
113 	cycle_t T0, T1, delta;
114 	unsigned long flags;
115 	long disabled;
116 	int cpu;
117 	int pc;
118 
119 	tracing_record_cmdline(prev);
120 
121 	if (unlikely(!tracer_enabled))
122 		return;
123 
124 	/*
125 	 * When we start a new trace, we set wakeup_task to NULL
126 	 * and then set tracer_enabled = 1. We want to make sure
127 	 * that another CPU does not see the tracer_enabled = 1
128 	 * and the wakeup_task with an older task, that might
129 	 * actually be the same as next.
130 	 */
131 	smp_rmb();
132 
133 	if (next != wakeup_task)
134 		return;
135 
136 	pc = preempt_count();
137 
138 	/* The task we are waiting for is waking up */
139 	data = wakeup_trace->data[wakeup_cpu];
140 
141 	/* disable local data, not wakeup_cpu data */
142 	cpu = raw_smp_processor_id();
143 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
144 	if (likely(disabled != 1))
145 		goto out;
146 
147 	local_irq_save(flags);
148 	__raw_spin_lock(&wakeup_lock);
149 
150 	/* We could race with grabbing wakeup_lock */
151 	if (unlikely(!tracer_enabled || next != wakeup_task))
152 		goto out_unlock;
153 
154 	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
155 
156 	/*
157 	 * usecs conversion is slow so we try to delay the conversion
158 	 * as long as possible:
159 	 */
160 	T0 = data->preempt_timestamp;
161 	T1 = ftrace_now(cpu);
162 	delta = T1-T0;
163 
164 	if (!report_latency(delta))
165 		goto out_unlock;
166 
167 	latency = nsecs_to_usecs(delta);
168 
169 	tracing_max_latency = delta;
170 	t0 = nsecs_to_usecs(T0);
171 	t1 = nsecs_to_usecs(T1);
172 
173 	update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
174 
175 out_unlock:
176 	__wakeup_reset(wakeup_trace);
177 	__raw_spin_unlock(&wakeup_lock);
178 	local_irq_restore(flags);
179 out:
180 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
181 }
182 
183 static void __wakeup_reset(struct trace_array *tr)
184 {
185 	struct trace_array_cpu *data;
186 	int cpu;
187 
188 	for_each_possible_cpu(cpu) {
189 		data = tr->data[cpu];
190 		tracing_reset(tr, cpu);
191 	}
192 
193 	wakeup_cpu = -1;
194 	wakeup_prio = -1;
195 
196 	if (wakeup_task)
197 		put_task_struct(wakeup_task);
198 
199 	wakeup_task = NULL;
200 }
201 
202 static void wakeup_reset(struct trace_array *tr)
203 {
204 	unsigned long flags;
205 
206 	local_irq_save(flags);
207 	__raw_spin_lock(&wakeup_lock);
208 	__wakeup_reset(tr);
209 	__raw_spin_unlock(&wakeup_lock);
210 	local_irq_restore(flags);
211 }
212 
213 static void
214 probe_wakeup(struct rq *rq, struct task_struct *p, int success)
215 {
216 	int cpu = smp_processor_id();
217 	unsigned long flags;
218 	long disabled;
219 	int pc;
220 
221 	if (likely(!tracer_enabled))
222 		return;
223 
224 	tracing_record_cmdline(p);
225 	tracing_record_cmdline(current);
226 
227 	if (likely(!rt_task(p)) ||
228 			p->prio >= wakeup_prio ||
229 			p->prio >= current->prio)
230 		return;
231 
232 	pc = preempt_count();
233 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
234 	if (unlikely(disabled != 1))
235 		goto out;
236 
237 	/* interrupts should be off from try_to_wake_up */
238 	__raw_spin_lock(&wakeup_lock);
239 
240 	/* check for races. */
241 	if (!tracer_enabled || p->prio >= wakeup_prio)
242 		goto out_locked;
243 
244 	/* reset the trace */
245 	__wakeup_reset(wakeup_trace);
246 
247 	wakeup_cpu = task_cpu(p);
248 	wakeup_prio = p->prio;
249 
250 	wakeup_task = p;
251 	get_task_struct(wakeup_task);
252 
253 	local_save_flags(flags);
254 
255 	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
256 	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
257 		       CALLER_ADDR1, CALLER_ADDR2, flags, pc);
258 
259 out_locked:
260 	__raw_spin_unlock(&wakeup_lock);
261 out:
262 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
263 }
264 
/*
 * save_tracer_enabled is used to save the state of the tracer_enabled
 * variable when we disable it when we open a trace output file.
 * wakeup_tracer_open() clears only tracer_enabled; wakeup_tracer_close()
 * consults this copy to decide whether to turn tracing back on.
 */
static int save_tracer_enabled;
270 
271 static void start_wakeup_tracer(struct trace_array *tr)
272 {
273 	int ret;
274 
275 	ret = register_trace_sched_wakeup(probe_wakeup);
276 	if (ret) {
277 		pr_info("wakeup trace: Couldn't activate tracepoint"
278 			" probe to kernel_sched_wakeup\n");
279 		return;
280 	}
281 
282 	ret = register_trace_sched_wakeup_new(probe_wakeup);
283 	if (ret) {
284 		pr_info("wakeup trace: Couldn't activate tracepoint"
285 			" probe to kernel_sched_wakeup_new\n");
286 		goto fail_deprobe;
287 	}
288 
289 	ret = register_trace_sched_switch(probe_wakeup_sched_switch);
290 	if (ret) {
291 		pr_info("sched trace: Couldn't activate tracepoint"
292 			" probe to kernel_sched_schedule\n");
293 		goto fail_deprobe_wake_new;
294 	}
295 
296 	wakeup_reset(tr);
297 
298 	/*
299 	 * Don't let the tracer_enabled = 1 show up before
300 	 * the wakeup_task is reset. This may be overkill since
301 	 * wakeup_reset does a spin_unlock after setting the
302 	 * wakeup_task to NULL, but I want to be safe.
303 	 * This is a slow path anyway.
304 	 */
305 	smp_wmb();
306 
307 	register_ftrace_function(&trace_ops);
308 
309 	if (tracing_is_enabled()) {
310 		tracer_enabled = 1;
311 		save_tracer_enabled = 1;
312 	} else {
313 		tracer_enabled = 0;
314 		save_tracer_enabled = 0;
315 	}
316 
317 	return;
318 fail_deprobe_wake_new:
319 	unregister_trace_sched_wakeup_new(probe_wakeup);
320 fail_deprobe:
321 	unregister_trace_sched_wakeup(probe_wakeup);
322 }
323 
/*
 * Undo start_wakeup_tracer(): clear both enable flags first, then
 * unregister the function tracer callback and the three tracepoint
 * probes in the reverse order of their registration.
 *
 * @tr is not used here.
 */
static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	save_tracer_enabled = 0;
	unregister_ftrace_function(&trace_ops);
	unregister_trace_sched_switch(probe_wakeup_sched_switch);
	unregister_trace_sched_wakeup_new(probe_wakeup);
	unregister_trace_sched_wakeup(probe_wakeup);
}
333 
/*
 * .init callback: clear the recorded maximum latency, remember @tr as
 * the trace array to record into, and start the tracer.
 *
 * Always returns 0; start_wakeup_tracer() returns void, so a probe
 * registration failure there is not reported to the caller.
 */
static int wakeup_tracer_init(struct trace_array *tr)
{
	tracing_max_latency = 0;
	wakeup_trace = tr;
	start_wakeup_tracer(tr);
	return 0;
}
341 
/*
 * .reset callback: stop the tracer, then reset @tr so that the
 * reference on any task still being traced is released.
 */
static void wakeup_tracer_reset(struct trace_array *tr)
{
	stop_wakeup_tracer(tr);
	/* make sure we put back any tasks we are tracing */
	wakeup_reset(tr);
}
348 
/*
 * .start callback: discard any trace in progress on @tr and set both
 * enable flags. The tracepoint probes are not touched here.
 */
static void wakeup_tracer_start(struct trace_array *tr)
{
	wakeup_reset(tr);
	tracer_enabled = 1;
	save_tracer_enabled = 1;
}
355 
/*
 * .stop callback: clear both enable flags so the probes return early.
 * The probes stay registered and @tr is left untouched.
 */
static void wakeup_tracer_stop(struct trace_array *tr)
{
	tracer_enabled = 0;
	save_tracer_enabled = 0;
}
361 
/*
 * .open callback: pause recording while the trace is being read.
 * save_tracer_enabled is deliberately left alone so that
 * wakeup_tracer_close() can tell whether to resume.
 */
static void wakeup_tracer_open(struct trace_iterator *iter)
{
	/* stop the trace while dumping */
	tracer_enabled = 0;
}
367 
368 static void wakeup_tracer_close(struct trace_iterator *iter)
369 {
370 	/* forget about any processes we were recording */
371 	if (save_tracer_enabled) {
372 		wakeup_reset(iter->tr);
373 		tracer_enabled = 1;
374 	}
375 }
376 
/*
 * Tracer descriptor for the "wakeup" tracer; handed to
 * register_tracer() by init_wakeup_tracer().
 */
static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.open		= wakeup_tracer_open,
	.close		= wakeup_tracer_close,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
};
391 
392 __init static int init_wakeup_tracer(void)
393 {
394 	int ret;
395 
396 	ret = register_tracer(&wakeup_tracer);
397 	if (ret)
398 		return ret;
399 
400 	return 0;
401 }
402 device_initcall(init_wakeup_tracer);
403