#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid	= t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of the kthread stopping:
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(	int,	ret	)
	),

	TP_fast_assign(
		__entry->ret	= ret;
	),

	TP_printk("ret=%d", __entry->ret)
);
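
/*
 * Illustrative usage, not part of this header: both kthread events can be
 * enabled at run time through tracefs (path assumes the usual mount point):
 *
 *	# echo 1 > /sys/kernel/tracing/events/sched/sched_kthread_stop/enable
 *	# echo 1 > /sys/kernel/tracing/events/sched/sched_kthread_stop_ret/enable
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * The emitted lines follow the TP_printk() formats above, roughly
 * "comm=kworker/0:1 pid=17" followed by "ret=0"; the sample values are a
 * sketch, not captured output.
 */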

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	success			)
		__field(	int,	target_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->success	= 1; /* rudiment, kill when possible */
		__entry->target_cpu	= task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_waking,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));
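
/*
 * A minimal sketch (names illustrative, not part of this header) of
 * attaching a probe to sched_wakeup from a module; the probe signature is
 * the TP_PROTO() above preceded by the private-data pointer:
 *
 *	static void my_wakeup_probe(void *data, struct task_struct *p)
 *	{
 *		pr_info("wakeup: comm=%s pid=%d\n", p->comm, p->pid);
 *	}
 *
 *	In module init:  register_trace_sched_wakeup(my_wakeup_probe, NULL);
 *	In module exit:  unregister_trace_sched_wakeup(my_wakeup_probe, NULL);
 */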

#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_STATE_MAX;

	return __get_task_state(p);
}
#endif /* CREATE_TRACE_POINTS */
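
/*
 * Worked example of the encoding above: a task preempted while RUNNING
 * records TASK_STATE_MAX, which the sched_switch TP_printk() below renders
 * as "R+" (no report bits set, the "+" from the TASK_STATE_MAX test); a
 * task that blocked in TASK_UNINTERRUPTIBLE records the corresponding
 * TASK_REPORT_* bit and prints as "D".
 */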

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	prev_pid			)
		__field(	int,	prev_prio			)
		__field(	long,	prev_state			)
		__array(	char,	next_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	next_pid			)
		__field(	int,	next_prio			)
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,

		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
				{ 0x40, "P" }, { 0x80, "I" }) :
		  "R",

		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
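
/*
 * Illustrative only (commands assumed available, values a sketch):
 * sched_switch is typically consumed through perf or trace-cmd, e.g.:
 *
 *	# perf record -e sched:sched_switch -a -- sleep 1
 *	# trace-cmd record -e sched:sched_switch
 *
 * A line rendered by the TP_printk() above looks roughly like:
 *
 *	prev_comm=bash prev_pid=274 prev_prio=120 prev_state=S ==>
 *	next_comm=swapper/1 next_pid=0 next_prio=120
 *
 * The single-letter prev_state flags follow the TASK_REPORT_* order:
 * S (interruptible sleep), D (uninterruptible), T (stopped), t (traced),
 * X (exit dead), Z (zombie), P (parked), I (idle); "R" means running, and
 * a trailing "+" marks a preempted task, per __trace_sched_switch_state().
 */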

/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);
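
/*
 * Note: orig_cpu is sampled via task_cpu(p) when the tracepoint fires,
 * i.e. the CPU the task is still assigned to as the migration is decided,
 * while dest_cpu is passed in by the caller.
 */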

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));


/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p));

/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid		= pid_nr(pid);
		__entry->prio		= current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array(	char,	parent_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	parent_pid			)
		__array(	char,	child_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	child_pid			)
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid	= parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid	= child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		__entry->parent_comm, __entry->parent_pid,
		__entry->child_comm, __entry->child_pid)
);
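
/*
 * Sketch of a rendered sched_process_fork line (values illustrative):
 *
 *	comm=bash pid=274 child_comm=bash child_pid=275
 *
 * child_comm usually equals the parent's comm at this point, since the
 * comm is inherited on fork and only changes on a later exec.
 */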

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string(	filename,	bprm->filename	)
		__field(	pid_t,		pid		)
		__field(	pid_t,		old_pid		)
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid		= p->pid;
		__entry->old_pid	= old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);
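
/*
 * Unlike the fixed-size comm arrays above, __string()/__assign_str() store
 * the filename as a variable-length string sized per event in the ring
 * buffer, so long paths are recorded without a fixed-width field. old_pid
 * differs from pid when a non-leader thread exec'ed and took over the
 * thread group leader's pid.
 */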

/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	delay			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid	= tsk->pid;
		__entry->delay	= delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->delay)
);


/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting blocked time (time the task spends in
 * uninterruptible sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));
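
/*
 * Usage note (illustrative, not enforced by this header): the sched_stat_*
 * events are emitted from the schedstats accounting paths, so on kernels
 * of this vintage they typically only fire when schedstats are enabled,
 * e.g.:
 *
 *	# sysctl kernel.sched_schedstats=1
 *	# echo 1 > /sys/kernel/tracing/events/sched/sched_stat_wait/enable
 */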

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	runtime			)
		__field( u64,	vruntime		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->runtime	= runtime;
		__entry->vruntime	= vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->runtime,
			(unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));
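
/*
 * The __perf_count(runtime) annotation above hands the runtime value to
 * perf as the event count, so sampling on this event (e.g.
 * "perf record -e sched:sched_stat_runtime") weights samples by
 * nanoseconds of runtime rather than by a plain event count; behavior as
 * understood from the __perf_count helper, command line illustrative.
 */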

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( int,	oldprio			)
		__field( int,	newprio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->oldprio	= tsk->prio;
		__entry->newprio	= pi_task ? pi_task->prio : tsk->prio;
		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
			__entry->comm, __entry->pid,
			__entry->oldprio, __entry->newprio)
);
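
/*
 * Reading the prio fields: lower numeric values mean higher priority
 * (0-99 for realtime, 100-139 for normal tasks, 120 being the nice-0
 * default). A boost therefore shows newprio < oldprio; a nice-0 task
 * boosted by a realtime waiter might log "oldprio=120 newprio=19" (sample
 * values are a sketch). When pi_task is NULL the boost is being dropped
 * and newprio reverts to the task's own prio.
 */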

#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */

DECLARE_EVENT_CLASS(sched_move_task_template,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	pid			)
		__field( pid_t,	tgid			)
		__field( pid_t,	ngid			)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->pid		= task_pid_nr(tsk);
		__entry->tgid		= task_tgid_nr(tsk);
		__entry->ngid		= task_numa_group_id(tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
			__entry->pid, __entry->tgid, __entry->ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_cpu, __entry->dst_nid)
);
/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect whether automatic NUMA balancing is bouncing a task between nodes.
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	src_pid			)
		__field( pid_t,	src_tgid		)
		__field( pid_t,	src_ngid		)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( pid_t,	dst_pid			)
		__field( pid_t,	dst_tgid		)
		__field( pid_t,	dst_ngid		)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->src_pid	= task_pid_nr(src_tsk);
		__entry->src_tgid	= task_tgid_nr(src_tsk);
		__entry->src_ngid	= task_numa_group_id(src_tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_pid	= task_pid_nr(dst_tsk);
		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
			__entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracepoint for waking a polling cpu without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(	int,	cpu	)
	),

	TP_fast_assign(
		__entry->cpu	= cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);
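
/*
 * Background for the event above: an idle CPU that advertises polling
 * (e.g. via TIF_POLLING_NRFLAG) watches its need_resched flag, so a waker
 * can skip the reschedule IPI entirely; this event records those cheap
 * wakeups. Description reflects the common polling-idle design, not code
 * in this header.
 */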
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>