xref: /openbmc/linux/include/trace/events/sched.h (revision d003c346bf75f01d240c80000baf2fbf28e53782)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #undef TRACE_SYSTEM
3 #define TRACE_SYSTEM sched
4 
5 #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
6 #define _TRACE_SCHED_H
7 
8 #include <linux/sched/numa_balancing.h>
9 #include <linux/tracepoint.h>
10 #include <linux/binfmts.h>
11 
12 /*
13  * Tracepoint for calling kthread_stop, performed to end a kthread (logs comm and pid of @t):
14  */
15 TRACE_EVENT(sched_kthread_stop,
16 
17 	TP_PROTO(struct task_struct *t),
18 
19 	TP_ARGS(t),
20 
21 	TP_STRUCT__entry(
22 		__array(	char,	comm,	TASK_COMM_LEN	)
23 		__field(	pid_t,	pid			)
24 	),
25 	/* memcpy() copies the whole TASK_COMM_LEN-byte comm buffer of t. */
26 	TP_fast_assign(
27 		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
28 		__entry->pid	= t->pid;
29 	),
30 
31 	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
32 );
33 
34 /*
35  * Tracepoint for the return value of the kthread stopping (stores @ret unchanged as an int):
36  */
37 TRACE_EVENT(sched_kthread_stop_ret,
38 
39 	TP_PROTO(int ret),
40 
41 	TP_ARGS(ret),
42 
43 	TP_STRUCT__entry(
44 		__field(	int,	ret	)
45 	),
46 
47 	TP_fast_assign(
48 		__entry->ret	= ret;
49 	),
50 
51 	TP_printk("ret=%d", __entry->ret)
52 );
53 
54 /*
55  * Event class for waking up a task (layout shared by sched_waking, sched_wakeup and sched_wakeup_new):
56  */
57 DECLARE_EVENT_CLASS(sched_wakeup_template,
58 
59 	TP_PROTO(struct task_struct *p),
60 
61 	TP_ARGS(__perf_task(p)),
62 
63 	TP_STRUCT__entry(
64 		__array(	char,	comm,	TASK_COMM_LEN	)
65 		__field(	pid_t,	pid			)
66 		__field(	int,	prio			)
67 		__field(	int,	success			)
68 		__field(	int,	target_cpu		)
69 	),
70 	/* success is hard-coded to 1 and is not part of the TP_printk() output. */
71 	TP_fast_assign(
72 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
73 		__entry->pid		= p->pid;
74 		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
75 		__entry->success	= 1; /* rudiment, kill when possible */
76 		__entry->target_cpu	= task_cpu(p);
77 	),
78 	/* target_cpu is printed zero-padded to three digits (%03d). */
79 	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
80 		  __entry->comm, __entry->pid, __entry->prio,
81 		  __entry->target_cpu)
82 );
83 
84 /*
85  * Tracepoint called when waking a task; this tracepoint is guaranteed to be
86  * called from the waking context. Uses the sched_wakeup_template layout.
87  */
88 DEFINE_EVENT(sched_wakeup_template, sched_waking,
89 	     TP_PROTO(struct task_struct *p),
90 	     TP_ARGS(p));
91 
92 /*
93  * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
94  * It is not always called from the waking context.
95  */
96 DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
97 	     TP_PROTO(struct task_struct *p),
98 	     TP_ARGS(p));
99 
100 /*
101  * Tracepoint for waking up a new task (uses the sched_wakeup_template layout):
102  */
103 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
104 	     TP_PROTO(struct task_struct *p),
105 	     TP_ARGS(p));
106 /* Helper for sched_switch: encodes the outgoing task's state for the prev_state field. */
107 #ifdef CREATE_TRACE_POINTS
108 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
109 {
110 	unsigned int idx;
111 
112 #ifdef CONFIG_SCHED_DEBUG
113 	BUG_ON(p != current);
114 #endif /* CONFIG_SCHED_DEBUG */
115 
116 	/*
117 	 * Preemption does not look at the task state: a preempted task is
118 	 * still RUNNING (it is not dequeued), so report TASK_REPORT_MAX.
119 	 */
120 	if (preempt)
121 		return TASK_REPORT_MAX;
122 
123 	/*
124 	 * task_state_index() is fls() based and yields 0-8: 0 stands for
125 	 * TASK_RUNNING, anything else is shifted back into a one-bit mask.
126 	 */
127 	idx = task_state_index(p);
128 	if (!idx)
129 		return idx;
130 
131 	return (unsigned int)(1 << (idx - 1));
132 }
133 #endif /* CREATE_TRACE_POINTS */
134 
135 /*
136  * Tracepoint for task switches, performed by the scheduler (records both @prev and @next):
137  */
138 TRACE_EVENT(sched_switch,
139 
140 	TP_PROTO(bool preempt,
141 		 struct task_struct *prev,
142 		 struct task_struct *next),
143 
144 	TP_ARGS(preempt, prev, next),
145 
146 	TP_STRUCT__entry(
147 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
148 		__field(	pid_t,	prev_pid			)
149 		__field(	int,	prev_prio			)
150 		__field(	long,	prev_state			)
151 		__array(	char,	next_comm,	TASK_COMM_LEN	)
152 		__field(	pid_t,	next_pid			)
153 		__field(	int,	next_prio			)
154 	),
155 	/* prev_state holds the encoded value from __trace_sched_switch_state(). */
156 	TP_fast_assign(
157 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
158 		__entry->prev_pid	= prev->pid;
159 		__entry->prev_prio	= prev->prio;
160 		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
161 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
162 		__entry->next_pid	= next->pid;
163 		__entry->next_prio	= next->prio;
164 		/* XXX SCHED_DEADLINE */
165 	),
166 	/* prev_state is rendered by two %s: the state letters, then an optional "+". */
167 	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
168 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
169 		/* Bits below TASK_REPORT_MAX decode to letters joined by "|"; none set prints "R". NOTE(review): TASK_DEAD is labelled "I" -- presumably its bit doubles as the reported idle state; confirm. */
170 		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
171 		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
172 				{ TASK_INTERRUPTIBLE, "S" },
173 				{ TASK_UNINTERRUPTIBLE, "D" },
174 				{ __TASK_STOPPED, "T" },
175 				{ __TASK_TRACED, "t" },
176 				{ EXIT_DEAD, "X" },
177 				{ EXIT_ZOMBIE, "Z" },
178 				{ TASK_PARKED, "P" },
179 				{ TASK_DEAD, "I" }) :
180 		  "R",
181 		/* The TASK_REPORT_MAX bit (returned for a preempted prev) prints "+". */
182 		__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
183 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
184 );
185 
186 /*
187  * Tracepoint for a task being migrated (records @p and the requested @dest_cpu):
188  */
189 TRACE_EVENT(sched_migrate_task,
190 
191 	TP_PROTO(struct task_struct *p, int dest_cpu),
192 
193 	TP_ARGS(p, dest_cpu),
194 
195 	TP_STRUCT__entry(
196 		__array(	char,	comm,	TASK_COMM_LEN	)
197 		__field(	pid_t,	pid			)
198 		__field(	int,	prio			)
199 		__field(	int,	orig_cpu		)
200 		__field(	int,	dest_cpu		)
201 	),
202 	/* orig_cpu is task_cpu(p) read when the event fires; dest_cpu is the argument. */
203 	TP_fast_assign(
204 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
205 		__entry->pid		= p->pid;
206 		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
207 		__entry->orig_cpu	= task_cpu(p);
208 		__entry->dest_cpu	= dest_cpu;
209 	),
210 
211 	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
212 		  __entry->comm, __entry->pid, __entry->prio,
213 		  __entry->orig_cpu, __entry->dest_cpu)
214 );
215 
216 DECLARE_EVENT_CLASS(sched_process_template,
217 	/* Event class recording comm, pid and prio of the task @p. */
218 	TP_PROTO(struct task_struct *p),
219 
220 	TP_ARGS(p),
221 
222 	TP_STRUCT__entry(
223 		__array(	char,	comm,	TASK_COMM_LEN	)
224 		__field(	pid_t,	pid			)
225 		__field(	int,	prio			)
226 	),
227 
228 	TP_fast_assign(
229 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
230 		__entry->pid		= p->pid;
231 		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
232 	),
233 
234 	TP_printk("comm=%s pid=%d prio=%d",
235 		  __entry->comm, __entry->pid, __entry->prio)
236 );
237 
238 /*
239  * Tracepoint for freeing a task (uses the sched_process_template layout):
240  */
241 DEFINE_EVENT(sched_process_template, sched_process_free,
242 	     TP_PROTO(struct task_struct *p),
243 	     TP_ARGS(p));
244 
245 
246 /*
247  * Tracepoint for a task exiting (uses the sched_process_template layout):
248  */
249 DEFINE_EVENT(sched_process_template, sched_process_exit,
250 	     TP_PROTO(struct task_struct *p),
251 	     TP_ARGS(p));
252 
253 /*
254  * Tracepoint for waiting on task to unschedule (uses the sched_process_template layout):
255  */
256 DEFINE_EVENT(sched_process_template, sched_wait_task,
257 	TP_PROTO(struct task_struct *p),
258 	TP_ARGS(p));
259 
260 /*
261  * Tracepoint for a waiting task:
262  */
263 TRACE_EVENT(sched_process_wait,
264 
265 	TP_PROTO(struct pid *pid),
266 
267 	TP_ARGS(pid),
268 
269 	TP_STRUCT__entry(
270 		__array(	char,	comm,	TASK_COMM_LEN	)
271 		__field(	pid_t,	pid			)
272 		__field(	int,	prio			)
273 	),
274 	/* comm and prio are read from current; pid is pid_nr() of the @pid argument. */
275 	TP_fast_assign(
276 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
277 		__entry->pid		= pid_nr(pid);
278 		__entry->prio		= current->prio; /* XXX SCHED_DEADLINE */
279 	),
280 
281 	TP_printk("comm=%s pid=%d prio=%d",
282 		  __entry->comm, __entry->pid, __entry->prio)
283 );
284 
285 /*
286  * Tracepoint for do_fork (records comm and pid of both @parent and @child):
287  */
288 TRACE_EVENT(sched_process_fork,
289 
290 	TP_PROTO(struct task_struct *parent, struct task_struct *child),
291 
292 	TP_ARGS(parent, child),
293 
294 	TP_STRUCT__entry(
295 		__array(	char,	parent_comm,	TASK_COMM_LEN	)
296 		__field(	pid_t,	parent_pid			)
297 		__array(	char,	child_comm,	TASK_COMM_LEN	)
298 		__field(	pid_t,	child_pid			)
299 	),
300 
301 	TP_fast_assign(
302 		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
303 		__entry->parent_pid	= parent->pid;
304 		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
305 		__entry->child_pid	= child->pid;
306 	),
307 	/* The parent's fields print as plain comm=/pid=; only the child's carry a child_ prefix. */
308 	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
309 		__entry->parent_comm, __entry->parent_pid,
310 		__entry->child_comm, __entry->child_pid)
311 );
312 
313 /*
314  * Tracepoint for exec (records the binprm filename, the pid of @p and @old_pid):
315  */
316 TRACE_EVENT(sched_process_exec,
317 
318 	TP_PROTO(struct task_struct *p, pid_t old_pid,
319 		 struct linux_binprm *bprm),
320 
321 	TP_ARGS(p, old_pid, bprm),
322 
323 	TP_STRUCT__entry(
324 		__string(	filename,	bprm->filename	)
325 		__field(	pid_t,		pid		)
326 		__field(	pid_t,		old_pid		)
327 	),
328 	/* filename is declared with __string(), filled by __assign_str() and read back by __get_str(). */
329 	TP_fast_assign(
330 		__assign_str(filename, bprm->filename);
331 		__entry->pid		= p->pid;
332 		__entry->old_pid	= old_pid;
333 	),
334 
335 	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
336 		  __entry->pid, __entry->old_pid)
337 );
338 
339 /*
340  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
341  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
342  */
343 DECLARE_EVENT_CLASS(sched_stat_template,
344 	/* Event class recording comm and pid of @tsk plus a u64 @delay. */
345 	TP_PROTO(struct task_struct *tsk, u64 delay),
346 
347 	TP_ARGS(__perf_task(tsk), __perf_count(delay)),
348 
349 	TP_STRUCT__entry(
350 		__array( char,	comm,	TASK_COMM_LEN	)
351 		__field( pid_t,	pid			)
352 		__field( u64,	delay			)
353 	),
354 
355 	TP_fast_assign(
356 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
357 		__entry->pid	= tsk->pid;
358 		__entry->delay	= delay;
359 	),
360 	/* delay is printed as unsigned long long with an "[ns]" suffix. */
361 	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
362 			__entry->comm, __entry->pid,
363 			(unsigned long long)__entry->delay)
364 );
365 
366 
367 /*
368  * Tracepoint for accounting wait time (time the task is runnable
369  * but not actually running due to scheduler contention); sched_stat_template layout.
370  */
371 DEFINE_EVENT(sched_stat_template, sched_stat_wait,
372 	     TP_PROTO(struct task_struct *tsk, u64 delay),
373 	     TP_ARGS(tsk, delay));
374 
375 /*
376  * Tracepoint for accounting sleep time (time the task is not runnable,
377  * including iowait, see below); sched_stat_template layout.
378  */
379 DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
380 	     TP_PROTO(struct task_struct *tsk, u64 delay),
381 	     TP_ARGS(tsk, delay));
382 
383 /*
384  * Tracepoint for accounting iowait time (time the task is not runnable
385  * due to waiting on IO to complete); sched_stat_template layout.
386  */
387 DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
388 	     TP_PROTO(struct task_struct *tsk, u64 delay),
389 	     TP_ARGS(tsk, delay));
390 
391 /*
392  * Tracepoint for accounting blocked time (time the task spends in an uninterruptible state).
393  */
394 DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
395 	     TP_PROTO(struct task_struct *tsk, u64 delay),
396 	     TP_ARGS(tsk, delay));
397 
398 /*
399  * Tracepoint for accounting runtime (time the task is executing
400  * on a CPU). Declared as an event class plus one event of the same name.
401  */
402 DECLARE_EVENT_CLASS(sched_stat_runtime,
403 
404 	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
405 
406 	TP_ARGS(tsk, __perf_count(runtime), vruntime),
407 
408 	TP_STRUCT__entry(
409 		__array( char,	comm,	TASK_COMM_LEN	)
410 		__field( pid_t,	pid			)
411 		__field( u64,	runtime			)
412 		__field( u64,	vruntime			)
413 	),
414 
415 	TP_fast_assign(
416 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
417 		__entry->pid		= tsk->pid;
418 		__entry->runtime	= runtime;
419 		__entry->vruntime	= vruntime;
420 	),
421 	/* Both u64 values are printed as unsigned long long with an "[ns]" suffix. */
422 	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
423 			__entry->comm, __entry->pid,
424 			(unsigned long long)__entry->runtime,
425 			(unsigned long long)__entry->vruntime)
426 );
427 /* Event instantiated from the class above, under the same name. */
428 DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
429 	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
430 	     TP_ARGS(tsk, runtime, vruntime));
431 
432 /*
433  * Tracepoint for showing priority inheritance modifying a task's
434  * priority.
435  */
436 TRACE_EVENT(sched_pi_setprio,
437 
438 	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
439 
440 	TP_ARGS(tsk, pi_task),
441 
442 	TP_STRUCT__entry(
443 		__array( char,	comm,	TASK_COMM_LEN	)
444 		__field( pid_t,	pid			)
445 		__field( int,	oldprio			)
446 		__field( int,	newprio			)
447 	),
448 	/* newprio: min(tsk->normal_prio, pi_task->prio) if @pi_task is non-NULL, else tsk->normal_prio. */
449 	TP_fast_assign(
450 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
451 		__entry->pid		= tsk->pid;
452 		__entry->oldprio	= tsk->prio;
453 		__entry->newprio	= pi_task ?
454 				min(tsk->normal_prio, pi_task->prio) :
455 				tsk->normal_prio;
456 		/* XXX SCHED_DEADLINE bits missing */
457 	),
458 
459 	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
460 			__entry->comm, __entry->pid,
461 			__entry->oldprio, __entry->newprio)
462 );
463 
464 #ifdef CONFIG_DETECT_HUNG_TASK
465 TRACE_EVENT(sched_process_hang,
466 	TP_PROTO(struct task_struct *tsk),
467 	TP_ARGS(tsk),
468 	/* Only defined when CONFIG_DETECT_HUNG_TASK is set; records comm and pid of @tsk. */
469 	TP_STRUCT__entry(
470 		__array( char,	comm,	TASK_COMM_LEN	)
471 		__field( pid_t,	pid			)
472 	),
473 
474 	TP_fast_assign(
475 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
476 		__entry->pid = tsk->pid;
477 	),
478 
479 	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
480 );
481 #endif /* CONFIG_DETECT_HUNG_TASK */
482 
483 DECLARE_EVENT_CLASS(sched_move_task_template,
484 	/* Event class for moving @tsk from @src_cpu to @dst_cpu (sched_move_numa, sched_stick_numa). */
485 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
486 
487 	TP_ARGS(tsk, src_cpu, dst_cpu),
488 
489 	TP_STRUCT__entry(
490 		__field( pid_t,	pid			)
491 		__field( pid_t,	tgid			)
492 		__field( pid_t,	ngid			)
493 		__field( int,	src_cpu			)
494 		__field( int,	src_nid			)
495 		__field( int,	dst_cpu			)
496 		__field( int,	dst_nid			)
497 	),
498 	/* ngid comes from task_numa_group_id(); the *_nid fields are cpu_to_node() of the CPU arguments. */
499 	TP_fast_assign(
500 		__entry->pid		= task_pid_nr(tsk);
501 		__entry->tgid		= task_tgid_nr(tsk);
502 		__entry->ngid		= task_numa_group_id(tsk);
503 		__entry->src_cpu	= src_cpu;
504 		__entry->src_nid	= cpu_to_node(src_cpu);
505 		__entry->dst_cpu	= dst_cpu;
506 		__entry->dst_nid	= cpu_to_node(dst_cpu);
507 	),
508 
509 	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
510 			__entry->pid, __entry->tgid, __entry->ngid,
511 			__entry->src_cpu, __entry->src_nid,
512 			__entry->dst_cpu, __entry->dst_nid)
513 );
514 
515 /*
516  * Tracks migration of tasks from one runqueue to another. Can be used to
517  * detect if automatic NUMA balancing is bouncing between nodes.
518  */
519 DEFINE_EVENT(sched_move_task_template, sched_move_numa,
520 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
521 
522 	TP_ARGS(tsk, src_cpu, dst_cpu)
523 );
524 
525 DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
526 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
527 	/* Same arguments and record layout as sched_move_numa (sched_move_task_template). */
528 	TP_ARGS(tsk, src_cpu, dst_cpu)
529 );
530 
531 TRACE_EVENT(sched_swap_numa,
532 	/* Records pid/tgid/ngid and cpu/nid for two tasks, @src_tsk and @dst_tsk. */
533 	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
534 		 struct task_struct *dst_tsk, int dst_cpu),
535 
536 	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
537 
538 	TP_STRUCT__entry(
539 		__field( pid_t,	src_pid			)
540 		__field( pid_t,	src_tgid		)
541 		__field( pid_t,	src_ngid		)
542 		__field( int,	src_cpu			)
543 		__field( int,	src_nid			)
544 		__field( pid_t,	dst_pid			)
545 		__field( pid_t,	dst_tgid		)
546 		__field( pid_t,	dst_ngid		)
547 		__field( int,	dst_cpu			)
548 		__field( int,	dst_nid			)
549 	),
550 	/* Both task pointers are dereferenced unconditionally; the *_nid fields are cpu_to_node() of the CPU arguments. */
551 	TP_fast_assign(
552 		__entry->src_pid	= task_pid_nr(src_tsk);
553 		__entry->src_tgid	= task_tgid_nr(src_tsk);
554 		__entry->src_ngid	= task_numa_group_id(src_tsk);
555 		__entry->src_cpu	= src_cpu;
556 		__entry->src_nid	= cpu_to_node(src_cpu);
557 		__entry->dst_pid	= task_pid_nr(dst_tsk);
558 		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
559 		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
560 		__entry->dst_cpu	= dst_cpu;
561 		__entry->dst_nid	= cpu_to_node(dst_cpu);
562 	),
563 
564 	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
565 			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
566 			__entry->src_cpu, __entry->src_nid,
567 			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
568 			__entry->dst_cpu, __entry->dst_nid)
569 );
570 
571 /*
572  * Tracepoint for waking a polling cpu without an IPI (records only the @cpu number).
573  */
574 TRACE_EVENT(sched_wake_idle_without_ipi,
575 
576 	TP_PROTO(int cpu),
577 
578 	TP_ARGS(cpu),
579 
580 	TP_STRUCT__entry(
581 		__field(	int,	cpu	)
582 	),
583 
584 	TP_fast_assign(
585 		__entry->cpu	= cpu;
586 	),
587 
588 	TP_printk("cpu=%d", __entry->cpu)
589 );
590 #endif /* _TRACE_SCHED_H */
591 
592 /* This part must be outside protection */
593 #include <trace/define_trace.h>
594