#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid	= t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of the kthread stopping:
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(	int,	ret	)
	),

	TP_fast_assign(
		__entry->ret	= ret;
	),

	TP_printk("ret=%d", __entry->ret)
);

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	success			)
		__field(	int,	target_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->success	= 1; /* rudiment, kill when possible */
		__entry->target_cpu	= task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_waking,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));
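/*
 * Illustrative sketch, not part of the original header: each DEFINE_EVENT
 * above generates registration helpers, so a module can attach a probe to
 * one of the wakeup events. The probe's first argument is the private data
 * pointer, followed by the TP_PROTO arguments; "my_waking_probe" is a
 * hypothetical name chosen for this example.
 *
 *	static void my_waking_probe(void *data, struct task_struct *p)
 *	{
 *		pr_debug("waking comm=%s pid=%d\n", p->comm, p->pid);
 *	}
 *
 *	register_trace_sched_waking(my_waking_probe, NULL);
 *	...
 *	unregister_trace_sched_waking(my_waking_probe, NULL);
 */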
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}
#endif /* CREATE_TRACE_POINTS */

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	prev_pid			)
		__field(	int,	prev_prio			)
		__field(	long,	prev_state			)
		__array(	char,	next_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	next_pid			)
		__field(	int,	next_prio			)
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
		__entry->prev_state & (TASK_STATE_MAX-1) ?
		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
				{ 1, "S" }, { 2, "D" }, { 4, "T" }, { 8, "t" },
				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
				{ 128, "K" }, { 256, "W" }, { 512, "P" },
				{ 1024, "N" }) : "R",
		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
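/*
 * Illustrative note, not part of the original header: the prev_state
 * letters above follow the conventional ps(1) state codes, e.g. S
 * (interruptible sleep), D (uninterruptible sleep), T (stopped), t
 * (traced), Z (zombie) and X (dead), with "R" printed when no state bit
 * is set; the TASK_STATE_MAX bit set by __trace_sched_switch_state()
 * renders as a trailing "+" and marks a preempted, still-runnable task.
 * A rendered trace line might look like (values made up):
 *
 *	prev_comm=bash prev_pid=1234 prev_prio=120 prev_state=S ==>
 *	next_comm=swapper/0 next_pid=0 next_prio=120
 */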
/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid		= pid_nr(pid);
		__entry->prio		= current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array(	char,	parent_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	parent_pid			)
		__array(	char,	child_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	child_pid			)
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid	= parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid	= child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		__entry->parent_comm, __entry->parent_pid,
		__entry->child_comm, __entry->child_pid)
);

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string(	filename,	bprm->filename	)
		__field(	pid_t,		pid		)
		__field(	pid_t,		old_pid		)
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid		= p->pid;
		__entry->old_pid	= old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);

/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	u64,	delay			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid	= tsk->pid;
		__entry->delay	= delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->delay)
);

/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));
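/*
 * Illustrative note, not part of the original header: the __perf_count()
 * annotation in sched_stat_template makes perf account the delay as the
 * event count rather than 1, so samples on these events are weighted by
 * time spent, and __perf_task() attributes the event to the affected task
 * rather than to current. For example, from userspace:
 *
 *	perf record -e sched:sched_stat_sleep -e sched:sched_stat_wait <cmd>
 */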
/*
 * Tracepoint for accounting blocked time (time the task spends in
 * uninterruptible sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	u64,	runtime			)
		__field(	u64,	vruntime		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->runtime	= runtime;
		__entry->vruntime	= vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->runtime,
			(unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	oldprio			)
		__field(	int,	newprio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->oldprio	= tsk->prio;
		__entry->newprio	= pi_task ? pi_task->prio : tsk->prio;
		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
		  __entry->comm, __entry->pid,
		  __entry->oldprio, __entry->newprio)
);
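/*
 * Illustrative sketch, not part of the original header: sched_pi_setprio
 * fires when rt-mutex priority inheritance boosts a task holding a lock
 * needed by a higher-priority waiter, and again when the boost is undone.
 * A boost/deboost pair might render as (values made up):
 *
 *	sched_pi_setprio: comm=worker pid=4242 oldprio=120 newprio=49
 *	sched_pi_setprio: comm=worker pid=4242 oldprio=49 newprio=120
 */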
#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */

DECLARE_EVENT_CLASS(sched_move_task_template,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field(	pid_t,	pid	)
		__field(	pid_t,	tgid	)
		__field(	pid_t,	ngid	)
		__field(	int,	src_cpu	)
		__field(	int,	src_nid	)
		__field(	int,	dst_cpu	)
		__field(	int,	dst_nid	)
	),

	TP_fast_assign(
		__entry->pid		= task_pid_nr(tsk);
		__entry->tgid		= task_tgid_nr(tsk);
		__entry->ngid		= task_numa_group_id(tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
			__entry->pid, __entry->tgid, __entry->ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect if automatic NUMA balancing is bouncing between nodes.
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field(	pid_t,	src_pid		)
		__field(	pid_t,	src_tgid	)
		__field(	pid_t,	src_ngid	)
		__field(	int,	src_cpu		)
		__field(	int,	src_nid		)
		__field(	pid_t,	dst_pid		)
		__field(	pid_t,	dst_tgid	)
		__field(	pid_t,	dst_ngid	)
		__field(	int,	dst_cpu		)
		__field(	int,	dst_nid		)
	),

	TP_fast_assign(
		__entry->src_pid	= task_pid_nr(src_tsk);
		__entry->src_tgid	= task_tgid_nr(src_tsk);
		__entry->src_ngid	= task_numa_group_id(src_tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_pid	= task_pid_nr(dst_tsk);
		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
			__entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracepoint for waking a polling CPU without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(	int,	cpu	)
	),

	TP_fast_assign(
		__entry->cpu	= cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
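/*
 * Usage note, illustrative and not part of the original header: exactly one
 * compilation unit must instantiate the tracepoint definitions by defining
 * CREATE_TRACE_POINTS before including this header (the scheduler core does
 * this); every other user includes it normally and only gets declarations:
 *
 *	#define CREATE_TRACE_POINTS
 *	#include <trace/events/sched.h>
 */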