1 #undef TRACE_SYSTEM 2 #define TRACE_SYSTEM sched 3 4 #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) 5 #define _TRACE_SCHED_H 6 7 #include <linux/sched/numa_balancing.h> 8 #include <linux/tracepoint.h> 9 #include <linux/binfmts.h> 10 11 /* 12 * Tracepoint for calling kthread_stop, performed to end a kthread: 13 */ 14 TRACE_EVENT(sched_kthread_stop, 15 16 TP_PROTO(struct task_struct *t), 17 18 TP_ARGS(t), 19 20 TP_STRUCT__entry( 21 __array( char, comm, TASK_COMM_LEN ) 22 __field( pid_t, pid ) 23 ), 24 25 TP_fast_assign( 26 memcpy(__entry->comm, t->comm, TASK_COMM_LEN); 27 __entry->pid = t->pid; 28 ), 29 30 TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) 31 ); 32 33 /* 34 * Tracepoint for the return value of the kthread stopping: 35 */ 36 TRACE_EVENT(sched_kthread_stop_ret, 37 38 TP_PROTO(int ret), 39 40 TP_ARGS(ret), 41 42 TP_STRUCT__entry( 43 __field( int, ret ) 44 ), 45 46 TP_fast_assign( 47 __entry->ret = ret; 48 ), 49 50 TP_printk("ret=%d", __entry->ret) 51 ); 52 53 /* 54 * Tracepoint for waking up a task: 55 */ 56 DECLARE_EVENT_CLASS(sched_wakeup_template, 57 58 TP_PROTO(struct task_struct *p), 59 60 TP_ARGS(__perf_task(p)), 61 62 TP_STRUCT__entry( 63 __array( char, comm, TASK_COMM_LEN ) 64 __field( pid_t, pid ) 65 __field( int, prio ) 66 __field( int, success ) 67 __field( int, target_cpu ) 68 ), 69 70 TP_fast_assign( 71 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 72 __entry->pid = p->pid; 73 __entry->prio = p->prio; 74 __entry->success = 1; /* rudiment, kill when possible */ 75 __entry->target_cpu = task_cpu(p); 76 ), 77 78 TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", 79 __entry->comm, __entry->pid, __entry->prio, 80 __entry->target_cpu) 81 ); 82 83 /* 84 * Tracepoint called when waking a task; this tracepoint is guaranteed to be 85 * called from the waking context. 86 */ 87 DEFINE_EVENT(sched_wakeup_template, sched_waking, 88 TP_PROTO(struct task_struct *p), 89 TP_ARGS(p)); 90 91 /* 92 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. 93 * It it not always called from the waking context. 94 */ 95 DEFINE_EVENT(sched_wakeup_template, sched_wakeup, 96 TP_PROTO(struct task_struct *p), 97 TP_ARGS(p)); 98 99 /* 100 * Tracepoint for waking up a new task: 101 */ 102 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, 103 TP_PROTO(struct task_struct *p), 104 TP_ARGS(p)); 105 106 #ifdef CREATE_TRACE_POINTS 107 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) 108 { 109 #ifdef CONFIG_SCHED_DEBUG 110 BUG_ON(p != current); 111 #endif /* CONFIG_SCHED_DEBUG */ 112 113 /* 114 * Preemption ignores task state, therefore preempted tasks are always 115 * RUNNING (we will not have dequeued if state != RUNNING). 116 */ 117 return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state; 118 } 119 #endif /* CREATE_TRACE_POINTS */ 120 121 /* 122 * Tracepoint for task switches, performed by the scheduler: 123 */ 124 TRACE_EVENT(sched_switch, 125 126 TP_PROTO(bool preempt, 127 struct task_struct *prev, 128 struct task_struct *next), 129 130 TP_ARGS(preempt, prev, next), 131 132 TP_STRUCT__entry( 133 __array( char, prev_comm, TASK_COMM_LEN ) 134 __field( pid_t, prev_pid ) 135 __field( int, prev_prio ) 136 __field( long, prev_state ) 137 __array( char, next_comm, TASK_COMM_LEN ) 138 __field( pid_t, next_pid ) 139 __field( int, next_prio ) 140 ), 141 142 TP_fast_assign( 143 memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); 144 __entry->prev_pid = prev->pid; 145 __entry->prev_prio = prev->prio; 146 __entry->prev_state = __trace_sched_switch_state(preempt, prev); 147 memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); 148 __entry->next_pid = next->pid; 149 __entry->next_prio = next->prio; 150 ), 151 152 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", 153 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, 154 __entry->prev_state & (TASK_STATE_MAX-1) ? 155 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", 156 { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, 157 { 16, "Z" }, { 32, "X" }, { 64, "x" }, 158 { 128, "K" }, { 256, "W" }, { 512, "P" }, 159 { 1024, "N" }) : "R", 160 __entry->prev_state & TASK_STATE_MAX ? "+" : "", 161 __entry->next_comm, __entry->next_pid, __entry->next_prio) 162 ); 163 164 /* 165 * Tracepoint for a task being migrated: 166 */ 167 TRACE_EVENT(sched_migrate_task, 168 169 TP_PROTO(struct task_struct *p, int dest_cpu), 170 171 TP_ARGS(p, dest_cpu), 172 173 TP_STRUCT__entry( 174 __array( char, comm, TASK_COMM_LEN ) 175 __field( pid_t, pid ) 176 __field( int, prio ) 177 __field( int, orig_cpu ) 178 __field( int, dest_cpu ) 179 ), 180 181 TP_fast_assign( 182 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 183 __entry->pid = p->pid; 184 __entry->prio = p->prio; 185 __entry->orig_cpu = task_cpu(p); 186 __entry->dest_cpu = dest_cpu; 187 ), 188 189 TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", 190 __entry->comm, __entry->pid, __entry->prio, 191 __entry->orig_cpu, __entry->dest_cpu) 192 ); 193 194 DECLARE_EVENT_CLASS(sched_process_template, 195 196 TP_PROTO(struct task_struct *p), 197 198 TP_ARGS(p), 199 200 TP_STRUCT__entry( 201 __array( char, comm, TASK_COMM_LEN ) 202 __field( pid_t, pid ) 203 __field( int, prio ) 204 ), 205 206 TP_fast_assign( 207 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 208 __entry->pid = p->pid; 209 __entry->prio = p->prio; 210 ), 211 212 TP_printk("comm=%s pid=%d prio=%d", 213 __entry->comm, __entry->pid, __entry->prio) 214 ); 215 216 /* 217 * Tracepoint for freeing a task: 218 */ 219 DEFINE_EVENT(sched_process_template, sched_process_free, 220 TP_PROTO(struct task_struct *p), 221 TP_ARGS(p)); 222 223 224 /* 225 * Tracepoint for a task exiting: 226 */ 227 DEFINE_EVENT(sched_process_template, sched_process_exit, 228 TP_PROTO(struct task_struct *p), 229 TP_ARGS(p)); 230 231 /* 232 * Tracepoint for waiting on task to unschedule: 233 */ 234 DEFINE_EVENT(sched_process_template, sched_wait_task, 235 TP_PROTO(struct task_struct *p), 236 TP_ARGS(p)); 237 238 /* 239 * Tracepoint for a waiting task: 240 */ 241 TRACE_EVENT(sched_process_wait, 242 243 TP_PROTO(struct pid *pid), 244 245 TP_ARGS(pid), 246 247 TP_STRUCT__entry( 248 __array( char, comm, TASK_COMM_LEN ) 249 __field( pid_t, pid ) 250 __field( int, prio ) 251 ), 252 253 TP_fast_assign( 254 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 255 __entry->pid = pid_nr(pid); 256 __entry->prio = current->prio; 257 ), 258 259 TP_printk("comm=%s pid=%d prio=%d", 260 __entry->comm, __entry->pid, __entry->prio) 261 ); 262 263 /* 264 * Tracepoint for do_fork: 265 */ 266 TRACE_EVENT(sched_process_fork, 267 268 TP_PROTO(struct task_struct *parent, struct task_struct *child), 269 270 TP_ARGS(parent, child), 271 272 TP_STRUCT__entry( 273 __array( char, parent_comm, TASK_COMM_LEN ) 274 __field( pid_t, parent_pid ) 275 __array( char, child_comm, TASK_COMM_LEN ) 276 __field( pid_t, child_pid ) 277 ), 278 279 TP_fast_assign( 280 memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); 281 __entry->parent_pid = parent->pid; 282 memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); 283 __entry->child_pid = child->pid; 284 ), 285 286 TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", 287 __entry->parent_comm, __entry->parent_pid, 288 __entry->child_comm, __entry->child_pid) 289 ); 290 291 /* 292 * Tracepoint for exec: 293 */ 294 TRACE_EVENT(sched_process_exec, 295 296 TP_PROTO(struct task_struct *p, pid_t old_pid, 297 struct linux_binprm *bprm), 298 299 TP_ARGS(p, old_pid, bprm), 300 301 TP_STRUCT__entry( 302 __string( filename, bprm->filename ) 303 __field( pid_t, pid ) 304 __field( pid_t, old_pid ) 305 ), 306 307 TP_fast_assign( 308 __assign_str(filename, bprm->filename); 309 __entry->pid = p->pid; 310 __entry->old_pid = old_pid; 311 ), 312 313 TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), 314 __entry->pid, __entry->old_pid) 315 ); 316 317 /* 318 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE 319 * adding sched_stat support to SCHED_FIFO/RR would be welcome. 320 */ 321 DECLARE_EVENT_CLASS(sched_stat_template, 322 323 TP_PROTO(struct task_struct *tsk, u64 delay), 324 325 TP_ARGS(__perf_task(tsk), __perf_count(delay)), 326 327 TP_STRUCT__entry( 328 __array( char, comm, TASK_COMM_LEN ) 329 __field( pid_t, pid ) 330 __field( u64, delay ) 331 ), 332 333 TP_fast_assign( 334 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 335 __entry->pid = tsk->pid; 336 __entry->delay = delay; 337 ), 338 339 TP_printk("comm=%s pid=%d delay=%Lu [ns]", 340 __entry->comm, __entry->pid, 341 (unsigned long long)__entry->delay) 342 ); 343 344 345 /* 346 * Tracepoint for accounting wait time (time the task is runnable 347 * but not actually running due to scheduler contention). 348 */ 349 DEFINE_EVENT(sched_stat_template, sched_stat_wait, 350 TP_PROTO(struct task_struct *tsk, u64 delay), 351 TP_ARGS(tsk, delay)); 352 353 /* 354 * Tracepoint for accounting sleep time (time the task is not runnable, 355 * including iowait, see below). 356 */ 357 DEFINE_EVENT(sched_stat_template, sched_stat_sleep, 358 TP_PROTO(struct task_struct *tsk, u64 delay), 359 TP_ARGS(tsk, delay)); 360 361 /* 362 * Tracepoint for accounting iowait time (time the task is not runnable 363 * due to waiting on IO to complete). 364 */ 365 DEFINE_EVENT(sched_stat_template, sched_stat_iowait, 366 TP_PROTO(struct task_struct *tsk, u64 delay), 367 TP_ARGS(tsk, delay)); 368 369 /* 370 * Tracepoint for accounting blocked time (time the task is in uninterruptible). 371 */ 372 DEFINE_EVENT(sched_stat_template, sched_stat_blocked, 373 TP_PROTO(struct task_struct *tsk, u64 delay), 374 TP_ARGS(tsk, delay)); 375 376 /* 377 * Tracepoint for accounting runtime (time the task is executing 378 * on a CPU). 379 */ 380 DECLARE_EVENT_CLASS(sched_stat_runtime, 381 382 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), 383 384 TP_ARGS(tsk, __perf_count(runtime), vruntime), 385 386 TP_STRUCT__entry( 387 __array( char, comm, TASK_COMM_LEN ) 388 __field( pid_t, pid ) 389 __field( u64, runtime ) 390 __field( u64, vruntime ) 391 ), 392 393 TP_fast_assign( 394 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 395 __entry->pid = tsk->pid; 396 __entry->runtime = runtime; 397 __entry->vruntime = vruntime; 398 ), 399 400 TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", 401 __entry->comm, __entry->pid, 402 (unsigned long long)__entry->runtime, 403 (unsigned long long)__entry->vruntime) 404 ); 405 406 DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, 407 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), 408 TP_ARGS(tsk, runtime, vruntime)); 409 410 /* 411 * Tracepoint for showing priority inheritance modifying a tasks 412 * priority. 413 */ 414 TRACE_EVENT(sched_pi_setprio, 415 416 TP_PROTO(struct task_struct *tsk, int newprio), 417 418 TP_ARGS(tsk, newprio), 419 420 TP_STRUCT__entry( 421 __array( char, comm, TASK_COMM_LEN ) 422 __field( pid_t, pid ) 423 __field( int, oldprio ) 424 __field( int, newprio ) 425 ), 426 427 TP_fast_assign( 428 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 429 __entry->pid = tsk->pid; 430 __entry->oldprio = tsk->prio; 431 __entry->newprio = newprio; 432 ), 433 434 TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", 435 __entry->comm, __entry->pid, 436 __entry->oldprio, __entry->newprio) 437 ); 438 439 #ifdef CONFIG_DETECT_HUNG_TASK 440 TRACE_EVENT(sched_process_hang, 441 TP_PROTO(struct task_struct *tsk), 442 TP_ARGS(tsk), 443 444 TP_STRUCT__entry( 445 __array( char, comm, TASK_COMM_LEN ) 446 __field( pid_t, pid ) 447 ), 448 449 TP_fast_assign( 450 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 451 __entry->pid = tsk->pid; 452 ), 453 454 TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) 455 ); 456 #endif /* CONFIG_DETECT_HUNG_TASK */ 457 458 DECLARE_EVENT_CLASS(sched_move_task_template, 459 460 TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), 461 462 TP_ARGS(tsk, src_cpu, dst_cpu), 463 464 TP_STRUCT__entry( 465 __field( pid_t, pid ) 466 __field( pid_t, tgid ) 467 __field( pid_t, ngid ) 468 __field( int, src_cpu ) 469 __field( int, src_nid ) 470 __field( int, dst_cpu ) 471 __field( int, dst_nid ) 472 ), 473 474 TP_fast_assign( 475 __entry->pid = task_pid_nr(tsk); 476 __entry->tgid = task_tgid_nr(tsk); 477 __entry->ngid = task_numa_group_id(tsk); 478 __entry->src_cpu = src_cpu; 479 __entry->src_nid = cpu_to_node(src_cpu); 480 __entry->dst_cpu = dst_cpu; 481 __entry->dst_nid = cpu_to_node(dst_cpu); 482 ), 483 484 TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d", 485 __entry->pid, __entry->tgid, __entry->ngid, 486 __entry->src_cpu, __entry->src_nid, 487 __entry->dst_cpu, __entry->dst_nid) 488 ); 489 490 /* 491 * Tracks migration of tasks from one runqueue to another. Can be used to 492 * detect if automatic NUMA balancing is bouncing between nodes 493 */ 494 DEFINE_EVENT(sched_move_task_template, sched_move_numa, 495 TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), 496 497 TP_ARGS(tsk, src_cpu, dst_cpu) 498 ); 499 500 DEFINE_EVENT(sched_move_task_template, sched_stick_numa, 501 TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), 502 503 TP_ARGS(tsk, src_cpu, dst_cpu) 504 ); 505 506 TRACE_EVENT(sched_swap_numa, 507 508 TP_PROTO(struct task_struct *src_tsk, int src_cpu, 509 struct task_struct *dst_tsk, int dst_cpu), 510 511 TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), 512 513 TP_STRUCT__entry( 514 __field( pid_t, src_pid ) 515 __field( pid_t, src_tgid ) 516 __field( pid_t, src_ngid ) 517 __field( int, src_cpu ) 518 __field( int, src_nid ) 519 __field( pid_t, dst_pid ) 520 __field( pid_t, dst_tgid ) 521 __field( pid_t, dst_ngid ) 522 __field( int, dst_cpu ) 523 __field( int, dst_nid ) 524 ), 525 526 TP_fast_assign( 527 __entry->src_pid = task_pid_nr(src_tsk); 528 __entry->src_tgid = task_tgid_nr(src_tsk); 529 __entry->src_ngid = task_numa_group_id(src_tsk); 530 __entry->src_cpu = src_cpu; 531 __entry->src_nid = cpu_to_node(src_cpu); 532 __entry->dst_pid = task_pid_nr(dst_tsk); 533 __entry->dst_tgid = task_tgid_nr(dst_tsk); 534 __entry->dst_ngid = task_numa_group_id(dst_tsk); 535 __entry->dst_cpu = dst_cpu; 536 __entry->dst_nid = cpu_to_node(dst_cpu); 537 ), 538 539 TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d", 540 __entry->src_pid, __entry->src_tgid, __entry->src_ngid, 541 __entry->src_cpu, __entry->src_nid, 542 __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, 543 __entry->dst_cpu, __entry->dst_nid) 544 ); 545 546 /* 547 * Tracepoint for waking a polling cpu without an IPI. 548 */ 549 TRACE_EVENT(sched_wake_idle_without_ipi, 550 551 TP_PROTO(int cpu), 552 553 TP_ARGS(cpu), 554 555 TP_STRUCT__entry( 556 __field( int, cpu ) 557 ), 558 559 TP_fast_assign( 560 __entry->cpu = cpu; 561 ), 562 563 TP_printk("cpu=%d", __entry->cpu) 564 ); 565 #endif /* _TRACE_SCHED_H */ 566 567 /* This part must be outside protection */ 568 #include <trace/define_trace.h> 569