#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/evlist.h"
#include "util/cache.h"
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <sys/prctl.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

static const char *input_name;

static char default_sort_order[] = "avg, max, switch, runtime";
static const char *sort_order = default_sort_order;

static int profile_cpu = -1;

#define PR_SET_NAME	15	/* Set process name */
#define MAX_CPUS	4096

static u64 run_measurement_overhead;
static u64 sleep_measurement_overhead;

#define COMM_LEN	20
#define SYM_LEN		129

#define MAX_PID		65536

static unsigned long nr_tasks;

struct sched_atom;

struct task_desc {
	unsigned long nr;
	unsigned long pid;
	char comm[COMM_LEN];

	unsigned long nr_events;
	unsigned long curr_event;
	struct sched_atom **atoms;

	pthread_t thread;
	sem_t sleep_sem;

	sem_t ready_for_work;
	sem_t work_done_sem;

	u64 cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
	SCHED_EVENT_MIGRATION,
};

struct sched_atom {
	enum sched_event_type type;
	int specific_wait;
	u64 timestamp;
	u64 duration;
	unsigned long nr;
	sem_t *wait_sem;
	struct task_desc *wakee;
};

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned int replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head list;
	enum thread_state state;
	u64 sched_out_time;
	u64 wake_up_time;
	u64 sched_in_time;
	u64 runtime;
};

struct work_atoms {
	struct list_head work_list;
	struct thread *thread;
	struct rb_node node;
	u64 max_lat;
	u64 max_lat_at;
	u64 total_lat;
	u64 nb_atoms;
	u64 total_runtime;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;
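
/*
 * Timing helpers used by 'perf sched replay': get_nsecs() reads the
 * monotonic clock, burn_nsecs() busy-loops for a given duration
 * (compensating for the measured loop overhead), sleep_nsecs() blocks
 * in nanosleep(), and the calibrate_*() helpers measure the overhead
 * of both so that replayed runtimes stay accurate.
 */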

static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}

static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks*sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(atom->duration);
		break;
	case SCHED_EVENT_SLEEP:
		if (atom->wait_sem)
			ret = sem_wait(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_WAKEUP:
		if (atom->wait_sem)
			ret = sem_post(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_MIGRATION:
		break;
	default:
		BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

	return sum;
}

static int self_open_counters(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

	if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned "
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}

static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd;

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	fd = self_open_counters();

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg*9 + delta)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta/1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}

#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));		\
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data)			\
do {								\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);		\
} while (0)



struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_migrate_task_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 cpu;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct machine *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct machine *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct machine *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);

	void (*migrate_task_event)(struct trace_migrate_task_event *,
				   struct machine *machine,
				   struct event *,
				   int cpu,
				   u64 timestamp,
				   struct thread *thread);
};


static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct machine *machine __used,
		    struct event *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct machine *machine __used,
		    struct event *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, __used *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("...  child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
	.wakeup_event = replay_wakeup_event,
	.switch_event = replay_switch_event,
	.fork_event = replay_fork_event,
};

struct sort_dimension {
	const char *name;
	sort_fn_t cmp;
	struct list_head list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}
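
/*
 * Complete a wakeup -> sched_in latency atom: once the woken thread is
 * actually scheduled in, the delay since wake_up_time is added to the
 * per-thread totals and the per-thread maximum (and its timestamp) is
 * updated.
 */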
static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct machine *machine,
		     struct event *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
	sched_in = machine__findnew_thread(machine, switch_event->next_pid);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in that we have not heard about yet,
		 * add in an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		      struct machine *machine,
		      struct event *event __used,
		      int cpu,
		      u64 timestamp,
		      struct thread *this_thread __used)
{
	struct thread *thread = machine__findnew_thread(machine, runtime_event->pid);
	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}

static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct machine *machine,
		     struct event *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = machine__findnew_thread(machine, wakeup_event->pid);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are only looking at only one, so don't
	 * make useless noise.
	 */
	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
			   struct machine *machine,
			   struct event *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (profile_cpu == -1)
		return;

	migrant = machine__findnew_thread(machine, migrate_task_event->pid);
	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(migrant);
		register_pid(migrant->pid, migrant->comm);
		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
		if (!atoms)
			die("migration-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		nr_unordered_timestamps++;
}

static struct trace_sched_handler lat_ops = {
	.wakeup_event = latency_wakeup_event,
	.switch_event = latency_switch_event,
	.runtime_event = latency_runtime_event,
	.fork_event = latency_fork_event,
	.migrate_task_event = latency_migrate_task_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
		(double)work_list->total_runtime / 1e6,
		work_list->nb_atoms, (double)avg / 1e6,
		(double)work_list->max_lat / 1e6,
		(double)work_list->max_lat_at / 1e9);
}

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name = "pid",
	.cmp = pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name = "avg",
	.cmp = avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name = "max",
	.cmp = max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name = "switch",
	.cmp = switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name = "runtime",
	.cmp = runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(struct perf_tool *tool __used,
			   struct event *event,
			   struct perf_sample *sample,
			   struct machine *machine,
			   struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, data);

	FILL_ARRAY(wakeup_event, comm, event, data);
	FILL_FIELD(wakeup_event, pid, event, data);
	FILL_FIELD(wakeup_event, prio, event, data);
	FILL_FIELD(wakeup_event, success, event, data);
	FILL_FIELD(wakeup_event, cpu, event, data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, machine, event,
					    sample->cpu, sample->time, thread);
}
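
/*
 * State used by 'perf sched map': each thread gets a two-character
 * shortname ("A0", "B0", ...) and every sched_switch prints one row
 * with a column per CPU, marking the CPU where the switch happened
 * with '*'.
 */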

/*
 * Track the current task - that way we can know whether there's any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct machine *machine,
		 struct event *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out __used, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
	sched_in = machine__findnew_thread(machine, switch_event->next_pid);

	curr_thread[this_cpu] = sched_in;

	printf(" ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1 = 'A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2 = '0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(". ");
		} else
			printf("  ");
	}

	printf(" %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}

static void
process_sched_switch_event(struct perf_tool *tool __used,
			   struct event *event,
			   struct perf_sample *sample,
			   struct machine *machine,
			   struct thread *thread)
{
	int this_cpu = sample->cpu;
	void *data = sample->raw_data;
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, data);

	FILL_ARRAY(switch_event, prev_comm, event, data);
	FILL_FIELD(switch_event, prev_pid, event, data);
	FILL_FIELD(switch_event, prev_prio, event, data);
	FILL_FIELD(switch_event, prev_state, event, data);
	FILL_ARRAY(switch_event, next_comm, event, data);
	FILL_FIELD(switch_event, next_pid, event, data);
	FILL_FIELD(switch_event, next_prio, event, data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, machine, event,
					    this_cpu, sample->time, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(struct perf_tool *tool __used,
			    struct event *event,
			    struct perf_sample *sample,
			    struct machine *machine,
			    struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, data);
	FILL_FIELD(runtime_event, pid, event, data);
	FILL_FIELD(runtime_event, runtime, event, data);
	FILL_FIELD(runtime_event, vruntime, event, data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, machine, event,
					     sample->cpu, sample->time, thread);
}

static void
process_sched_fork_event(struct perf_tool *tool __used,
			 struct event *event,
			 struct perf_sample *sample,
			 struct machine *machine __used,
			 struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, data);

	FILL_ARRAY(fork_event, parent_comm, event, data);
	FILL_FIELD(fork_event, parent_pid, event, data);
	FILL_ARRAY(fork_event, child_comm, event, data);
	FILL_FIELD(fork_event, child_pid, event, data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event,
					  sample->cpu, sample->time, thread);
}

static void
process_sched_exit_event(struct perf_tool *tool __used,
			 struct event *event,
			 struct perf_sample *sample __used,
			 struct machine *machine __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_sched_migrate_task_event(struct perf_tool *tool __used,
				 struct event *event,
				 struct perf_sample *sample,
				 struct machine *machine,
				 struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_migrate_task_event migrate_task_event;

	FILL_COMMON_FIELDS(migrate_task_event, event, data);

	FILL_ARRAY(migrate_task_event, comm, event, data);
	FILL_FIELD(migrate_task_event, pid, event, data);
	FILL_FIELD(migrate_task_event, prio, event, data);
	FILL_FIELD(migrate_task_event, cpu, event, data);

	if (trace_handler->migrate_task_event)
		trace_handler->migrate_task_event(&migrate_task_event, machine,
						  event, sample->cpu,
						  sample->time, thread);
}

typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event *event,
				   struct perf_sample *sample,
				   struct machine *machine,
				   struct thread *thread);

static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
						 union perf_event *event __used,
						 struct perf_sample *sample,
						 struct perf_evsel *evsel,
						 struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, sample->pid);

	if (thread == NULL) {
		pr_debug("problem processing %s event, skipping it.\n",
			 evsel->name);
		return -1;
	}

	evsel->hists.stats.total_period += sample->period;
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);

	if (evsel->handler.func != NULL) {
		tracepoint_handler f = evsel->handler.func;

		if (evsel->handler.data == NULL)
			evsel->handler.data = trace_find_event(evsel->attr.config);

		f(tool, evsel->handler.data, sample, machine, thread);
	}

	return 0;
}

static struct perf_tool perf_sched = {
	.sample = perf_sched__process_tracepoint_sample,
	.comm = perf_event__process_comm,
	.lost = perf_event__process_lost,
	.fork = perf_event__process_task,
	.ordered_samples = true,
};

static void read_events(bool destroy, struct perf_session **psession)
{
	int err = -EINVAL;
	const struct perf_evsel_str_handler handlers[] = {
		{ "sched:sched_switch", process_sched_switch_event, },
		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
		{ "sched:sched_wakeup", process_sched_wakeup_event, },
		{ "sched:sched_wakeup_new", process_sched_wakeup_event, },
		{ "sched:sched_process_fork", process_sched_fork_event, },
		{ "sched:sched_process_exit", process_sched_exit_event, },
		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
	};
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &perf_sched);
	if (session == NULL)
		die("No Memory");

	err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
	assert(err == 0);

	if (perf_session__has_traces(session, "record -R")) {
		err = perf_session__process_events(session, &perf_sched);
		if (err)
			die("Failed to process events, error %d", err);

		nr_events = session->hists.stats.nr_events[0];
		nr_lost_events = session->hists.stats.total_lost;
		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
	}

	if (destroy)
		perf_session__delete(session);

	if (psession)
		*psession = session;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __cmd_lat(void)
{
	struct rb_node *next;
	struct perf_session *session;

	setup_pager();
	read_events(false, &session);
	sort_lat();

	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
	printf(" ---------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)all_runtime/1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

	perf_session__delete(session);
}

static struct trace_sched_handler map_ops = {
	.wakeup_event = NULL,
	.switch_event = map_switch_event,
	.runtime_event = NULL,
	.fork_event = NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events(true, NULL);
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events(true, NULL);

	printf("nr_run_events: %ld\n", nr_run_events);
	printf("nr_sleep_events: %ld\n", nr_sleep_events);
	printf("nr_wakeup_events: %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups: %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized: %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}


static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|script}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INCR('v', "verbose", &verbose,
		   "be more verbose (show symbol address, etc)"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

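/*
 * Parse the -s/--sort string ("avg, max, switch, runtime" by default)
 * into sort_list; the latency output is ordered by these keys, while
 * cmp_pid always keeps the internal rb-tree keyed by pid.
 */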
static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add("pid", &cmp_pid);
}

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch",
	"-e", "sched:sched_stat_wait",
	"-e", "sched:sched_stat_sleep",
	"-e", "sched:sched_stat_iowait",
	"-e", "sched:sched_stat_runtime",
	"-e", "sched:sched_process_exit",
	"-e", "sched:sched_process_fork",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_migrate_task",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, sched_options, sched_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	symbol__init();
	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		trace_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting();
		__cmd_lat();
	} else if (!strcmp(argv[0], "map")) {
		trace_handler = &map_ops;
		setup_sorting();
		__cmd_map();
	} else if (!strncmp(argv[0], "rep", 3)) {
		trace_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		__cmd_replay();
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}