1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/cache.h" 6 #include "util/symbol.h" 7 #include "util/thread.h" 8 #include "util/header.h" 9 #include "util/session.h" 10 11 #include "util/parse-options.h" 12 #include "util/trace-event.h" 13 14 #include "util/debug.h" 15 16 #include <sys/prctl.h> 17 18 #include <semaphore.h> 19 #include <pthread.h> 20 #include <math.h> 21 22 static char const *input_name = "perf.data"; 23 24 static char default_sort_order[] = "avg, max, switch, runtime"; 25 static const char *sort_order = default_sort_order; 26 27 static int profile_cpu = -1; 28 29 #define PR_SET_NAME 15 /* Set process name */ 30 #define MAX_CPUS 4096 31 32 static u64 run_measurement_overhead; 33 static u64 sleep_measurement_overhead; 34 35 #define COMM_LEN 20 36 #define SYM_LEN 129 37 38 #define MAX_PID 65536 39 40 static unsigned long nr_tasks; 41 42 struct sched_atom; 43 44 struct task_desc { 45 unsigned long nr; 46 unsigned long pid; 47 char comm[COMM_LEN]; 48 49 unsigned long nr_events; 50 unsigned long curr_event; 51 struct sched_atom **atoms; 52 53 pthread_t thread; 54 sem_t sleep_sem; 55 56 sem_t ready_for_work; 57 sem_t work_done_sem; 58 59 u64 cpu_usage; 60 }; 61 62 enum sched_event_type { 63 SCHED_EVENT_RUN, 64 SCHED_EVENT_SLEEP, 65 SCHED_EVENT_WAKEUP, 66 SCHED_EVENT_MIGRATION, 67 }; 68 69 struct sched_atom { 70 enum sched_event_type type; 71 int specific_wait; 72 u64 timestamp; 73 u64 duration; 74 unsigned long nr; 75 sem_t *wait_sem; 76 struct task_desc *wakee; 77 }; 78 79 static struct task_desc *pid_to_task[MAX_PID]; 80 81 static struct task_desc **tasks; 82 83 static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; 84 static u64 start_time; 85 86 static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; 87 88 static unsigned long nr_run_events; 89 static unsigned long nr_sleep_events; 90 static unsigned long nr_wakeup_events; 91 92 static unsigned long nr_sleep_corrections; 93 static unsigned long nr_run_events_optimized; 94 95 static unsigned long targetless_wakeups; 96 static unsigned long multitarget_wakeups; 97 98 static u64 cpu_usage; 99 static u64 runavg_cpu_usage; 100 static u64 parent_cpu_usage; 101 static u64 runavg_parent_cpu_usage; 102 103 static unsigned long nr_runs; 104 static u64 sum_runtime; 105 static u64 sum_fluct; 106 static u64 run_avg; 107 108 static unsigned int replay_repeat = 10; 109 static unsigned long nr_timestamps; 110 static unsigned long nr_unordered_timestamps; 111 static unsigned long nr_state_machine_bugs; 112 static unsigned long nr_context_switch_bugs; 113 static unsigned long nr_events; 114 static unsigned long nr_lost_chunks; 115 static unsigned long nr_lost_events; 116 117 #define TASK_STATE_TO_CHAR_STR "RSDTtZX" 118 119 enum thread_state { 120 THREAD_SLEEPING = 0, 121 THREAD_WAIT_CPU, 122 THREAD_SCHED_IN, 123 THREAD_IGNORE 124 }; 125 126 struct work_atom { 127 struct list_head list; 128 enum thread_state state; 129 u64 sched_out_time; 130 u64 wake_up_time; 131 u64 sched_in_time; 132 u64 runtime; 133 }; 134 135 struct work_atoms { 136 struct list_head work_list; 137 struct thread *thread; 138 struct rb_node node; 139 u64 max_lat; 140 u64 max_lat_at; 141 u64 total_lat; 142 u64 nb_atoms; 143 u64 total_runtime; 144 }; 145 146 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 147 148 static struct rb_root atom_root, sorted_atom_root; 149 150 static u64 all_runtime; 151 static u64 all_count; 152 153 154 static u64 get_nsecs(void) 155 { 156 struct timespec ts; 

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned int replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head list;
	enum thread_state state;
	u64 sched_out_time;
	u64 wake_up_time;
	u64 sched_in_time;
	u64 runtime;
};

struct work_atoms {
	struct list_head work_list;
	struct thread *thread;
	struct rb_node node;
	u64 max_lat;
	u64 max_lat_at;
	u64 total_lat;
	u64 nb_atoms;
	u64 total_runtime;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;


static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	/* tv_nsec must stay below one full second (1e9 nsecs): */
	ts.tv_nsec = nsecs % 1000000000ULL;
	ts.tv_sec = nsecs / 1000000000ULL;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1 - T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1 - T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}
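
/*
 * The three builders above append atoms to a task's stream.  Note the
 * RUN-merge optimization: back-to-back run intervals collapse into a
 * single atom, e.g. (illustrative only):
 *
 *	add_sched_event_run(t, ts1, 100);
 *	add_sched_event_run(t, ts2, 50);	// => one RUN atom, duration 150
 *
 * which keeps replay cost proportional to the number of state
 * transitions rather than the number of trace records.
 */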

static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks * sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
		case SCHED_EVENT_RUN:
			burn_nsecs(atom->duration);
			break;
		case SCHED_EVENT_SLEEP:
			if (atom->wait_sem)
				ret = sem_wait(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_WAKEUP:
			if (atom->wait_sem)
				ret = sem_post(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_MIGRATION:
			break;
		default:
			BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum  = ru.ru_utime.tv_sec * 1e9 + ru.ru_utime.tv_usec * 1e3;
	sum += ru.ru_stime.tv_sec * 1e9 + ru.ru_stime.tv_usec * 1e3;

	return sum;
}

static int self_open_counters(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

	if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned "
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}

static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd;

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	fd = self_open_counters();

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}
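
/*
 * Synchronization between the parent and the worker threads, roughly
 * (a sketch of the handshake, not extra code):
 *
 *	parent                          worker
 *	------                          ------
 *	lock(start_work_mutex)
 *	create threads            ->    sem_post(ready_for_work)
 *	sem_wait(ready_for_work)        lock(start_work_mutex)   [blocks]
 *	unlock(start_work_mutex)  ->    ...replays its atoms...
 *	sem_wait(work_done_sem)   <-    sem_post(work_done_sem)
 *
 * The two mutexes act as gates: a locked mutex parks all workers, and
 * unlocking it releases them simultaneously.
 */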

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage * 9 + cpu_usage) / 10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage * 9 +
				   parent_cpu_usage) / 10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg * 9 + delta) / 10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta / 1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg / 1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage / 1e6, (double)runavg_cpu_usage / 1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage / 1e6,
		(double)runavg_parent_cpu_usage / 1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}
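
/*
 * The running averages above are exponential moving averages with a
 * decay factor of 9/10 per run:
 *
 *	run_avg = (run_avg * 9 + delta) / 10;
 *
 * i.e. roughly the last ten runs dominate the value, so one outlier
 * run shifts the reported 'ravg' by only about 10% of its deviation.
 */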

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}

#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));		\
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data)			\
do {								\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);		\
} while (0)



struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_migrate_task_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 cpu;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct perf_session *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct perf_session *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct perf_session *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);

	void (*migrate_task_event)(struct trace_migrate_task_event *,
				   struct perf_session *session,
				   struct event *,
				   int cpu,
				   u64 timestamp,
				   struct thread *thread);
};
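
/*
 * trace_sched_handler is a small vtable: each sub-command installs the
 * subset of callbacks it cares about (replay_ops, lat_ops, map_ops
 * below) and the generic process_sched_*_event() decoders dispatch
 * through whichever table 'trace_handler' points at, e.g.:
 *
 *	trace_handler = &lat_ops;	// 'perf sched latency'
 *
 * A NULL entry simply means "this view ignores that tracepoint".
 */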

static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct perf_session *session __used,
		    struct event *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct perf_session *session __used,
		    struct event *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, __used *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRId64 " < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %" PRId64 " nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("...  child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
	.wakeup_event = replay_wakeup_event,
	.switch_event = replay_switch_event,
	.fork_event = replay_fork_event,
};

struct sort_dimension {
	const char *name;
	sort_fn_t cmp;
	struct list_head list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}
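
/*
 * The same comparator chain orders both lookup and insertion: during
 * event processing the tree is keyed by cmp_pid (one work_atoms node
 * per thread), and at output time the nodes are re-inserted into a
 * second tree ordered by the user's sort keys.  Sketch of a lookup:
 *
 *	atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
 *
 * A non-zero comparator result steers the descent left or right; zero
 * means the node for that thread was found.
 */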

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}
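
/*
 * Each work_atom walks a small state machine (see enum thread_state);
 * the scheduling delay is the WAIT_CPU -> SCHED_IN leg.  Timeline
 * sketch for one atom (times are trace timestamps):
 *
 *	t0: sched_out        atom->sched_out_time = t0, state SLEEPING
 *	t1: sched_wakeup     atom->wake_up_time   = t1, state WAIT_CPU
 *	t2: sched_in         atom->sched_in_time  = t2, state SCHED_IN
 *
 *	latency = t2 - t1    (accumulated into total_lat / max_lat)
 */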

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct perf_session *session,
		     struct event *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRId64 " < 0 ?\n", delta);


	sched_out = perf_session__findnew(session, switch_event->prev_pid);
	sched_in = perf_session__findnew(session, switch_event->next_pid);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in that we have not heard about yet,
		 * add in an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		      struct perf_session *session,
		      struct event *event __used,
		      int cpu,
		      u64 timestamp,
		      struct thread *this_thread __used)
{
	struct thread *thread = perf_session__findnew(session, runtime_event->pid);
	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}

static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct perf_session *session,
		     struct event *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = perf_session__findnew(session, wakeup_event->pid);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are looking at only one, so don't
	 * make useless noise.
	 */
	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
			   struct perf_session *session,
			   struct event *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (profile_cpu == -1)
		return;

	migrant = perf_session__findnew(session, migrate_task_event->pid);
	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(migrant);
		register_pid(migrant->pid, migrant->comm);
		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
		if (!atoms)
			die("migration-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		nr_unordered_timestamps++;
}

static struct trace_sched_handler lat_ops = {
	.wakeup_event = latency_wakeup_event,
	.switch_event = latency_switch_event,
	.runtime_event = latency_runtime_event,
	.fork_event = latency_fork_event,
	.migrate_task_event = latency_migrate_task_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
		(double)work_list->total_runtime / 1e6,
		work_list->nb_atoms, (double)avg / 1e6,
		(double)work_list->max_lat / 1e6,
		(double)work_list->max_lat_at / 1e9);
}
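
/*
 * output_lat_thread() produces one row of the latency table, for
 * example (illustrative values only; the real columns come from the
 * printf just above):
 *
 *	firefox:2345          |  123.456 ms |     1024 | avg:  0.123 ms | ...
 *
 * Runtime comes from sched_stat_runtime accumulation, the delay
 * columns from the wakeup -> sched_in latencies.
 */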

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name = "pid",
	.cmp = pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name = "avg",
	.cmp = avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name = "max",
	.cmp = max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name = "switch",
	.cmp = switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name = "runtime",
	.cmp = runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}
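
/*
 * sort_lat() re-keys the collected data: it drains atom_root (ordered
 * by pid for fast lookup during processing) node by node and re-inserts
 * each work_atoms into sorted_atom_root under the user-selected key
 * chain, so e.g. with '-s max' the in-order walk in __cmd_lat() emits
 * threads by worst-case latency first.
 */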

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(void *data, struct perf_session *session,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, data);

	FILL_ARRAY(wakeup_event, comm, event, data);
	FILL_FIELD(wakeup_event, pid, event, data);
	FILL_FIELD(wakeup_event, prio, event, data);
	FILL_FIELD(wakeup_event, success, event, data);
	FILL_FIELD(wakeup_event, cpu, event, data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, session, event,
					    cpu, timestamp, thread);
}

/*
 * Track the current task - that way we can know whether there are any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct perf_session *session,
		 struct event *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out __used, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRId64 " < 0 ?\n", delta);


	sched_out = perf_session__findnew(session, switch_event->prev_pid);
	sched_in = perf_session__findnew(session, switch_event->next_pid);

	curr_thread[this_cpu] = sched_in;

	printf("  ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1 = 'A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2 = '0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(".  ");
		} else
			printf("   ");
	}

	printf(" %12.6f secs ", (double)timestamp / 1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}
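
/*
 * The map view prints one line per context switch: a column per CPU,
 * '*' marking the CPU where the switch happened, and two-character
 * shortnames (A0, B0, ...) standing in for threads.  For example
 * (illustrative only):
 *
 *	 *A0  .   B0     412.345678 secs A0 => gcc:4123
 *
 * A newly assigned shortname is announced with the "=> comm:pid"
 * suffix.
 */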

static void
process_sched_switch_event(void *data, struct perf_session *session,
			   struct event *event,
			   int this_cpu,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, data);

	FILL_ARRAY(switch_event, prev_comm, event, data);
	FILL_FIELD(switch_event, prev_pid, event, data);
	FILL_FIELD(switch_event, prev_prio, event, data);
	FILL_FIELD(switch_event, prev_state, event, data);
	FILL_ARRAY(switch_event, next_comm, event, data);
	FILL_FIELD(switch_event, next_pid, event, data);
	FILL_FIELD(switch_event, next_prio, event, data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, session, event,
					    this_cpu, timestamp, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(void *data, struct perf_session *session,
			    struct event *event,
			    int cpu __used,
			    u64 timestamp __used,
			    struct thread *thread __used)
{
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, data);
	FILL_FIELD(runtime_event, pid, event, data);
	FILL_FIELD(runtime_event, runtime, event, data);
	FILL_FIELD(runtime_event, vruntime, event, data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread);
}

static void
process_sched_fork_event(void *data,
			 struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, data);

	FILL_ARRAY(fork_event, parent_comm, event, data);
	FILL_FIELD(fork_event, parent_pid, event, data);
	FILL_ARRAY(fork_event, child_comm, event, data);
	FILL_FIELD(fork_event, child_pid, event, data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event,
					  cpu, timestamp, thread);
}

static void
process_sched_exit_event(struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_sched_migrate_task_event(void *data, struct perf_session *session,
				 struct event *event,
				 int cpu __used,
				 u64 timestamp __used,
				 struct thread *thread __used)
{
	struct trace_migrate_task_event migrate_task_event;

	FILL_COMMON_FIELDS(migrate_task_event, event, data);

	FILL_ARRAY(migrate_task_event, comm, event, data);
	FILL_FIELD(migrate_task_event, pid, event, data);
	FILL_FIELD(migrate_task_event, prio, event, data);
	FILL_FIELD(migrate_task_event, cpu, event, data);

	if (trace_handler->migrate_task_event)
		trace_handler->migrate_task_event(&migrate_task_event, session,
						  event, cpu, timestamp, thread);
}
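
/*
 * process_raw_event() below is the demux point: it resolves the
 * tracepoint id in the raw sample back to its name and fans out to the
 * decoder for that event.  FILL_FIELD()/FILL_ARRAY() do the per-field
 * extraction; e.g. FILL_FIELD(ev, pid, event, data) expands to roughly:
 *
 *	ev.pid = (typeof(ev.pid)) raw_field_value(event, "pid", data);
 */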

static void process_raw_event(union perf_event *raw_event __used,
			      struct perf_session *session, void *data, int cpu,
			      u64 timestamp, struct thread *thread)
{
	struct event *event;
	int type;


	type = trace_parse_common_type(data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "sched_switch"))
		process_sched_switch_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_stat_runtime"))
		process_sched_runtime_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup"))
		process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup_new"))
		process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_fork"))
		process_sched_fork_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_exit"))
		process_sched_exit_event(event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_migrate_task"))
		process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
}

static int process_sample_event(union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel __used,
				struct perf_session *session)
{
	struct thread *thread;

	if (!(session->sample_type & PERF_SAMPLE_RAW))
		return 0;

	thread = perf_session__findnew(session, sample->pid);
	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
		return 0;

	process_raw_event(event, session, sample->raw_data, sample->cpu,
			  sample->time, thread);

	return 0;
}

static struct perf_event_ops event_ops = {
	.sample			= process_sample_event,
	.comm			= perf_event__process_comm,
	.lost			= perf_event__process_lost,
	.fork			= perf_event__process_task,
	.ordered_samples	= true,
};

static void read_events(bool destroy, struct perf_session **psession)
{
	int err = -EINVAL;
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &event_ops);
	if (session == NULL)
		die("No Memory");

	if (perf_session__has_traces(session, "record -R")) {
		err = perf_session__process_events(session, &event_ops);
		if (err)
			die("Failed to process events, error %d", err);

		nr_events      = session->hists.stats.nr_events[0];
		nr_lost_events = session->hists.stats.total_lost;
		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
	}

	if (destroy)
		perf_session__delete(session);

	if (psession)
		*psession = session;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __cmd_lat(void)
{
	struct rb_node *next;
	struct perf_session *session;

	setup_pager();
	read_events(false, &session);
	sort_lat();

	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at       |\n");
	printf(" ---------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)all_runtime/1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

	perf_session__delete(session);
}

static struct trace_sched_handler map_ops = {
	.wakeup_event		= NULL,
	.switch_event		= map_switch_event,
	.runtime_event		= NULL,
	.fork_event		= NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events(true, NULL);
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events(true, NULL);

	printf("nr_run_events:        %ld\n", nr_run_events);
	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups:  %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized:  %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}
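
/*
 * Typical workflow, for reference: 'perf sched record' captures the
 * sched_* tracepoints into perf.data, then one of the analysis modes
 * consumes it:
 *
 *	perf sched record sleep 10
 *	perf sched latency		# per-thread delay table
 *	perf sched map			# per-CPU context-switch map
 *	perf sched replay		# re-execute the recorded workload
 */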

static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|script}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};
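
/*
 * setup_sorting() below tokenizes the --sort string on commas and
 * spaces and appends the matching dimensions to sort_list in the order
 * given, so the first key is the primary one.  For example:
 *
 *	perf sched latency -s max,switch
 *
 * sorts by worst latency first and breaks ties by switch count.
 */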

static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add("pid", &cmp_pid);
}

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch",
	"-e", "sched:sched_stat_wait",
	"-e", "sched:sched_stat_sleep",
	"-e", "sched:sched_stat_iowait",
	"-e", "sched:sched_stat_runtime",
	"-e", "sched:sched_process_exit",
	"-e", "sched:sched_process_fork",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_migrate_task",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, sched_options, sched_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	symbol__init();
	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		trace_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting();
		__cmd_lat();
	} else if (!strcmp(argv[0], "map")) {
		trace_handler = &map_ops;
		setup_sorting();
		__cmd_map();
	} else if (!strncmp(argv[0], "rep", 3)) {
		trace_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		__cmd_replay();
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}