#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <sys/prctl.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

static char const *input_name = "perf.data";

static char default_sort_order[] = "avg, max, switch, runtime";
static const char *sort_order = default_sort_order;

static int profile_cpu = -1;

#define PR_SET_NAME	15		/* Set process name */
#define MAX_CPUS	4096

static u64 run_measurement_overhead;
static u64 sleep_measurement_overhead;

#define COMM_LEN	20
#define SYM_LEN		129

#define MAX_PID		65536

static unsigned long nr_tasks;

struct sched_atom;

struct task_desc {
	unsigned long nr;
	unsigned long pid;
	char comm[COMM_LEN];

	unsigned long nr_events;
	unsigned long curr_event;
	struct sched_atom **atoms;

	pthread_t thread;
	sem_t sleep_sem;

	sem_t ready_for_work;
	sem_t work_done_sem;

	u64 cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
	SCHED_EVENT_MIGRATION,
};

struct sched_atom {
	enum sched_event_type type;
	int specific_wait;
	u64 timestamp;
	u64 duration;
	unsigned long nr;
	sem_t *wait_sem;
	struct task_desc *wakee;
};

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned int replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head list;
	enum thread_state state;
	u64 sched_out_time;
	u64 wake_up_time;
	u64 sched_in_time;
	u64 runtime;
};

struct work_atoms {
	struct list_head work_list;
	struct thread *thread;
	struct rb_node node;
	u64 max_lat;
	u64 max_lat_at;
	u64 total_lat;
	u64 nb_atoms;
	u64 total_runtime;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;
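/*
 * Timing helpers for the 'replay' workload simulator: get_nsecs() reads
 * CLOCK_MONOTONIC, burn_nsecs() spins for a given duration (minus the
 * measured overhead of the measurement itself) and sleep_nsecs() blocks
 * in nanosleep().  The calibrate_*() routines below estimate those
 * overheads before a replay run.
 */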
static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}
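/*
 * Task bookkeeping for replay: pid_to_task[] gives direct lookup by PID
 * (bounded by MAX_PID), while tasks[] keeps the registration order so
 * the replay threads can be created and printed as task #0..nr_tasks-1.
 */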
static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks * sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;
	u64 now;
	long long delta;

	now = get_nsecs();
	delta = start_time + atom->timestamp - now;

	switch (atom->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(atom->duration);
		break;
	case SCHED_EVENT_SLEEP:
		if (atom->wait_sem)
			ret = sem_wait(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_WAKEUP:
		if (atom->wait_sem)
			ret = sem_post(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_MIGRATION:
		break;
	default:
		BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum  = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

	return sum;
}

static int self_open_counters(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

	if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned "
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}
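/*
 * Body of every synthetic replay thread.  Each iteration is a barrier
 * handshake with wait_for_tasks(): the thread posts ready_for_work,
 * blocks on start_work_mutex until the parent releases it, replays its
 * sched_atom list, posts work_done_sem and then parks on
 * work_done_wait_mutex until the next round.  Per-thread CPU time is
 * sampled before and after via a task-clock perf counter.
 */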
static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd;

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	fd = self_open_counters();

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct, std_dev;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg*9 + delta)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta/1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}
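/*
 * Helpers to unpack raw tracepoint payloads by field name.  For example,
 *
 *	FILL_FIELD(wakeup_event, pid, event, data);
 *
 * expands to
 *
 *	wakeup_event.pid = (typeof(wakeup_event.pid))
 *				raw_field_value(event, "pid", data);
 *
 * and FILL_ARRAY() memcpy()s a fixed-size field such as comm[16].
 */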
#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));		\
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data)			\
do {								\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);		\
} while (0)



struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_migrate_task_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 cpu;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct perf_session *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct perf_session *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct perf_session *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);

	void (*migrate_task_event)(struct trace_migrate_task_event *,
				   struct perf_session *session,
				   struct event *,
				   int cpu,
				   u64 timestamp,
				   struct thread *thread);
};
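/*
 * 'perf sched replay' handlers: they translate the recorded wakeup,
 * switch and fork tracepoints into per-task sched_atom lists, which the
 * synthetic threads created in create_tasks() later act out.
 */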
static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct perf_session *session __used,
		    struct event *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct perf_session *session __used,
		    struct event *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}
static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("...  child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
	.wakeup_event		= replay_wakeup_event,
	.switch_event		= replay_switch_event,
	.fork_event		= replay_fork_event,
};

struct sort_dimension {
	const char *name;
	sort_fn_t cmp;
	struct list_head list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}
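/*
 * A work_atom's scheduling delay is the time between its wake-up
 * (wake_up_time, or sched_out_time for a task that was switched out
 * runnable) and the moment it is scheduled back in.  add_sched_in_event()
 * accumulates that delay into total_lat and tracks the worst case in
 * max_lat / max_lat_at.
 */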
static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct perf_session *session,
		     struct event *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = perf_session__findnew(session, switch_event->prev_pid);
	sched_in = perf_session__findnew(session, switch_event->next_pid);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in we have not heard about yet,
		 * add in an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		      struct perf_session *session,
		      struct event *event __used,
		      int cpu,
		      u64 timestamp,
		      struct thread *this_thread __used)
{
	struct thread *thread = perf_session__findnew(session, runtime_event->pid);
	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}
static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct perf_session *session,
		     struct event *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = perf_session__findnew(session, wakeup_event->pid);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are only looking at only one, so don't
	 * make useless noise.
	 */
	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
			   struct perf_session *session,
			   struct event *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (profile_cpu == -1)
		return;

	migrant = perf_session__findnew(session, migrate_task_event->pid);
	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(migrant);
		register_pid(migrant->pid, migrant->comm);
		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
		if (!atoms)
			die("migration-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		nr_unordered_timestamps++;
}

static struct trace_sched_handler lat_ops = {
	.wakeup_event		= latency_wakeup_event,
	.switch_event		= latency_switch_event,
	.runtime_event		= latency_runtime_event,
	.fork_event		= latency_fork_event,
	.migrate_task_event	= latency_migrate_task_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
		(double)work_list->total_runtime / 1e6,
		work_list->nb_atoms, (double)avg / 1e6,
		(double)work_list->max_lat / 1e6,
		(double)work_list->max_lat_at / 1e9);
}
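/*
 * Sort dimensions selectable via 'perf sched latency --sort': pid, avg
 * (average delay), max (maximum delay), switch (number of context
 * switches) and runtime (total runtime).  The default order is
 * "avg, max, switch, runtime"; "pid" is always used as the key list for
 * the per-thread lookup tree (cmp_pid).
 */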
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name = "pid",
	.cmp = pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name = "avg",
	.cmp = avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name = "max",
	.cmp = max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name = "switch",
	.cmp = switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name = "runtime",
	.cmp = runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(void *data, struct perf_session *session,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, data);

	FILL_ARRAY(wakeup_event, comm, event, data);
	FILL_FIELD(wakeup_event, pid, event, data);
	FILL_FIELD(wakeup_event, prio, event, data);
	FILL_FIELD(wakeup_event, success, event, data);
	FILL_FIELD(wakeup_event, cpu, event, data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, session, event,
					    cpu, timestamp, thread);
}
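/*
 * State used by 'perf sched map': one column per CPU, each running task
 * shown by a two-character shortname (A0, B0, ... Z0, A1, ...) assigned
 * on first appearance in map_switch_event(); idle (pid 0) is shown as
 * a dot.
 */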
/*
 * Track the current task - that way we can know whether there's any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct perf_session *session,
		 struct event *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = perf_session__findnew(session, switch_event->prev_pid);
	sched_in = perf_session__findnew(session, switch_event->next_pid);

	curr_thread[this_cpu] = sched_in;

	printf(" ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1 = 'A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2 = '0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(".  ");
		} else
			printf("   ");
	}

	printf("  %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}
static void
process_sched_switch_event(void *data, struct perf_session *session,
			   struct event *event,
			   int this_cpu,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, data);

	FILL_ARRAY(switch_event, prev_comm, event, data);
	FILL_FIELD(switch_event, prev_pid, event, data);
	FILL_FIELD(switch_event, prev_prio, event, data);
	FILL_FIELD(switch_event, prev_state, event, data);
	FILL_ARRAY(switch_event, next_comm, event, data);
	FILL_FIELD(switch_event, next_pid, event, data);
	FILL_FIELD(switch_event, next_prio, event, data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, session, event,
					    this_cpu, timestamp, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(void *data, struct perf_session *session,
			    struct event *event,
			    int cpu __used,
			    u64 timestamp __used,
			    struct thread *thread __used)
{
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, data);
	FILL_FIELD(runtime_event, pid, event, data);
	FILL_FIELD(runtime_event, runtime, event, data);
	FILL_FIELD(runtime_event, vruntime, event, data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread);
}

static void
process_sched_fork_event(void *data,
			 struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, data);

	FILL_ARRAY(fork_event, parent_comm, event, data);
	FILL_FIELD(fork_event, parent_pid, event, data);
	FILL_ARRAY(fork_event, child_comm, event, data);
	FILL_FIELD(fork_event, child_pid, event, data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event,
					  cpu, timestamp, thread);
}

static void
process_sched_exit_event(struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_sched_migrate_task_event(void *data, struct perf_session *session,
				 struct event *event,
				 int cpu __used,
				 u64 timestamp __used,
				 struct thread *thread __used)
{
	struct trace_migrate_task_event migrate_task_event;

	FILL_COMMON_FIELDS(migrate_task_event, event, data);

	FILL_ARRAY(migrate_task_event, comm, event, data);
	FILL_FIELD(migrate_task_event, pid, event, data);
	FILL_FIELD(migrate_task_event, prio, event, data);
	FILL_FIELD(migrate_task_event, cpu, event, data);

	if (trace_handler->migrate_task_event)
		trace_handler->migrate_task_event(&migrate_task_event, session,
						  event, cpu, timestamp, thread);
}
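/*
 * Dispatch a raw tracepoint sample by event name to the unpacking
 * routines above.  Events that are recorded but not listed here (the
 * sched_stat_wait/sleep/iowait tracepoints, for instance) are simply
 * ignored.
 */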
static void
process_raw_event(event_t *raw_event __used, struct perf_session *session,
		  void *data, int cpu, u64 timestamp, struct thread *thread)
{
	struct event *event;
	int type;


	type = trace_parse_common_type(data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "sched_switch"))
		process_sched_switch_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_stat_runtime"))
		process_sched_runtime_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup"))
		process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup_new"))
		process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_fork"))
		process_sched_fork_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_exit"))
		process_sched_exit_event(event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_migrate_task"))
		process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
}

static int process_sample_event(event_t *event, struct sample_data *sample,
				struct perf_session *session)
{
	struct thread *thread;

	if (!(session->sample_type & PERF_SAMPLE_RAW))
		return 0;

	thread = perf_session__findnew(session, sample->pid);
	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
		return 0;

	process_raw_event(event, session, sample->raw_data, sample->cpu,
			  sample->time, thread);

	return 0;
}

static struct perf_event_ops event_ops = {
	.sample			= process_sample_event,
	.comm			= event__process_comm,
	.lost			= event__process_lost,
	.fork			= event__process_task,
	.ordered_samples	= true,
};

static int read_events(void)
{
	int err = -EINVAL;
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &event_ops);
	if (session == NULL)
		return -ENOMEM;

	if (perf_session__has_traces(session, "record -R")) {
		err = perf_session__process_events(session, &event_ops);
		nr_events      = session->hists.stats.nr_events[0];
		nr_lost_events = session->hists.stats.total_lost;
		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
	}

	perf_session__delete(session);
	return err;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}
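/*
 * 'perf sched latency' report: one line per task with total runtime,
 * number of context switches, average and maximum wakeup delay, and the
 * timestamp of the maximum, followed by a summary of suspicious events.
 */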
static void __cmd_lat(void)
{
	struct rb_node *next;

	setup_pager();
	read_events();
	sort_lat();

	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at       |\n");
	printf(" ---------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)all_runtime/1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

}

static struct trace_sched_handler map_ops = {
	.wakeup_event		= NULL,
	.switch_event		= map_switch_event,
	.runtime_event		= NULL,
	.fork_event		= NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events();
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events();

	printf("nr_run_events:        %ld\n", nr_run_events);
	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups:  %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized:  %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}


static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|trace}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INCR('v', "verbose", &verbose,
		   "be more verbose (show symbol address, etc)"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};
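/*
 * Parse the --sort string into sort_list.  Keys may be separated by
 * commas or spaces; an unknown key aborts with the latency usage text.
 * "pid" is always added to cmp_pid, the key list used for the
 * per-thread rb-tree lookups.
 */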
static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add("pid", &cmp_pid);
}

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch",
	"-e", "sched:sched_stat_wait",
	"-e", "sched:sched_stat_sleep",
	"-e", "sched:sched_stat_iowait",
	"-e", "sched:sched_stat_runtime",
	"-e", "sched:sched_process_exit",
	"-e", "sched:sched_process_fork",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_migrate_task",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, sched_options, sched_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	symbol__init();
	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		trace_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting();
		__cmd_lat();
	} else if (!strcmp(argv[0], "map")) {
		trace_handler = &map_ops;
		setup_sorting();
		__cmd_map();
	} else if (!strncmp(argv[0], "rep", 3)) {
		trace_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		__cmd_replay();
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}
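/*
 * Typical workflow (illustrative only; the exact option handling is
 * defined by the sub-commands above):
 *
 *	perf sched record sleep 1	# trace the whole system for ~1s
 *	perf sched latency --sort max	# per-task wakeup latency report
 *	perf sched map			# per-CPU context-switch map
 *	perf sched replay -r 10		# re-play the traced workload 10 times
 */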