#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/evlist.h"
#include "util/cache.h"
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <sys/prctl.h>
#include <sys/resource.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

static const char *input_name;

static char default_sort_order[] = "avg, max, switch, runtime";
static const char *sort_order = default_sort_order;

static int profile_cpu = -1;

#define PR_SET_NAME	15	/* Set process name */
#define MAX_CPUS	4096

static u64 run_measurement_overhead;
static u64 sleep_measurement_overhead;

#define COMM_LEN	20
#define SYM_LEN		129

#define MAX_PID		65536

static unsigned long nr_tasks;

struct perf_sched {
	struct perf_tool    tool;
	struct perf_session *session;
};

struct sched_atom;

struct task_desc {
	unsigned long		nr;
	unsigned long		pid;
	char			comm[COMM_LEN];

	unsigned long		nr_events;
	unsigned long		curr_event;
	struct sched_atom	**atoms;

	pthread_t		thread;
	sem_t			sleep_sem;

	sem_t			ready_for_work;
	sem_t			work_done_sem;

	u64			cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
	SCHED_EVENT_MIGRATION,
};

struct sched_atom {
	enum sched_event_type	type;
	int			specific_wait;
	u64			timestamp;
	u64			duration;
	unsigned long		nr;
	sem_t			*wait_sem;
	struct task_desc	*wakee;
};

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned int replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head	list;
	enum thread_state	state;
	u64			sched_out_time;
	u64			wake_up_time;
	u64			sched_in_time;
	u64			runtime;
};

struct work_atoms {
	struct list_head	work_list;
	struct thread		*thread;
	struct rb_node		node;
	u64			max_lat;
	u64			max_lat_at;
	u64			total_lat;
	u64			nb_atoms;
	u64			total_runtime;
};
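
/*
 * Comparator used by the latency sort dimensions ("pid", "avg", "max",
 * "switch", "runtime") to order work_atoms entries in the rbtree.
 */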
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;


static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}

static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks * sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(atom->duration);
		break;
	case SCHED_EVENT_SLEEP:
		if (atom->wait_sem)
			ret = sem_wait(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_WAKEUP:
		if (atom->wait_sem)
			ret = sem_post(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_MIGRATION:
		break;
	default:
		BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum =  ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

	return sum;
}

static int self_open_counters(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

	if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned "
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}

static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd;

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	fd = self_open_counters();

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

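	/*
	 * start_work_mutex is taken by create_tasks() and only released in
	 * wait_for_tasks(), so the lock/unlock pair above acts as a start
	 * barrier: all workers begin measuring their task-clock together.
	 */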
	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg*9 + delta)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta/1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}

#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));		\
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data)			\
do {								\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);		\
} while (0)



struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_migrate_task_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 cpu;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct machine *,
			     struct event_format *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct machine *,
			      struct event_format *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct machine *,
			     struct event_format *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event_format *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);

	void (*migrate_task_event)(struct trace_migrate_task_event *,
				   struct machine *machine,
				   struct event_format *,
				   int cpu,
				   u64 timestamp,
				   struct thread *thread);
};


static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct machine *machine __used,
		    struct event_format *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct machine *machine __used,
		    struct event_format *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, __used *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event_format *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
	.wakeup_event	= replay_wakeup_event,
	.switch_event	= replay_switch_event,
	.fork_event	= replay_fork_event,
};

struct sort_dimension {
	const char		*name;
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event_format *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}
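
/*
 * A work_atom starts out THREAD_SLEEPING (zalloc'ed), becomes THREAD_WAIT_CPU
 * on a wakeup or a runnable ('R') sched-out, and THREAD_SCHED_IN once the
 * thread gets the CPU back.  The sched_in_time - wake_up_time delta computed
 * below is the scheduling latency that the report aggregates per thread.
 */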
static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct machine *machine,
		     struct event_format *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
	sched_in = machine__findnew_thread(machine, switch_event->next_pid);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in that we have not heard about yet,
		 * add an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		      struct machine *machine,
		      struct event_format *event __used,
		      int cpu,
		      u64 timestamp,
		      struct thread *this_thread __used)
{
	struct thread *thread = machine__findnew_thread(machine, runtime_event->pid);
	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}

static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct machine *machine,
		     struct event_format *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = machine__findnew_thread(machine, wakeup_event->pid);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are only looking at only one, so don't
	 * make useless noise.
	 */
	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
			   struct machine *machine,
			   struct event_format *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (profile_cpu == -1)
		return;

	migrant = machine__findnew_thread(machine, migrate_task_event->pid);
	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(migrant);
		register_pid(migrant->pid, migrant->comm);
		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
		if (!atoms)
			die("migration-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		nr_unordered_timestamps++;
}

static struct trace_sched_handler lat_ops = {
	.wakeup_event		= latency_wakeup_event,
	.switch_event		= latency_switch_event,
	.runtime_event		= latency_runtime_event,
	.fork_event		= latency_fork_event,
	.migrate_task_event	= latency_migrate_task_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
		(double)work_list->total_runtime / 1e6,
		work_list->nb_atoms, (double)avg / 1e6,
		(double)work_list->max_lat / 1e6,
		(double)work_list->max_lat_at / 1e9);
}

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name	= "pid",
	.cmp	= pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name	= "avg",
	.cmp	= avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name	= "max",
	.cmp	= max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name	= "switch",
	.cmp	= switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name	= "runtime",
	.cmp	= runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(struct perf_tool *tool __used,
			   struct event_format *event,
			   struct perf_sample *sample,
			   struct machine *machine,
			   struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, data);

	FILL_ARRAY(wakeup_event, comm, event, data);
	FILL_FIELD(wakeup_event, pid, event, data);
	FILL_FIELD(wakeup_event, prio, event, data);
	FILL_FIELD(wakeup_event, success, event, data);
	FILL_FIELD(wakeup_event, cpu, event, data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, machine, event,
					    sample->cpu, sample->time, thread);
}

/*
 * Track the current task - that way we can know whether there's any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct machine *machine,
		 struct event_format *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out __used, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
	sched_in = machine__findnew_thread(machine, switch_event->next_pid);

	curr_thread[this_cpu] = sched_in;

	printf(" ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1 = 'A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2 = '0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(". ");
		} else
			printf("  ");
	}

	printf(" %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}

static void
process_sched_switch_event(struct perf_tool *tool __used,
			   struct event_format *event,
			   struct perf_sample *sample,
			   struct machine *machine,
			   struct thread *thread)
{
	int this_cpu = sample->cpu;
	void *data = sample->raw_data;
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, data);

	FILL_ARRAY(switch_event, prev_comm, event, data);
	FILL_FIELD(switch_event, prev_pid, event, data);
	FILL_FIELD(switch_event, prev_prio, event, data);
	FILL_FIELD(switch_event, prev_state, event, data);
	FILL_ARRAY(switch_event, next_comm, event, data);
	FILL_FIELD(switch_event, next_pid, event, data);
	FILL_FIELD(switch_event, next_prio, event, data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, machine, event,
					    this_cpu, sample->time, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(struct perf_tool *tool __used,
			    struct event_format *event,
			    struct perf_sample *sample,
			    struct machine *machine,
			    struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, data);
	FILL_FIELD(runtime_event, pid, event, data);
	FILL_FIELD(runtime_event, runtime, event, data);
	FILL_FIELD(runtime_event, vruntime, event, data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, machine, event,
					     sample->cpu, sample->time, thread);
}

static void
process_sched_fork_event(struct perf_tool *tool __used,
			 struct event_format *event,
			 struct perf_sample *sample,
			 struct machine *machine __used,
			 struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, data);

	FILL_ARRAY(fork_event, parent_comm, event, data);
	FILL_FIELD(fork_event, parent_pid, event, data);
	FILL_ARRAY(fork_event, child_comm, event, data);
	FILL_FIELD(fork_event, child_pid, event, data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event,
					  sample->cpu, sample->time, thread);
}

static void
process_sched_exit_event(struct perf_tool *tool __used,
			 struct event_format *event,
			 struct perf_sample *sample __used,
			 struct machine *machine __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_sched_migrate_task_event(struct perf_tool *tool __used,
				 struct event_format *event,
				 struct perf_sample *sample,
				 struct machine *machine,
				 struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_migrate_task_event migrate_task_event;

	FILL_COMMON_FIELDS(migrate_task_event, event, data);

	FILL_ARRAY(migrate_task_event, comm, event, data);
	FILL_FIELD(migrate_task_event, pid, event, data);
	FILL_FIELD(migrate_task_event, prio, event, data);
	FILL_FIELD(migrate_task_event, cpu, event, data);

	if (trace_handler->migrate_task_event)
		trace_handler->migrate_task_event(&migrate_task_event, machine,
						  event, sample->cpu,
						  sample->time, thread);
}

typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event,
				   struct perf_sample *sample,
				   struct machine *machine,
				   struct thread *thread);

static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
						 union perf_event *event __used,
						 struct perf_sample *sample,
						 struct perf_evsel *evsel,
						 struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct pevent *pevent = sched->session->pevent;
	struct thread *thread = machine__findnew_thread(machine, sample->pid);

	if (thread == NULL) {
		pr_debug("problem processing %s event, skipping it.\n",
			 perf_evsel__name(evsel));
		return -1;
	}

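	/*
	 * Account the sample in the evsel's hists, then dispatch to the
	 * tracepoint-specific handler; the event_format for this evsel is
	 * looked up once via pevent_find_event() and cached in handler.data.
	 */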
	evsel->hists.stats.total_period += sample->period;
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);

	if (evsel->handler.func != NULL) {
		tracepoint_handler f = evsel->handler.func;

		if (evsel->handler.data == NULL)
			evsel->handler.data = pevent_find_event(pevent,
							evsel->attr.config);

		f(tool, evsel->handler.data, sample, machine, thread);
	}

	return 0;
}

static struct perf_sched perf_sched = {
	.tool = {
		.sample		 = perf_sched__process_tracepoint_sample,
		.comm		 = perf_event__process_comm,
		.lost		 = perf_event__process_lost,
		.fork		 = perf_event__process_task,
		.ordered_samples = true,
	},
};

static void read_events(bool destroy, struct perf_session **psession)
{
	int err = -EINVAL;
	const struct perf_evsel_str_handler handlers[] = {
		{ "sched:sched_switch",       process_sched_switch_event, },
		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
		{ "sched:sched_wakeup",       process_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
		{ "sched:sched_process_fork", process_sched_fork_event, },
		{ "sched:sched_process_exit", process_sched_exit_event, },
		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
	};
	struct perf_session *session;

	session = perf_session__new(input_name, O_RDONLY, 0, false,
				    &perf_sched.tool);
	if (session == NULL)
		die("No Memory");

	perf_sched.session = session;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	assert(err == 0);

	if (perf_session__has_traces(session, "record -R")) {
		err = perf_session__process_events(session, &perf_sched.tool);
		if (err)
			die("Failed to process events, error %d", err);

		nr_events      = session->hists.stats.nr_events[0];
		nr_lost_events = session->hists.stats.total_lost;
		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
	}

	if (destroy)
		perf_session__delete(session);

	if (psession)
		*psession = session;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __cmd_lat(void)
{
	struct rb_node *next;
	struct perf_session *session;

	setup_pager();
	read_events(false, &session);
	sort_lat();

	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
	printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
	printf(" ---------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
		(double)all_runtime/1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

	perf_session__delete(session);
}

static struct trace_sched_handler map_ops = {
	.wakeup_event	= NULL,
	.switch_event	= map_switch_event,
	.runtime_event	= NULL,
	.fork_event	= NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events(true, NULL);
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events(true, NULL);

	printf("nr_run_events:        %ld\n", nr_run_events);
	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups:  %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized: %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}


static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|script}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INCR('v', "verbose", &verbose,
		   "be more verbose (show symbol address, etc)"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add("pid", &cmp_pid);
}

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch",
	"-e", "sched:sched_stat_wait",
	"-e", "sched:sched_stat_sleep",
	"-e", "sched:sched_stat_iowait",
	"-e", "sched:sched_stat_runtime",
	"-e", "sched:sched_process_exit",
	"-e", "sched:sched_process_fork",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_migrate_task",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, sched_options, sched_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	symbol__init();
	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		trace_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting();
		__cmd_lat();
	} else if (!strcmp(argv[0], "map")) {
		trace_handler = &map_ops;
		setup_sorting();
		__cmd_map();
	} else if (!strncmp(argv[0], "rep", 3)) {
		trace_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		__cmd_replay();
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}