#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <sys/types.h>
#include <sys/prctl.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

static char const *input_name = "perf.data";
static int input;
static unsigned long page_size;
static unsigned long mmap_window = 32;

static unsigned long total_comm = 0;

static struct rb_root threads;
static struct thread *last_match;

static struct perf_header *header;
static u64 sample_type;

static char default_sort_order[] = "avg, max, switch, runtime";
static char *sort_order = default_sort_order;

#define PR_SET_NAME	15	/* Set process name */
#define MAX_CPUS	4096

#define BUG_ON(x)	assert(!(x))

static u64 run_measurement_overhead;
static u64 sleep_measurement_overhead;

#define COMM_LEN	20
#define SYM_LEN		129

#define MAX_PID		65536

static unsigned long nr_tasks;

struct sched_atom;

struct task_desc {
	unsigned long		nr;
	unsigned long		pid;
	char			comm[COMM_LEN];

	unsigned long		nr_events;
	unsigned long		curr_event;
	struct sched_atom	**atoms;

	pthread_t		thread;
	sem_t			sleep_sem;

	sem_t			ready_for_work;
	sem_t			work_done_sem;

	u64			cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
};

struct sched_atom {
	enum sched_event_type	type;
	u64			timestamp;
	u64			duration;
	unsigned long		nr;
	int			specific_wait;
	sem_t			*wait_sem;
	struct task_desc	*wakee;
};

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned long replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head	list;
	enum thread_state	state;
	u64			sched_out_time;
	u64			wake_up_time;
	u64			sched_in_time;
	u64			runtime;
};

struct work_atoms {
	struct list_head	work_list;
	struct thread		*thread;
	struct rb_node		node;
	u64			max_lat;
	u64			total_lat;
	u64			nb_atoms;
	u64			total_runtime;
};

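/*
 * Latency records are kept per thread as a list of work atoms (one atom
 * per sched-out/wakeup/sched-in cycle), collected in an rb-tree and
 * compared with a chain of sort functions of this type:
 */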
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;


static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %Ld nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = calloc(1, sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}

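/*
 * A SLEEP atom is recorded when a task is switched out; a later wakeup
 * that targets this task attaches a semaphore to its most recent SLEEP
 * atom (see add_sched_event_wakeup() above), so the replayed task blocks
 * until the replayed waker posts it.
 */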
static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}

static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = calloc(1, sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks*sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;
	u64 now;
	long long delta;

	now = get_nsecs();
	delta = start_time + atom->timestamp - now;

	switch (atom->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(atom->duration);
		break;
	case SCHED_EVENT_SLEEP:
		if (atom->wait_sem)
			ret = sem_wait(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_WAKEUP:
		if (atom->wait_sem)
			ret = sem_post(atom->wait_sem);
		BUG_ON(ret);
		break;
	default:
		BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum  = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

	return sum;
}

static u64 get_cpu_usage_nsec_self(void)
{
	char filename [] = "/proc/1234567890/sched";
	unsigned long msecs, nsecs;
	char *line = NULL;
	u64 total = 0;
	size_t len = 0;
	ssize_t chars;
	FILE *file;
	int ret;

	sprintf(filename, "/proc/%d/sched", getpid());
	file = fopen(filename, "r");
	BUG_ON(!file);

	while ((chars = getline(&line, &len, file)) != -1) {
		ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
			     &msecs, &nsecs);
		if (ret == 2) {
			total = msecs*1e6 + nsecs;
			break;
		}
	}
	if (line)
		free(line);
	fclose(file);

	return total;
}

static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
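	/*
	 * The parent holds start_work_mutex while setting up the run
	 * (see create_tasks()/wait_for_tasks()), so this lock/unlock
	 * pair acts as a start barrier: all worker threads block here
	 * until the parent releases the mutex.
	 */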
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self();

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self();
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;

	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct, std_dev;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg*9 + delta)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta/1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %Ld nsecs\n", T1-T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %Ld nsecs\n", T1-T0);
}

static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
	struct thread *thread;

	thread = threads__findnew(event->comm.pid, &threads, &last_match);

	dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
		(void *)(offset + head),
		(void *)(long)(event->header.size),
		event->comm.comm, event->comm.pid);

	if (thread == NULL ||
	    thread__set_comm(thread, event->comm.comm)) {
		dump_printf("problem processing perf_event_comm, skipping event.\n");
		return -1;
	}
	total_comm++;

	return 0;
}


struct raw_event_sample {
	u32 size;
	char data[0];
};

#define FILL_FIELD(ptr, field, event, data) \
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data) \
do { \
	void *__array = raw_field_ptr(event, #array, data); \
	memcpy(ptr.array, __array, sizeof(ptr.array)); \
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data) \
do { \
	FILL_FIELD(ptr, common_type, event, data); \
	FILL_FIELD(ptr, common_flags, event, data); \
	FILL_FIELD(ptr, common_preempt_count, event, data); \
	FILL_FIELD(ptr, common_pid, event, data); \
	FILL_FIELD(ptr, common_tgid, event, data); \
} while (0)


struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);
};


static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct event *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct event *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %Ld < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
	.wakeup_event	= replay_wakeup_event,
	.switch_event	= replay_switch_event,
	.fork_event	= replay_fork_event,
};

struct sort_dimension {
	const char *name;
	sort_fn_t cmp;
	struct list_head list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms;

	atoms = calloc(sizeof(*atoms), 1);
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom;

	atom = calloc(sizeof(*atom), 1);
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat)
		atoms->max_lat = delta;
	atoms->nb_atoms++;
}

static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct event *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %Ld < 0 ?\n", delta);


	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in we have not heard about yet,
		 * add in an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		      struct event *event __used,
		      int cpu,
		      u64 timestamp,
		      struct thread *this_thread __used)
{
	struct work_atoms *atoms;
	struct thread *thread;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	thread = threads__findnew(runtime_event->pid, &threads, &last_match);
	atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}

static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct event *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static struct trace_sched_handler lat_ops = {
	.wakeup_event	= latency_wakeup_event,
	.switch_event	= latency_switch_event,
	.runtime_event	= latency_runtime_event,
	.fork_event	= latency_fork_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
		(double)work_list->total_runtime / 1e6,
		work_list->nb_atoms, (double)avg / 1e6,
		(double)work_list->max_lat / 1e6);
}

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name = "pid",
	.cmp = pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name = "avg",
	.cmp = avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name = "max",
	.cmp = max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name = "switch",
	.cmp = switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name = "runtime",
	.cmp = runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(struct raw_event_sample *raw,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, raw->data);

	FILL_ARRAY(wakeup_event, comm, event, raw->data);
	FILL_FIELD(wakeup_event, pid, event, raw->data);
	FILL_FIELD(wakeup_event, prio, event, raw->data);
	FILL_FIELD(wakeup_event, success, event, raw->data);
	FILL_FIELD(wakeup_event, cpu, event, raw->data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
}

/*
 * Track the current task - that way we can know whether there's any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

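/*
 * curr_pid[cpu] is the PID most recently switched in on that CPU,
 * initialized to (u32)-1 for "unknown"; it lets the switch handler
 * flag switch-out events whose prev_pid does not match.
 */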
static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct event *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %Ld < 0 ?\n", delta);


	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);

	curr_thread[this_cpu] = sched_in;

	printf(" ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1 = 'A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2 = '0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(". ");
		} else
			printf(" ");
	}

	printf(" %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}


static void
process_sched_switch_event(struct raw_event_sample *raw,
			   struct event *event,
			   int this_cpu,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, raw->data);

	FILL_ARRAY(switch_event, prev_comm, event, raw->data);
	FILL_FIELD(switch_event, prev_pid, event, raw->data);
	FILL_FIELD(switch_event, prev_prio, event, raw->data);
	FILL_FIELD(switch_event, prev_state, event, raw->data);
	FILL_ARRAY(switch_event, next_comm, event, raw->data);
	FILL_FIELD(switch_event, next_pid, event, raw->data);
	FILL_FIELD(switch_event, next_prio, event, raw->data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(struct raw_event_sample *raw,
			    struct event *event,
			    int cpu __used,
			    u64 timestamp __used,
			    struct thread *thread __used)
{
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, raw->data);
	FILL_FIELD(runtime_event, pid, event, raw->data);
	FILL_FIELD(runtime_event, runtime, event, raw->data);
	FILL_FIELD(runtime_event, vruntime, event, raw->data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
}

static void
process_sched_fork_event(struct raw_event_sample *raw,
			 struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, raw->data);

	FILL_ARRAY(fork_event, parent_comm, event, raw->data);
	FILL_FIELD(fork_event, parent_pid, event, raw->data);
	FILL_ARRAY(fork_event, child_comm, event, raw->data);
	FILL_FIELD(fork_event, child_pid, event, raw->data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
}

static void
process_sched_exit_event(struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_raw_event(event_t *raw_event __used, void *more_data,
		  int cpu, u64 timestamp, struct thread *thread)
{
	struct raw_event_sample *raw = more_data;
	struct event *event;
	int type;

	type = trace_parse_common_type(raw->data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "sched_switch"))
		process_sched_switch_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_stat_runtime"))
		process_sched_runtime_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup"))
		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup_new"))
		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_fork"))
		process_sched_fork_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_exit"))
		process_sched_exit_event(event, cpu, timestamp, thread);
}

static int
process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
	char level;
	int show = 0;
	struct dso *dso = NULL;
	struct thread *thread;
	u64 ip = event->ip.ip;
	u64 timestamp = -1;
	u32 cpu = -1;
	u64 period = 1;
	void *more_data = event->ip.__more_data;
	int cpumode;

	thread = threads__findnew(event->ip.pid, &threads, &last_match);

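	/*
	 * The optional sample fields are laid out in sample_type order,
	 * so more_data is advanced past each field the header reports as
	 * present before the raw tracepoint payload is reached.
	 */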
	if (sample_type & PERF_SAMPLE_TIME) {
		timestamp = *(u64 *)more_data;
		more_data += sizeof(u64);
	}

	if (sample_type & PERF_SAMPLE_CPU) {
		cpu = *(u32 *)more_data;
		more_data += sizeof(u32);
		more_data += sizeof(u32); /* reserved */
	}

	if (sample_type & PERF_SAMPLE_PERIOD) {
		period = *(u64 *)more_data;
		more_data += sizeof(u64);
	}

	dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
		(void *)(offset + head),
		(void *)(long)(event->header.size),
		event->header.misc,
		event->ip.pid, event->ip.tid,
		(void *)(long)ip,
		(long long)period);

	if (thread == NULL) {
		eprintf("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;

	if (cpumode == PERF_EVENT_MISC_KERNEL) {
		show = SHOW_KERNEL;
		level = 'k';

		dso = kernel_dso;

		dump_printf(" ...... dso: %s\n", dso->name);

	} else if (cpumode == PERF_EVENT_MISC_USER) {

		show = SHOW_USER;
		level = '.';

	} else {
		show = SHOW_HV;
		level = 'H';

		dso = hypervisor_dso;

		dump_printf(" ...... dso: [hypervisor]\n");
	}

	if (sample_type & PERF_SAMPLE_RAW)
		process_raw_event(event, more_data, cpu, timestamp, thread);

	return 0;
}

static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
	trace_event(event);

	nr_events++;
	switch (event->header.type) {
	case PERF_EVENT_MMAP:
		return 0;
	case PERF_EVENT_LOST:
		nr_lost_chunks++;
		nr_lost_events += event->lost.lost;
		return 0;

	case PERF_EVENT_COMM:
		return process_comm_event(event, offset, head);

	case PERF_EVENT_EXIT ... PERF_EVENT_READ:
		return 0;

	case PERF_EVENT_SAMPLE:
		return process_sample_event(event, offset, head);

	case PERF_EVENT_MAX:
	default:
		return -1;
	}

	return 0;
}

static int read_events(void)
{
	int ret, rc = EXIT_FAILURE;
	unsigned long offset = 0;
	unsigned long head = 0;
	struct stat perf_stat;
	event_t *event;
	uint32_t size;
	char *buf;

	trace_report();
	register_idle_thread(&threads, &last_match);

	input = open(input_name, O_RDONLY);
	if (input < 0) {
		perror("failed to open file");
		exit(-1);
	}

	ret = fstat(input, &perf_stat);
	if (ret < 0) {
		perror("failed to stat file");
		exit(-1);
	}

	if (!perf_stat.st_size) {
		fprintf(stderr, "zero-sized file, nothing to do!\n");
		exit(0);
	}
	header = perf_header__read(input);
	head = header->data_offset;
	sample_type = perf_header__sample_type(header);

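	/*
	 * The sched tracepoint payloads are carried in PERF_SAMPLE_RAW
	 * data, so a perf.data file recorded without raw samples cannot
	 * be parsed by this tool.
	 */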
	if (!(sample_type & PERF_SAMPLE_RAW))
		die("No trace sample to read. Did you call perf record "
		    "without -R?");

	if (load_kernel() < 0) {
		perror("failed to load kernel symbols");
		return EXIT_FAILURE;
	}

remap:
	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
			   MAP_SHARED, input, offset);
	if (buf == MAP_FAILED) {
		perror("failed to mmap file");
		exit(-1);
	}

more:
	event = (event_t *)(buf + head);

	size = event->header.size;
	if (!size)
		size = 8;

	if (head + event->header.size >= page_size * mmap_window) {
		unsigned long shift = page_size * (head / page_size);
		int res;

		res = munmap(buf, page_size * mmap_window);
		assert(res == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;


	if (!size || process_event(event, offset, head) < 0) {

		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */

		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head < (unsigned long)perf_stat.st_size)
		goto more;

	rc = EXIT_SUCCESS;
	close(input);

	return rc;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __cmd_lat(void)
{
	struct rb_node *next;

	setup_pager();
	read_events();
	sort_lat();

	printf("\n -----------------------------------------------------------------------------------------\n");
	printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
	printf(" -----------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf(" TOTAL: |%11.3f ms |%9Ld |\n",
		(double)all_runtime/1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

}

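/*
 * 'perf sched map' only needs sched_switch events to draw the per-CPU
 * map, so the other handlers are left NULL:
 */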
static struct trace_sched_handler map_ops = {
	.wakeup_event	= NULL,
	.switch_event	= map_switch_event,
	.runtime_event	= NULL,
	.fork_event	= NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events();
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events();

	printf("nr_run_events: %ld\n", nr_run_events);
	printf("nr_sleep_events: %ld\n", nr_sleep_events);
	printf("nr_wakeup_events: %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups: %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized: %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}


static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|trace}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_INTEGER('r', "repeat", &replay_repeat,
		    "repeat the workload replay N times (-1: infinite)"),
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add((char *)"pid", &cmp_pid);
}

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-M",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch:r",
	"-e", "sched:sched_stat_wait:r",
	"-e", "sched:sched_stat_sleep:r",
	"-e", "sched:sched_stat_iowait:r",
	"-e", "sched:sched_stat_runtime:r",
	"-e", "sched:sched_process_exit:r",
	"-e", "sched:sched_process_fork:r",
	"-e", "sched:sched_wakeup:r",
"sched:sched_migrate_task:r", 1940 }; 1941 1942 static int __cmd_record(int argc, const char **argv) 1943 { 1944 unsigned int rec_argc, i, j; 1945 const char **rec_argv; 1946 1947 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1948 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1949 1950 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1951 rec_argv[i] = strdup(record_args[i]); 1952 1953 for (j = 1; j < (unsigned int)argc; j++, i++) 1954 rec_argv[i] = argv[j]; 1955 1956 BUG_ON(i != rec_argc); 1957 1958 return cmd_record(i, rec_argv, NULL); 1959 } 1960 1961 int cmd_sched(int argc, const char **argv, const char *prefix __used) 1962 { 1963 symbol__init(); 1964 page_size = getpagesize(); 1965 1966 argc = parse_options(argc, argv, sched_options, sched_usage, 1967 PARSE_OPT_STOP_AT_NON_OPTION); 1968 if (!argc) 1969 usage_with_options(sched_usage, sched_options); 1970 1971 if (!strncmp(argv[0], "rec", 3)) { 1972 return __cmd_record(argc, argv); 1973 } else if (!strncmp(argv[0], "lat", 3)) { 1974 trace_handler = &lat_ops; 1975 if (argc > 1) { 1976 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 1977 if (argc) 1978 usage_with_options(latency_usage, latency_options); 1979 } 1980 setup_sorting(); 1981 __cmd_lat(); 1982 } else if (!strcmp(argv[0], "map")) { 1983 trace_handler = &map_ops; 1984 setup_sorting(); 1985 __cmd_map(); 1986 } else if (!strncmp(argv[0], "rep", 3)) { 1987 trace_handler = &replay_ops; 1988 if (argc) { 1989 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 1990 if (argc) 1991 usage_with_options(replay_usage, replay_options); 1992 } 1993 __cmd_replay(); 1994 } else if (!strcmp(argv[0], "trace")) { 1995 /* 1996 * Aliased to 'perf trace' for now: 1997 */ 1998 return cmd_trace(argc, argv, prefix); 1999 } else { 2000 usage_with_options(sched_usage, sched_options); 2001 } 2002 2003 return 0; 2004 } 2005