1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/evlist.h" 6 #include "util/cache.h" 7 #include "util/evsel.h" 8 #include "util/symbol.h" 9 #include "util/thread.h" 10 #include "util/header.h" 11 #include "util/session.h" 12 #include "util/tool.h" 13 #include "util/cloexec.h" 14 #include "util/thread_map.h" 15 #include "util/color.h" 16 #include "util/stat.h" 17 #include "util/callchain.h" 18 #include "util/time-utils.h" 19 20 #include <subcmd/parse-options.h> 21 #include "util/trace-event.h" 22 23 #include "util/debug.h" 24 25 #include <linux/log2.h> 26 #include <sys/prctl.h> 27 #include <sys/resource.h> 28 29 #include <semaphore.h> 30 #include <pthread.h> 31 #include <math.h> 32 #include <api/fs/fs.h> 33 #include <linux/time64.h> 34 35 #define PR_SET_NAME 15 /* Set process name */ 36 #define MAX_CPUS 4096 37 #define COMM_LEN 20 38 #define SYM_LEN 129 39 #define MAX_PID 1024000 40 41 struct sched_atom; 42 43 struct task_desc { 44 unsigned long nr; 45 unsigned long pid; 46 char comm[COMM_LEN]; 47 48 unsigned long nr_events; 49 unsigned long curr_event; 50 struct sched_atom **atoms; 51 52 pthread_t thread; 53 sem_t sleep_sem; 54 55 sem_t ready_for_work; 56 sem_t work_done_sem; 57 58 u64 cpu_usage; 59 }; 60 61 enum sched_event_type { 62 SCHED_EVENT_RUN, 63 SCHED_EVENT_SLEEP, 64 SCHED_EVENT_WAKEUP, 65 SCHED_EVENT_MIGRATION, 66 }; 67 68 struct sched_atom { 69 enum sched_event_type type; 70 int specific_wait; 71 u64 timestamp; 72 u64 duration; 73 unsigned long nr; 74 sem_t *wait_sem; 75 struct task_desc *wakee; 76 }; 77 78 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" 79 80 enum thread_state { 81 THREAD_SLEEPING = 0, 82 THREAD_WAIT_CPU, 83 THREAD_SCHED_IN, 84 THREAD_IGNORE 85 }; 86 87 struct work_atom { 88 struct list_head list; 89 enum thread_state state; 90 u64 sched_out_time; 91 u64 wake_up_time; 92 u64 sched_in_time; 93 u64 runtime; 94 }; 95 96 struct work_atoms { 97 struct list_head work_list; 98 struct thread *thread; 99 struct rb_node node; 100 u64 max_lat; 101 u64 max_lat_at; 102 u64 total_lat; 103 u64 nb_atoms; 104 u64 total_runtime; 105 int num_merged; 106 }; 107 108 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 109 110 struct perf_sched; 111 112 struct trace_sched_handler { 113 int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel, 114 struct perf_sample *sample, struct machine *machine); 115 116 int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel, 117 struct perf_sample *sample, struct machine *machine); 118 119 int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel, 120 struct perf_sample *sample, struct machine *machine); 121 122 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 123 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 124 struct machine *machine); 125 126 int (*migrate_task_event)(struct perf_sched *sched, 127 struct perf_evsel *evsel, 128 struct perf_sample *sample, 129 struct machine *machine); 130 }; 131 132 #define COLOR_PIDS PERF_COLOR_BLUE 133 #define COLOR_CPUS PERF_COLOR_BG_RED 134 135 struct perf_sched_map { 136 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 137 int *comp_cpus; 138 bool comp; 139 struct thread_map *color_pids; 140 const char *color_pids_str; 141 struct cpu_map *color_cpus; 142 const char *color_cpus_str; 143 struct cpu_map *cpus; 144 const char *cpus_str; 145 }; 146 147 struct perf_sched { 148 struct perf_tool tool; 149 const char *sort_order; 150 unsigned long nr_tasks; 151 struct task_desc 
**pid_to_task; 152 struct task_desc **tasks; 153 const struct trace_sched_handler *tp_handler; 154 pthread_mutex_t start_work_mutex; 155 pthread_mutex_t work_done_wait_mutex; 156 int profile_cpu; 157 /* 158 * Track the current task - that way we can know whether there's any 159 * weird events, such as a task being switched away that is not current. 160 */ 161 int max_cpu; 162 u32 curr_pid[MAX_CPUS]; 163 struct thread *curr_thread[MAX_CPUS]; 164 char next_shortname1; 165 char next_shortname2; 166 unsigned int replay_repeat; 167 unsigned long nr_run_events; 168 unsigned long nr_sleep_events; 169 unsigned long nr_wakeup_events; 170 unsigned long nr_sleep_corrections; 171 unsigned long nr_run_events_optimized; 172 unsigned long targetless_wakeups; 173 unsigned long multitarget_wakeups; 174 unsigned long nr_runs; 175 unsigned long nr_timestamps; 176 unsigned long nr_unordered_timestamps; 177 unsigned long nr_context_switch_bugs; 178 unsigned long nr_events; 179 unsigned long nr_lost_chunks; 180 unsigned long nr_lost_events; 181 u64 run_measurement_overhead; 182 u64 sleep_measurement_overhead; 183 u64 start_time; 184 u64 cpu_usage; 185 u64 runavg_cpu_usage; 186 u64 parent_cpu_usage; 187 u64 runavg_parent_cpu_usage; 188 u64 sum_runtime; 189 u64 sum_fluct; 190 u64 run_avg; 191 u64 all_runtime; 192 u64 all_count; 193 u64 cpu_last_switched[MAX_CPUS]; 194 struct rb_root atom_root, sorted_atom_root, merged_atom_root; 195 struct list_head sort_list, cmp_pid; 196 bool force; 197 bool skip_merge; 198 struct perf_sched_map map; 199 200 /* options for timehist command */ 201 bool summary; 202 bool summary_only; 203 bool idle_hist; 204 bool show_callchain; 205 unsigned int max_stack; 206 bool show_cpu_visual; 207 bool show_wakeups; 208 bool show_migrations; 209 u64 skipped_samples; 210 const char *time_str; 211 struct perf_time_interval ptime; 212 struct perf_time_interval hist_time; 213 }; 214 215 /* per thread run time data */ 216 struct thread_runtime { 217 u64 last_time; /* time of previous sched in/out event */ 218 u64 dt_run; /* run time */ 219 u64 dt_wait; /* time between CPU access (off cpu) */ 220 u64 dt_delay; /* time between wakeup and sched-in */ 221 u64 ready_to_run; /* time of wakeup */ 222 223 struct stats run_stats; 224 u64 total_run_time; 225 226 u64 migrations; 227 }; 228 229 /* per event run time data */ 230 struct evsel_runtime { 231 u64 *last_time; /* time this event was last seen per cpu */ 232 u32 ncpu; /* highest cpu slot allocated */ 233 }; 234 235 /* per cpu idle time data */ 236 struct idle_thread_runtime { 237 struct thread_runtime tr; 238 struct thread *last_thread; 239 struct rb_root sorted_root; 240 struct callchain_root callchain; 241 struct callchain_cursor cursor; 242 }; 243 244 /* track idle times per cpu */ 245 static struct thread **idle_threads; 246 static int idle_max_cpu; 247 static char idle_comm[] = "<idle>"; 248 249 static u64 get_nsecs(void) 250 { 251 struct timespec ts; 252 253 clock_gettime(CLOCK_MONOTONIC, &ts); 254 255 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 256 } 257 258 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 259 { 260 u64 T0 = get_nsecs(), T1; 261 262 do { 263 T1 = get_nsecs(); 264 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 265 } 266 267 static void sleep_nsecs(u64 nsecs) 268 { 269 struct timespec ts; 270 271 ts.tv_nsec = nsecs % 999999999; 272 ts.tv_sec = nsecs / 999999999; 273 274 nanosleep(&ts, NULL); 275 } 276 277 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 278 { 279 u64 T0, T1, 
delta, min_delta = NSEC_PER_SEC; 280 int i; 281 282 for (i = 0; i < 10; i++) { 283 T0 = get_nsecs(); 284 burn_nsecs(sched, 0); 285 T1 = get_nsecs(); 286 delta = T1-T0; 287 min_delta = min(min_delta, delta); 288 } 289 sched->run_measurement_overhead = min_delta; 290 291 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 292 } 293 294 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 295 { 296 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 297 int i; 298 299 for (i = 0; i < 10; i++) { 300 T0 = get_nsecs(); 301 sleep_nsecs(10000); 302 T1 = get_nsecs(); 303 delta = T1-T0; 304 min_delta = min(min_delta, delta); 305 } 306 min_delta -= 10000; 307 sched->sleep_measurement_overhead = min_delta; 308 309 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 310 } 311 312 static struct sched_atom * 313 get_new_event(struct task_desc *task, u64 timestamp) 314 { 315 struct sched_atom *event = zalloc(sizeof(*event)); 316 unsigned long idx = task->nr_events; 317 size_t size; 318 319 event->timestamp = timestamp; 320 event->nr = idx; 321 322 task->nr_events++; 323 size = sizeof(struct sched_atom *) * task->nr_events; 324 task->atoms = realloc(task->atoms, size); 325 BUG_ON(!task->atoms); 326 327 task->atoms[idx] = event; 328 329 return event; 330 } 331 332 static struct sched_atom *last_event(struct task_desc *task) 333 { 334 if (!task->nr_events) 335 return NULL; 336 337 return task->atoms[task->nr_events - 1]; 338 } 339 340 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 341 u64 timestamp, u64 duration) 342 { 343 struct sched_atom *event, *curr_event = last_event(task); 344 345 /* 346 * optimize an existing RUN event by merging this one 347 * to it: 348 */ 349 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 350 sched->nr_run_events_optimized++; 351 curr_event->duration += duration; 352 return; 353 } 354 355 event = get_new_event(task, timestamp); 356 357 event->type = SCHED_EVENT_RUN; 358 event->duration = duration; 359 360 sched->nr_run_events++; 361 } 362 363 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 364 u64 timestamp, struct task_desc *wakee) 365 { 366 struct sched_atom *event, *wakee_event; 367 368 event = get_new_event(task, timestamp); 369 event->type = SCHED_EVENT_WAKEUP; 370 event->wakee = wakee; 371 372 wakee_event = last_event(wakee); 373 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 374 sched->targetless_wakeups++; 375 return; 376 } 377 if (wakee_event->wait_sem) { 378 sched->multitarget_wakeups++; 379 return; 380 } 381 382 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 383 sem_init(wakee_event->wait_sem, 0, 0); 384 wakee_event->specific_wait = 1; 385 event->wait_sem = wakee_event->wait_sem; 386 387 sched->nr_wakeup_events++; 388 } 389 390 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 391 u64 timestamp, u64 task_state __maybe_unused) 392 { 393 struct sched_atom *event = get_new_event(task, timestamp); 394 395 event->type = SCHED_EVENT_SLEEP; 396 397 sched->nr_sleep_events++; 398 } 399 400 static struct task_desc *register_pid(struct perf_sched *sched, 401 unsigned long pid, const char *comm) 402 { 403 struct task_desc *task; 404 static int pid_max; 405 406 if (sched->pid_to_task == NULL) { 407 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 408 pid_max = MAX_PID; 409 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 410 } 411 if (pid >= 
(unsigned long)pid_max) { 412 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 413 sizeof(struct task_desc *))) == NULL); 414 while (pid >= (unsigned long)pid_max) 415 sched->pid_to_task[pid_max++] = NULL; 416 } 417 418 task = sched->pid_to_task[pid]; 419 420 if (task) 421 return task; 422 423 task = zalloc(sizeof(*task)); 424 task->pid = pid; 425 task->nr = sched->nr_tasks; 426 strcpy(task->comm, comm); 427 /* 428 * every task starts in sleeping state - this gets ignored 429 * if there's no wakeup pointing to this sleep state: 430 */ 431 add_sched_event_sleep(sched, task, 0, 0); 432 433 sched->pid_to_task[pid] = task; 434 sched->nr_tasks++; 435 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 436 BUG_ON(!sched->tasks); 437 sched->tasks[task->nr] = task; 438 439 if (verbose) 440 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 441 442 return task; 443 } 444 445 446 static void print_task_traces(struct perf_sched *sched) 447 { 448 struct task_desc *task; 449 unsigned long i; 450 451 for (i = 0; i < sched->nr_tasks; i++) { 452 task = sched->tasks[i]; 453 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 454 task->nr, task->comm, task->pid, task->nr_events); 455 } 456 } 457 458 static void add_cross_task_wakeups(struct perf_sched *sched) 459 { 460 struct task_desc *task1, *task2; 461 unsigned long i, j; 462 463 for (i = 0; i < sched->nr_tasks; i++) { 464 task1 = sched->tasks[i]; 465 j = i + 1; 466 if (j == sched->nr_tasks) 467 j = 0; 468 task2 = sched->tasks[j]; 469 add_sched_event_wakeup(sched, task1, 0, task2); 470 } 471 } 472 473 static void perf_sched__process_event(struct perf_sched *sched, 474 struct sched_atom *atom) 475 { 476 int ret = 0; 477 478 switch (atom->type) { 479 case SCHED_EVENT_RUN: 480 burn_nsecs(sched, atom->duration); 481 break; 482 case SCHED_EVENT_SLEEP: 483 if (atom->wait_sem) 484 ret = sem_wait(atom->wait_sem); 485 BUG_ON(ret); 486 break; 487 case SCHED_EVENT_WAKEUP: 488 if (atom->wait_sem) 489 ret = sem_post(atom->wait_sem); 490 BUG_ON(ret); 491 break; 492 case SCHED_EVENT_MIGRATION: 493 break; 494 default: 495 BUG_ON(1); 496 } 497 } 498 499 static u64 get_cpu_usage_nsec_parent(void) 500 { 501 struct rusage ru; 502 u64 sum; 503 int err; 504 505 err = getrusage(RUSAGE_SELF, &ru); 506 BUG_ON(err); 507 508 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 509 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 510 511 return sum; 512 } 513 514 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 515 { 516 struct perf_event_attr attr; 517 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 518 int fd; 519 struct rlimit limit; 520 bool need_privilege = false; 521 522 memset(&attr, 0, sizeof(attr)); 523 524 attr.type = PERF_TYPE_SOFTWARE; 525 attr.config = PERF_COUNT_SW_TASK_CLOCK; 526 527 force_again: 528 fd = sys_perf_event_open(&attr, 0, -1, -1, 529 perf_event_open_cloexec_flag()); 530 531 if (fd < 0) { 532 if (errno == EMFILE) { 533 if (sched->force) { 534 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 535 limit.rlim_cur += sched->nr_tasks - cur_task; 536 if (limit.rlim_cur > limit.rlim_max) { 537 limit.rlim_max = limit.rlim_cur; 538 need_privilege = true; 539 } 540 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 541 if (need_privilege && errno == EPERM) 542 strcpy(info, "Need privilege\n"); 543 } else 544 goto force_again; 545 } else 546 strcpy(info, "Have a try with -f option\n"); 547 } 548 pr_err("Error: 
sys_perf_event_open() syscall returned " 549 "with %d (%s)\n%s", fd, 550 str_error_r(errno, sbuf, sizeof(sbuf)), info); 551 exit(EXIT_FAILURE); 552 } 553 return fd; 554 } 555 556 static u64 get_cpu_usage_nsec_self(int fd) 557 { 558 u64 runtime; 559 int ret; 560 561 ret = read(fd, &runtime, sizeof(runtime)); 562 BUG_ON(ret != sizeof(runtime)); 563 564 return runtime; 565 } 566 567 struct sched_thread_parms { 568 struct task_desc *task; 569 struct perf_sched *sched; 570 int fd; 571 }; 572 573 static void *thread_func(void *ctx) 574 { 575 struct sched_thread_parms *parms = ctx; 576 struct task_desc *this_task = parms->task; 577 struct perf_sched *sched = parms->sched; 578 u64 cpu_usage_0, cpu_usage_1; 579 unsigned long i, ret; 580 char comm2[22]; 581 int fd = parms->fd; 582 583 zfree(&parms); 584 585 sprintf(comm2, ":%s", this_task->comm); 586 prctl(PR_SET_NAME, comm2); 587 if (fd < 0) 588 return NULL; 589 again: 590 ret = sem_post(&this_task->ready_for_work); 591 BUG_ON(ret); 592 ret = pthread_mutex_lock(&sched->start_work_mutex); 593 BUG_ON(ret); 594 ret = pthread_mutex_unlock(&sched->start_work_mutex); 595 BUG_ON(ret); 596 597 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 598 599 for (i = 0; i < this_task->nr_events; i++) { 600 this_task->curr_event = i; 601 perf_sched__process_event(sched, this_task->atoms[i]); 602 } 603 604 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 605 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 606 ret = sem_post(&this_task->work_done_sem); 607 BUG_ON(ret); 608 609 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 610 BUG_ON(ret); 611 ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); 612 BUG_ON(ret); 613 614 goto again; 615 } 616 617 static void create_tasks(struct perf_sched *sched) 618 { 619 struct task_desc *task; 620 pthread_attr_t attr; 621 unsigned long i; 622 int err; 623 624 err = pthread_attr_init(&attr); 625 BUG_ON(err); 626 err = pthread_attr_setstacksize(&attr, 627 (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); 628 BUG_ON(err); 629 err = pthread_mutex_lock(&sched->start_work_mutex); 630 BUG_ON(err); 631 err = pthread_mutex_lock(&sched->work_done_wait_mutex); 632 BUG_ON(err); 633 for (i = 0; i < sched->nr_tasks; i++) { 634 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 635 BUG_ON(parms == NULL); 636 parms->task = task = sched->tasks[i]; 637 parms->sched = sched; 638 parms->fd = self_open_counters(sched, i); 639 sem_init(&task->sleep_sem, 0, 0); 640 sem_init(&task->ready_for_work, 0, 0); 641 sem_init(&task->work_done_sem, 0, 0); 642 task->curr_event = 0; 643 err = pthread_create(&task->thread, &attr, thread_func, parms); 644 BUG_ON(err); 645 } 646 } 647 648 static void wait_for_tasks(struct perf_sched *sched) 649 { 650 u64 cpu_usage_0, cpu_usage_1; 651 struct task_desc *task; 652 unsigned long i, ret; 653 654 sched->start_time = get_nsecs(); 655 sched->cpu_usage = 0; 656 pthread_mutex_unlock(&sched->work_done_wait_mutex); 657 658 for (i = 0; i < sched->nr_tasks; i++) { 659 task = sched->tasks[i]; 660 ret = sem_wait(&task->ready_for_work); 661 BUG_ON(ret); 662 sem_init(&task->ready_for_work, 0, 0); 663 } 664 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 665 BUG_ON(ret); 666 667 cpu_usage_0 = get_cpu_usage_nsec_parent(); 668 669 pthread_mutex_unlock(&sched->start_work_mutex); 670 671 for (i = 0; i < sched->nr_tasks; i++) { 672 task = sched->tasks[i]; 673 ret = sem_wait(&task->work_done_sem); 674 BUG_ON(ret); 675 sem_init(&task->work_done_sem, 0, 0); 676 sched->cpu_usage += task->cpu_usage; 677 task->cpu_usage = 0; 678 } 679 
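	/*
	 * All worker threads have now reported their per-task CPU usage.
	 * The block below folds the parent and per-task totals into running
	 * averages weighted by replay_repeat:
	 *
	 *   runavg = (runavg * (replay_repeat - 1) + current) / replay_repeat
	 *
	 * so one noisy iteration only shifts the reported average by
	 * 1/replay_repeat of its deviation.
	 */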
680 cpu_usage_1 = get_cpu_usage_nsec_parent(); 681 if (!sched->runavg_cpu_usage) 682 sched->runavg_cpu_usage = sched->cpu_usage; 683 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 684 685 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 686 if (!sched->runavg_parent_cpu_usage) 687 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 688 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 689 sched->parent_cpu_usage)/sched->replay_repeat; 690 691 ret = pthread_mutex_lock(&sched->start_work_mutex); 692 BUG_ON(ret); 693 694 for (i = 0; i < sched->nr_tasks; i++) { 695 task = sched->tasks[i]; 696 sem_init(&task->sleep_sem, 0, 0); 697 task->curr_event = 0; 698 } 699 } 700 701 static void run_one_test(struct perf_sched *sched) 702 { 703 u64 T0, T1, delta, avg_delta, fluct; 704 705 T0 = get_nsecs(); 706 wait_for_tasks(sched); 707 T1 = get_nsecs(); 708 709 delta = T1 - T0; 710 sched->sum_runtime += delta; 711 sched->nr_runs++; 712 713 avg_delta = sched->sum_runtime / sched->nr_runs; 714 if (delta < avg_delta) 715 fluct = avg_delta - delta; 716 else 717 fluct = delta - avg_delta; 718 sched->sum_fluct += fluct; 719 if (!sched->run_avg) 720 sched->run_avg = delta; 721 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 722 723 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 724 725 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 726 727 printf("cpu: %0.2f / %0.2f", 728 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 729 730 #if 0 731 /* 732 * rusage statistics done by the parent, these are less 733 * accurate than the sched->sum_exec_runtime based statistics: 734 */ 735 printf(" [%0.2f / %0.2f]", 736 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 737 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 738 #endif 739 740 printf("\n"); 741 742 if (sched->nr_sleep_corrections) 743 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 744 sched->nr_sleep_corrections = 0; 745 } 746 747 static void test_calibrations(struct perf_sched *sched) 748 { 749 u64 T0, T1; 750 751 T0 = get_nsecs(); 752 burn_nsecs(sched, NSEC_PER_MSEC); 753 T1 = get_nsecs(); 754 755 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 756 757 T0 = get_nsecs(); 758 sleep_nsecs(NSEC_PER_MSEC); 759 T1 = get_nsecs(); 760 761 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 762 } 763 764 static int 765 replay_wakeup_event(struct perf_sched *sched, 766 struct perf_evsel *evsel, struct perf_sample *sample, 767 struct machine *machine __maybe_unused) 768 { 769 const char *comm = perf_evsel__strval(evsel, sample, "comm"); 770 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 771 struct task_desc *waker, *wakee; 772 773 if (verbose) { 774 printf("sched_wakeup event %p\n", evsel); 775 776 printf(" ... 
pid %d woke up %s/%d\n", sample->tid, comm, pid); 777 } 778 779 waker = register_pid(sched, sample->tid, "<unknown>"); 780 wakee = register_pid(sched, pid, comm); 781 782 add_sched_event_wakeup(sched, waker, sample->time, wakee); 783 return 0; 784 } 785 786 static int replay_switch_event(struct perf_sched *sched, 787 struct perf_evsel *evsel, 788 struct perf_sample *sample, 789 struct machine *machine __maybe_unused) 790 { 791 const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), 792 *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); 793 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 794 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 795 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 796 struct task_desc *prev, __maybe_unused *next; 797 u64 timestamp0, timestamp = sample->time; 798 int cpu = sample->cpu; 799 s64 delta; 800 801 if (verbose) 802 printf("sched_switch event %p\n", evsel); 803 804 if (cpu >= MAX_CPUS || cpu < 0) 805 return 0; 806 807 timestamp0 = sched->cpu_last_switched[cpu]; 808 if (timestamp0) 809 delta = timestamp - timestamp0; 810 else 811 delta = 0; 812 813 if (delta < 0) { 814 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 815 return -1; 816 } 817 818 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 819 prev_comm, prev_pid, next_comm, next_pid, delta); 820 821 prev = register_pid(sched, prev_pid, prev_comm); 822 next = register_pid(sched, next_pid, next_comm); 823 824 sched->cpu_last_switched[cpu] = timestamp; 825 826 add_sched_event_run(sched, prev, timestamp, delta); 827 add_sched_event_sleep(sched, prev, timestamp, prev_state); 828 829 return 0; 830 } 831 832 static int replay_fork_event(struct perf_sched *sched, 833 union perf_event *event, 834 struct machine *machine) 835 { 836 struct thread *child, *parent; 837 838 child = machine__findnew_thread(machine, event->fork.pid, 839 event->fork.tid); 840 parent = machine__findnew_thread(machine, event->fork.ppid, 841 event->fork.ptid); 842 843 if (child == NULL || parent == NULL) { 844 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 845 child, parent); 846 goto out_put; 847 } 848 849 if (verbose) { 850 printf("fork event\n"); 851 printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); 852 printf("... 
child: %s/%d\n", thread__comm_str(child), child->tid); 853 } 854 855 register_pid(sched, parent->tid, thread__comm_str(parent)); 856 register_pid(sched, child->tid, thread__comm_str(child)); 857 out_put: 858 thread__put(child); 859 thread__put(parent); 860 return 0; 861 } 862 863 struct sort_dimension { 864 const char *name; 865 sort_fn_t cmp; 866 struct list_head list; 867 }; 868 869 static int 870 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 871 { 872 struct sort_dimension *sort; 873 int ret = 0; 874 875 BUG_ON(list_empty(list)); 876 877 list_for_each_entry(sort, list, list) { 878 ret = sort->cmp(l, r); 879 if (ret) 880 return ret; 881 } 882 883 return ret; 884 } 885 886 static struct work_atoms * 887 thread_atoms_search(struct rb_root *root, struct thread *thread, 888 struct list_head *sort_list) 889 { 890 struct rb_node *node = root->rb_node; 891 struct work_atoms key = { .thread = thread }; 892 893 while (node) { 894 struct work_atoms *atoms; 895 int cmp; 896 897 atoms = container_of(node, struct work_atoms, node); 898 899 cmp = thread_lat_cmp(sort_list, &key, atoms); 900 if (cmp > 0) 901 node = node->rb_left; 902 else if (cmp < 0) 903 node = node->rb_right; 904 else { 905 BUG_ON(thread != atoms->thread); 906 return atoms; 907 } 908 } 909 return NULL; 910 } 911 912 static void 913 __thread_latency_insert(struct rb_root *root, struct work_atoms *data, 914 struct list_head *sort_list) 915 { 916 struct rb_node **new = &(root->rb_node), *parent = NULL; 917 918 while (*new) { 919 struct work_atoms *this; 920 int cmp; 921 922 this = container_of(*new, struct work_atoms, node); 923 parent = *new; 924 925 cmp = thread_lat_cmp(sort_list, data, this); 926 927 if (cmp > 0) 928 new = &((*new)->rb_left); 929 else 930 new = &((*new)->rb_right); 931 } 932 933 rb_link_node(&data->node, parent, new); 934 rb_insert_color(&data->node, root); 935 } 936 937 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 938 { 939 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 940 if (!atoms) { 941 pr_err("No memory at %s\n", __func__); 942 return -1; 943 } 944 945 atoms->thread = thread__get(thread); 946 INIT_LIST_HEAD(&atoms->work_list); 947 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 948 return 0; 949 } 950 951 static char sched_out_state(u64 prev_state) 952 { 953 const char *str = TASK_STATE_TO_CHAR_STR; 954 955 return str[prev_state]; 956 } 957 958 static int 959 add_sched_out_event(struct work_atoms *atoms, 960 char run_state, 961 u64 timestamp) 962 { 963 struct work_atom *atom = zalloc(sizeof(*atom)); 964 if (!atom) { 965 pr_err("Non memory at %s", __func__); 966 return -1; 967 } 968 969 atom->sched_out_time = timestamp; 970 971 if (run_state == 'R') { 972 atom->state = THREAD_WAIT_CPU; 973 atom->wake_up_time = atom->sched_out_time; 974 } 975 976 list_add_tail(&atom->list, &atoms->work_list); 977 return 0; 978 } 979 980 static void 981 add_runtime_event(struct work_atoms *atoms, u64 delta, 982 u64 timestamp __maybe_unused) 983 { 984 struct work_atom *atom; 985 986 BUG_ON(list_empty(&atoms->work_list)); 987 988 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 989 990 atom->runtime += delta; 991 atoms->total_runtime += delta; 992 } 993 994 static void 995 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 996 { 997 struct work_atom *atom; 998 u64 delta; 999 1000 if (list_empty(&atoms->work_list)) 1001 return; 1002 1003 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1004 1005 
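	/*
	 * Only an atom still waiting for a CPU contributes to the latency
	 * statistics: the wait ends at this sched-in, and the delta between
	 * wake_up_time and sched_in_time computed below is the scheduling
	 * delay accumulated into total_lat/max_lat.
	 */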
if (atom->state != THREAD_WAIT_CPU) 1006 return; 1007 1008 if (timestamp < atom->wake_up_time) { 1009 atom->state = THREAD_IGNORE; 1010 return; 1011 } 1012 1013 atom->state = THREAD_SCHED_IN; 1014 atom->sched_in_time = timestamp; 1015 1016 delta = atom->sched_in_time - atom->wake_up_time; 1017 atoms->total_lat += delta; 1018 if (delta > atoms->max_lat) { 1019 atoms->max_lat = delta; 1020 atoms->max_lat_at = timestamp; 1021 } 1022 atoms->nb_atoms++; 1023 } 1024 1025 static int latency_switch_event(struct perf_sched *sched, 1026 struct perf_evsel *evsel, 1027 struct perf_sample *sample, 1028 struct machine *machine) 1029 { 1030 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1031 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1032 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 1033 struct work_atoms *out_events, *in_events; 1034 struct thread *sched_out, *sched_in; 1035 u64 timestamp0, timestamp = sample->time; 1036 int cpu = sample->cpu, err = -1; 1037 s64 delta; 1038 1039 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1040 1041 timestamp0 = sched->cpu_last_switched[cpu]; 1042 sched->cpu_last_switched[cpu] = timestamp; 1043 if (timestamp0) 1044 delta = timestamp - timestamp0; 1045 else 1046 delta = 0; 1047 1048 if (delta < 0) { 1049 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1050 return -1; 1051 } 1052 1053 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1054 sched_in = machine__findnew_thread(machine, -1, next_pid); 1055 if (sched_out == NULL || sched_in == NULL) 1056 goto out_put; 1057 1058 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1059 if (!out_events) { 1060 if (thread_atoms_insert(sched, sched_out)) 1061 goto out_put; 1062 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1063 if (!out_events) { 1064 pr_err("out-event: Internal tree error"); 1065 goto out_put; 1066 } 1067 } 1068 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) 1069 return -1; 1070 1071 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1072 if (!in_events) { 1073 if (thread_atoms_insert(sched, sched_in)) 1074 goto out_put; 1075 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1076 if (!in_events) { 1077 pr_err("in-event: Internal tree error"); 1078 goto out_put; 1079 } 1080 /* 1081 * Take came in we have not heard about yet, 1082 * add in an initial atom in runnable state: 1083 */ 1084 if (add_sched_out_event(in_events, 'R', timestamp)) 1085 goto out_put; 1086 } 1087 add_sched_in_event(in_events, timestamp); 1088 err = 0; 1089 out_put: 1090 thread__put(sched_out); 1091 thread__put(sched_in); 1092 return err; 1093 } 1094 1095 static int latency_runtime_event(struct perf_sched *sched, 1096 struct perf_evsel *evsel, 1097 struct perf_sample *sample, 1098 struct machine *machine) 1099 { 1100 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1101 const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1102 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1103 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1104 u64 timestamp = sample->time; 1105 int cpu = sample->cpu, err = -1; 1106 1107 if (thread == NULL) 1108 return -1; 1109 1110 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1111 if (!atoms) { 1112 if (thread_atoms_insert(sched, thread)) 1113 goto out_put; 1114 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1115 
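		/*
		 * The thread was inserted just above, so this re-lookup is
		 * expected to succeed; a miss indicates a corrupted rbtree.
		 */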
if (!atoms) { 1116 pr_err("in-event: Internal tree error"); 1117 goto out_put; 1118 } 1119 if (add_sched_out_event(atoms, 'R', timestamp)) 1120 goto out_put; 1121 } 1122 1123 add_runtime_event(atoms, runtime, timestamp); 1124 err = 0; 1125 out_put: 1126 thread__put(thread); 1127 return err; 1128 } 1129 1130 static int latency_wakeup_event(struct perf_sched *sched, 1131 struct perf_evsel *evsel, 1132 struct perf_sample *sample, 1133 struct machine *machine) 1134 { 1135 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1136 struct work_atoms *atoms; 1137 struct work_atom *atom; 1138 struct thread *wakee; 1139 u64 timestamp = sample->time; 1140 int err = -1; 1141 1142 wakee = machine__findnew_thread(machine, -1, pid); 1143 if (wakee == NULL) 1144 return -1; 1145 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1146 if (!atoms) { 1147 if (thread_atoms_insert(sched, wakee)) 1148 goto out_put; 1149 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1150 if (!atoms) { 1151 pr_err("wakeup-event: Internal tree error"); 1152 goto out_put; 1153 } 1154 if (add_sched_out_event(atoms, 'S', timestamp)) 1155 goto out_put; 1156 } 1157 1158 BUG_ON(list_empty(&atoms->work_list)); 1159 1160 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1161 1162 /* 1163 * As we do not guarantee the wakeup event happens when 1164 * task is out of run queue, also may happen when task is 1165 * on run queue and wakeup only change ->state to TASK_RUNNING, 1166 * then we should not set the ->wake_up_time when wake up a 1167 * task which is on run queue. 1168 * 1169 * You WILL be missing events if you've recorded only 1170 * one CPU, or are only looking at only one, so don't 1171 * skip in this case. 1172 */ 1173 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1174 goto out_ok; 1175 1176 sched->nr_timestamps++; 1177 if (atom->sched_out_time > timestamp) { 1178 sched->nr_unordered_timestamps++; 1179 goto out_ok; 1180 } 1181 1182 atom->state = THREAD_WAIT_CPU; 1183 atom->wake_up_time = timestamp; 1184 out_ok: 1185 err = 0; 1186 out_put: 1187 thread__put(wakee); 1188 return err; 1189 } 1190 1191 static int latency_migrate_task_event(struct perf_sched *sched, 1192 struct perf_evsel *evsel, 1193 struct perf_sample *sample, 1194 struct machine *machine) 1195 { 1196 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1197 u64 timestamp = sample->time; 1198 struct work_atoms *atoms; 1199 struct work_atom *atom; 1200 struct thread *migrant; 1201 int err = -1; 1202 1203 /* 1204 * Only need to worry about migration when profiling one CPU. 
1205 */ 1206 if (sched->profile_cpu == -1) 1207 return 0; 1208 1209 migrant = machine__findnew_thread(machine, -1, pid); 1210 if (migrant == NULL) 1211 return -1; 1212 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1213 if (!atoms) { 1214 if (thread_atoms_insert(sched, migrant)) 1215 goto out_put; 1216 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1217 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1218 if (!atoms) { 1219 pr_err("migration-event: Internal tree error"); 1220 goto out_put; 1221 } 1222 if (add_sched_out_event(atoms, 'R', timestamp)) 1223 goto out_put; 1224 } 1225 1226 BUG_ON(list_empty(&atoms->work_list)); 1227 1228 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1229 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1230 1231 sched->nr_timestamps++; 1232 1233 if (atom->sched_out_time > timestamp) 1234 sched->nr_unordered_timestamps++; 1235 err = 0; 1236 out_put: 1237 thread__put(migrant); 1238 return err; 1239 } 1240 1241 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1242 { 1243 int i; 1244 int ret; 1245 u64 avg; 1246 char max_lat_at[32]; 1247 1248 if (!work_list->nb_atoms) 1249 return; 1250 /* 1251 * Ignore idle threads: 1252 */ 1253 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1254 return; 1255 1256 sched->all_runtime += work_list->total_runtime; 1257 sched->all_count += work_list->nb_atoms; 1258 1259 if (work_list->num_merged > 1) 1260 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged); 1261 else 1262 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); 1263 1264 for (i = 0; i < 24 - ret; i++) 1265 printf(" "); 1266 1267 avg = work_list->total_lat / work_list->nb_atoms; 1268 timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); 1269 1270 printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", 1271 (double)work_list->total_runtime / NSEC_PER_MSEC, 1272 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1273 (double)work_list->max_lat / NSEC_PER_MSEC, 1274 max_lat_at); 1275 } 1276 1277 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1278 { 1279 if (l->thread == r->thread) 1280 return 0; 1281 if (l->thread->tid < r->thread->tid) 1282 return -1; 1283 if (l->thread->tid > r->thread->tid) 1284 return 1; 1285 return (int)(l->thread - r->thread); 1286 } 1287 1288 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1289 { 1290 u64 avgl, avgr; 1291 1292 if (!l->nb_atoms) 1293 return -1; 1294 1295 if (!r->nb_atoms) 1296 return 1; 1297 1298 avgl = l->total_lat / l->nb_atoms; 1299 avgr = r->total_lat / r->nb_atoms; 1300 1301 if (avgl < avgr) 1302 return -1; 1303 if (avgl > avgr) 1304 return 1; 1305 1306 return 0; 1307 } 1308 1309 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1310 { 1311 if (l->max_lat < r->max_lat) 1312 return -1; 1313 if (l->max_lat > r->max_lat) 1314 return 1; 1315 1316 return 0; 1317 } 1318 1319 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1320 { 1321 if (l->nb_atoms < r->nb_atoms) 1322 return -1; 1323 if (l->nb_atoms > r->nb_atoms) 1324 return 1; 1325 1326 return 0; 1327 } 1328 1329 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1330 { 1331 if (l->total_runtime < r->total_runtime) 1332 return -1; 1333 if (l->total_runtime > r->total_runtime) 1334 return 1; 1335 1336 return 0; 1337 } 1338 1339 
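/*
 * The comparators above implement the sort keys understood by the latency
 * report: "pid", "avg", "max", "switch" (number of atoms, i.e. context
 * switches) and "runtime".  sort_dimension__add() below matches a sort
 * token against this set and appends the chosen dimension to the given
 * list, so several keys can be combined in the order they are specified.
 */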
static int sort_dimension__add(const char *tok, struct list_head *list) 1340 { 1341 size_t i; 1342 static struct sort_dimension avg_sort_dimension = { 1343 .name = "avg", 1344 .cmp = avg_cmp, 1345 }; 1346 static struct sort_dimension max_sort_dimension = { 1347 .name = "max", 1348 .cmp = max_cmp, 1349 }; 1350 static struct sort_dimension pid_sort_dimension = { 1351 .name = "pid", 1352 .cmp = pid_cmp, 1353 }; 1354 static struct sort_dimension runtime_sort_dimension = { 1355 .name = "runtime", 1356 .cmp = runtime_cmp, 1357 }; 1358 static struct sort_dimension switch_sort_dimension = { 1359 .name = "switch", 1360 .cmp = switch_cmp, 1361 }; 1362 struct sort_dimension *available_sorts[] = { 1363 &pid_sort_dimension, 1364 &avg_sort_dimension, 1365 &max_sort_dimension, 1366 &switch_sort_dimension, 1367 &runtime_sort_dimension, 1368 }; 1369 1370 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1371 if (!strcmp(available_sorts[i]->name, tok)) { 1372 list_add_tail(&available_sorts[i]->list, list); 1373 1374 return 0; 1375 } 1376 } 1377 1378 return -1; 1379 } 1380 1381 static void perf_sched__sort_lat(struct perf_sched *sched) 1382 { 1383 struct rb_node *node; 1384 struct rb_root *root = &sched->atom_root; 1385 again: 1386 for (;;) { 1387 struct work_atoms *data; 1388 node = rb_first(root); 1389 if (!node) 1390 break; 1391 1392 rb_erase(node, root); 1393 data = rb_entry(node, struct work_atoms, node); 1394 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1395 } 1396 if (root == &sched->atom_root) { 1397 root = &sched->merged_atom_root; 1398 goto again; 1399 } 1400 } 1401 1402 static int process_sched_wakeup_event(struct perf_tool *tool, 1403 struct perf_evsel *evsel, 1404 struct perf_sample *sample, 1405 struct machine *machine) 1406 { 1407 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1408 1409 if (sched->tp_handler->wakeup_event) 1410 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1411 1412 return 0; 1413 } 1414 1415 union map_priv { 1416 void *ptr; 1417 bool color; 1418 }; 1419 1420 static bool thread__has_color(struct thread *thread) 1421 { 1422 union map_priv priv = { 1423 .ptr = thread__priv(thread), 1424 }; 1425 1426 return priv.color; 1427 } 1428 1429 static struct thread* 1430 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1431 { 1432 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1433 union map_priv priv = { 1434 .color = false, 1435 }; 1436 1437 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1438 return thread; 1439 1440 if (thread_map__has(sched->map.color_pids, tid)) 1441 priv.color = true; 1442 1443 thread__set_priv(thread, priv.ptr); 1444 return thread; 1445 } 1446 1447 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, 1448 struct perf_sample *sample, struct machine *machine) 1449 { 1450 const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1451 struct thread *sched_in; 1452 int new_shortname; 1453 u64 timestamp0, timestamp = sample->time; 1454 s64 delta; 1455 int i, this_cpu = sample->cpu; 1456 int cpus_nr; 1457 bool new_cpu = false; 1458 const char *color = PERF_COLOR_NORMAL; 1459 char stimestamp[32]; 1460 1461 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1462 1463 if (this_cpu > sched->max_cpu) 1464 sched->max_cpu = this_cpu; 1465 1466 if (sched->map.comp) { 1467 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1468 if (!test_and_set_bit(this_cpu, 
sched->map.comp_cpus_mask)) { 1469 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1470 new_cpu = true; 1471 } 1472 } else 1473 cpus_nr = sched->max_cpu; 1474 1475 timestamp0 = sched->cpu_last_switched[this_cpu]; 1476 sched->cpu_last_switched[this_cpu] = timestamp; 1477 if (timestamp0) 1478 delta = timestamp - timestamp0; 1479 else 1480 delta = 0; 1481 1482 if (delta < 0) { 1483 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1484 return -1; 1485 } 1486 1487 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1488 if (sched_in == NULL) 1489 return -1; 1490 1491 sched->curr_thread[this_cpu] = thread__get(sched_in); 1492 1493 printf(" "); 1494 1495 new_shortname = 0; 1496 if (!sched_in->shortname[0]) { 1497 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1498 /* 1499 * Don't allocate a letter-number for swapper:0 1500 * as a shortname. Instead, we use '.' for it. 1501 */ 1502 sched_in->shortname[0] = '.'; 1503 sched_in->shortname[1] = ' '; 1504 } else { 1505 sched_in->shortname[0] = sched->next_shortname1; 1506 sched_in->shortname[1] = sched->next_shortname2; 1507 1508 if (sched->next_shortname1 < 'Z') { 1509 sched->next_shortname1++; 1510 } else { 1511 sched->next_shortname1 = 'A'; 1512 if (sched->next_shortname2 < '9') 1513 sched->next_shortname2++; 1514 else 1515 sched->next_shortname2 = '0'; 1516 } 1517 } 1518 new_shortname = 1; 1519 } 1520 1521 for (i = 0; i < cpus_nr; i++) { 1522 int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; 1523 struct thread *curr_thread = sched->curr_thread[cpu]; 1524 const char *pid_color = color; 1525 const char *cpu_color = color; 1526 1527 if (curr_thread && thread__has_color(curr_thread)) 1528 pid_color = COLOR_PIDS; 1529 1530 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) 1531 continue; 1532 1533 if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) 1534 cpu_color = COLOR_CPUS; 1535 1536 if (cpu != this_cpu) 1537 color_fprintf(stdout, color, " "); 1538 else 1539 color_fprintf(stdout, cpu_color, "*"); 1540 1541 if (sched->curr_thread[cpu]) 1542 color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname); 1543 else 1544 color_fprintf(stdout, color, " "); 1545 } 1546 1547 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) 1548 goto out; 1549 1550 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1551 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1552 if (new_shortname || (verbose && sched_in->tid)) { 1553 const char *pid_color = color; 1554 1555 if (thread__has_color(sched_in)) 1556 pid_color = COLOR_PIDS; 1557 1558 color_fprintf(stdout, pid_color, "%s => %s:%d", 1559 sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); 1560 } 1561 1562 if (sched->map.comp && new_cpu) 1563 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1564 1565 out: 1566 color_fprintf(stdout, color, "\n"); 1567 1568 thread__put(sched_in); 1569 1570 return 0; 1571 } 1572 1573 static int process_sched_switch_event(struct perf_tool *tool, 1574 struct perf_evsel *evsel, 1575 struct perf_sample *sample, 1576 struct machine *machine) 1577 { 1578 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1579 int this_cpu = sample->cpu, err = 0; 1580 u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1581 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1582 1583 if (sched->curr_pid[this_cpu] != (u32)-1) { 1584 /* 1585 * Are we trying to switch away a PID that is 1586 * not current? 
1587 */ 1588 if (sched->curr_pid[this_cpu] != prev_pid) 1589 sched->nr_context_switch_bugs++; 1590 } 1591 1592 if (sched->tp_handler->switch_event) 1593 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1594 1595 sched->curr_pid[this_cpu] = next_pid; 1596 return err; 1597 } 1598 1599 static int process_sched_runtime_event(struct perf_tool *tool, 1600 struct perf_evsel *evsel, 1601 struct perf_sample *sample, 1602 struct machine *machine) 1603 { 1604 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1605 1606 if (sched->tp_handler->runtime_event) 1607 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1608 1609 return 0; 1610 } 1611 1612 static int perf_sched__process_fork_event(struct perf_tool *tool, 1613 union perf_event *event, 1614 struct perf_sample *sample, 1615 struct machine *machine) 1616 { 1617 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1618 1619 /* run the fork event through the perf machinery */ 1620 perf_event__process_fork(tool, event, sample, machine); 1621 1622 /* and then run additional processing needed for this command */ 1623 if (sched->tp_handler->fork_event) 1624 return sched->tp_handler->fork_event(sched, event, machine); 1625 1626 return 0; 1627 } 1628 1629 static int process_sched_migrate_task_event(struct perf_tool *tool, 1630 struct perf_evsel *evsel, 1631 struct perf_sample *sample, 1632 struct machine *machine) 1633 { 1634 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1635 1636 if (sched->tp_handler->migrate_task_event) 1637 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1638 1639 return 0; 1640 } 1641 1642 typedef int (*tracepoint_handler)(struct perf_tool *tool, 1643 struct perf_evsel *evsel, 1644 struct perf_sample *sample, 1645 struct machine *machine); 1646 1647 static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, 1648 union perf_event *event __maybe_unused, 1649 struct perf_sample *sample, 1650 struct perf_evsel *evsel, 1651 struct machine *machine) 1652 { 1653 int err = 0; 1654 1655 if (evsel->handler != NULL) { 1656 tracepoint_handler f = evsel->handler; 1657 err = f(tool, evsel, sample, machine); 1658 } 1659 1660 return err; 1661 } 1662 1663 static int perf_sched__read_events(struct perf_sched *sched) 1664 { 1665 const struct perf_evsel_str_handler handlers[] = { 1666 { "sched:sched_switch", process_sched_switch_event, }, 1667 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1668 { "sched:sched_wakeup", process_sched_wakeup_event, }, 1669 { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1670 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1671 }; 1672 struct perf_session *session; 1673 struct perf_data_file file = { 1674 .path = input_name, 1675 .mode = PERF_DATA_MODE_READ, 1676 .force = sched->force, 1677 }; 1678 int rc = -1; 1679 1680 session = perf_session__new(&file, false, &sched->tool); 1681 if (session == NULL) { 1682 pr_debug("No Memory for session\n"); 1683 return -1; 1684 } 1685 1686 symbol__init(&session->header.env); 1687 1688 if (perf_session__set_tracepoints_handlers(session, handlers)) 1689 goto out_delete; 1690 1691 if (perf_session__has_traces(session, "record -R")) { 1692 int err = perf_session__process_events(session); 1693 if (err) { 1694 pr_err("Failed to process events, error %d", err); 1695 goto out_delete; 1696 } 1697 1698 sched->nr_events = session->evlist->stats.nr_events[0]; 1699 sched->nr_lost_events = 
session->evlist->stats.total_lost; 1700 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1701 } 1702 1703 rc = 0; 1704 out_delete: 1705 perf_session__delete(session); 1706 return rc; 1707 } 1708 1709 /* 1710 * scheduling times are printed as msec.usec 1711 */ 1712 static inline void print_sched_time(unsigned long long nsecs, int width) 1713 { 1714 unsigned long msecs; 1715 unsigned long usecs; 1716 1717 msecs = nsecs / NSEC_PER_MSEC; 1718 nsecs -= msecs * NSEC_PER_MSEC; 1719 usecs = nsecs / NSEC_PER_USEC; 1720 printf("%*lu.%03lu ", width, msecs, usecs); 1721 } 1722 1723 /* 1724 * returns runtime data for event, allocating memory for it the 1725 * first time it is used. 1726 */ 1727 static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) 1728 { 1729 struct evsel_runtime *r = evsel->priv; 1730 1731 if (r == NULL) { 1732 r = zalloc(sizeof(struct evsel_runtime)); 1733 evsel->priv = r; 1734 } 1735 1736 return r; 1737 } 1738 1739 /* 1740 * save last time event was seen per cpu 1741 */ 1742 static void perf_evsel__save_time(struct perf_evsel *evsel, 1743 u64 timestamp, u32 cpu) 1744 { 1745 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1746 1747 if (r == NULL) 1748 return; 1749 1750 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1751 int i, n = __roundup_pow_of_two(cpu+1); 1752 void *p = r->last_time; 1753 1754 p = realloc(r->last_time, n * sizeof(u64)); 1755 if (!p) 1756 return; 1757 1758 r->last_time = p; 1759 for (i = r->ncpu; i < n; ++i) 1760 r->last_time[i] = (u64) 0; 1761 1762 r->ncpu = n; 1763 } 1764 1765 r->last_time[cpu] = timestamp; 1766 } 1767 1768 /* returns last time this event was seen on the given cpu */ 1769 static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) 1770 { 1771 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1772 1773 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 1774 return 0; 1775 1776 return r->last_time[cpu]; 1777 } 1778 1779 static int comm_width = 30; 1780 1781 static char *timehist_get_commstr(struct thread *thread) 1782 { 1783 static char str[32]; 1784 const char *comm = thread__comm_str(thread); 1785 pid_t tid = thread->tid; 1786 pid_t pid = thread->pid_; 1787 int n; 1788 1789 if (pid == 0) 1790 n = scnprintf(str, sizeof(str), "%s", comm); 1791 1792 else if (tid != pid) 1793 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 1794 1795 else 1796 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 1797 1798 if (n > comm_width) 1799 comm_width = n; 1800 1801 return str; 1802 } 1803 1804 static void timehist_header(struct perf_sched *sched) 1805 { 1806 u32 ncpus = sched->max_cpu + 1; 1807 u32 i, j; 1808 1809 printf("%15s %6s ", "time", "cpu"); 1810 1811 if (sched->show_cpu_visual) { 1812 printf(" "); 1813 for (i = 0, j = 0; i < ncpus; ++i) { 1814 printf("%x", j++); 1815 if (j > 15) 1816 j = 0; 1817 } 1818 printf(" "); 1819 } 1820 1821 printf(" %-*s %9s %9s %9s", comm_width, 1822 "task name", "wait time", "sch delay", "run time"); 1823 1824 printf("\n"); 1825 1826 /* 1827 * units row 1828 */ 1829 printf("%15s %-6s ", "", ""); 1830 1831 if (sched->show_cpu_visual) 1832 printf(" %*s ", ncpus, ""); 1833 1834 printf(" %-*s %9s %9s %9s\n", comm_width, 1835 "[tid/pid]", "(msec)", "(msec)", "(msec)"); 1836 1837 /* 1838 * separator 1839 */ 1840 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 1841 1842 if (sched->show_cpu_visual) 1843 printf(" %.*s ", ncpus, graph_dotted_line); 1844 1845 printf(" %.*s %.9s %.9s %.9s", comm_width, 1846 
graph_dotted_line, graph_dotted_line, graph_dotted_line, 1847 graph_dotted_line); 1848 1849 printf("\n"); 1850 } 1851 1852 static void timehist_print_sample(struct perf_sched *sched, 1853 struct perf_sample *sample, 1854 struct addr_location *al, 1855 struct thread *thread, 1856 u64 t) 1857 { 1858 struct thread_runtime *tr = thread__priv(thread); 1859 u32 max_cpus = sched->max_cpu + 1; 1860 char tstr[64]; 1861 1862 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 1863 printf("%15s [%04d] ", tstr, sample->cpu); 1864 1865 if (sched->show_cpu_visual) { 1866 u32 i; 1867 char c; 1868 1869 printf(" "); 1870 for (i = 0; i < max_cpus; ++i) { 1871 /* flag idle times with 'i'; others are sched events */ 1872 if (i == sample->cpu) 1873 c = (thread->tid == 0) ? 'i' : 's'; 1874 else 1875 c = ' '; 1876 printf("%c", c); 1877 } 1878 printf(" "); 1879 } 1880 1881 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 1882 1883 print_sched_time(tr->dt_wait, 6); 1884 print_sched_time(tr->dt_delay, 6); 1885 print_sched_time(tr->dt_run, 6); 1886 1887 if (sched->show_wakeups) 1888 printf(" %-*s", comm_width, ""); 1889 1890 if (thread->tid == 0) 1891 goto out; 1892 1893 if (sched->show_callchain) 1894 printf(" "); 1895 1896 sample__fprintf_sym(sample, al, 0, 1897 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 1898 EVSEL__PRINT_CALLCHAIN_ARROW | 1899 EVSEL__PRINT_SKIP_IGNORED, 1900 &callchain_cursor, stdout); 1901 1902 out: 1903 printf("\n"); 1904 } 1905 1906 /* 1907 * Explanation of delta-time stats: 1908 * 1909 * t = time of current schedule out event 1910 * tprev = time of previous sched out event 1911 * also time of schedule-in event for current task 1912 * last_time = time of last sched change event for current task 1913 * (i.e, time process was last scheduled out) 1914 * ready_to_run = time of wakeup for current task 1915 * 1916 * -----|------------|------------|------------|------ 1917 * last ready tprev t 1918 * time to run 1919 * 1920 * |-------- dt_wait --------| 1921 * |- dt_delay -|-- dt_run --| 1922 * 1923 * dt_run = run time of current task 1924 * dt_wait = time between last schedule out event for task and tprev 1925 * represents time spent off the cpu 1926 * dt_delay = time between wakeup and schedule-in of task 1927 */ 1928 1929 static void timehist_update_runtime_stats(struct thread_runtime *r, 1930 u64 t, u64 tprev) 1931 { 1932 r->dt_delay = 0; 1933 r->dt_wait = 0; 1934 r->dt_run = 0; 1935 if (tprev) { 1936 r->dt_run = t - tprev; 1937 if (r->ready_to_run) { 1938 if (r->ready_to_run > tprev) 1939 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 1940 else 1941 r->dt_delay = tprev - r->ready_to_run; 1942 } 1943 1944 if (r->last_time > tprev) 1945 pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); 1946 else if (r->last_time) 1947 r->dt_wait = tprev - r->last_time; 1948 } 1949 1950 update_stats(&r->run_stats, r->dt_run); 1951 r->total_run_time += r->dt_run; 1952 } 1953 1954 static bool is_idle_sample(struct perf_sample *sample, 1955 struct perf_evsel *evsel) 1956 { 1957 /* pid 0 == swapper == idle task */ 1958 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) 1959 return perf_evsel__intval(evsel, sample, "prev_pid") == 0; 1960 1961 return sample->pid == 0; 1962 } 1963 1964 static void save_task_callchain(struct perf_sched *sched, 1965 struct perf_sample *sample, 1966 struct perf_evsel *evsel, 1967 struct machine *machine) 1968 { 1969 struct callchain_cursor *cursor = &callchain_cursor; 1970 struct thread *thread; 1971 
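	/*
	 * Note: after resolving the chain, the loop at the end of this
	 * function marks scheduler-internal symbols (schedule, __schedule,
	 * preempt_schedule) as ignored so they are skipped when callchains
	 * are printed.
	 */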
1972 /* want main thread for process - has maps */ 1973 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 1974 if (thread == NULL) { 1975 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 1976 return; 1977 } 1978 1979 if (!symbol_conf.use_callchain || sample->callchain == NULL) 1980 return; 1981 1982 if (thread__resolve_callchain(thread, cursor, evsel, sample, 1983 NULL, NULL, sched->max_stack + 2) != 0) { 1984 if (verbose) 1985 error("Failed to resolve callchain. Skipping\n"); 1986 1987 return; 1988 } 1989 1990 callchain_cursor_commit(cursor); 1991 1992 while (true) { 1993 struct callchain_cursor_node *node; 1994 struct symbol *sym; 1995 1996 node = callchain_cursor_current(cursor); 1997 if (node == NULL) 1998 break; 1999 2000 sym = node->sym; 2001 if (sym && sym->name) { 2002 if (!strcmp(sym->name, "schedule") || 2003 !strcmp(sym->name, "__schedule") || 2004 !strcmp(sym->name, "preempt_schedule")) 2005 sym->ignore = 1; 2006 } 2007 2008 callchain_cursor_advance(cursor); 2009 } 2010 } 2011 2012 static int init_idle_thread(struct thread *thread) 2013 { 2014 struct idle_thread_runtime *itr; 2015 2016 thread__set_comm(thread, idle_comm, 0); 2017 2018 itr = zalloc(sizeof(*itr)); 2019 if (itr == NULL) 2020 return -ENOMEM; 2021 2022 init_stats(&itr->tr.run_stats); 2023 callchain_init(&itr->callchain); 2024 callchain_cursor_reset(&itr->cursor); 2025 thread__set_priv(thread, itr); 2026 2027 return 0; 2028 } 2029 2030 /* 2031 * Track idle stats per cpu by maintaining a local thread 2032 * struct for the idle task on each cpu. 2033 */ 2034 static int init_idle_threads(int ncpu) 2035 { 2036 int i, ret; 2037 2038 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2039 if (!idle_threads) 2040 return -ENOMEM; 2041 2042 idle_max_cpu = ncpu; 2043 2044 /* allocate the actual thread struct if needed */ 2045 for (i = 0; i < ncpu; ++i) { 2046 idle_threads[i] = thread__new(0, 0); 2047 if (idle_threads[i] == NULL) 2048 return -ENOMEM; 2049 2050 ret = init_idle_thread(idle_threads[i]); 2051 if (ret < 0) 2052 return ret; 2053 } 2054 2055 return 0; 2056 } 2057 2058 static void free_idle_threads(void) 2059 { 2060 int i; 2061 2062 if (idle_threads == NULL) 2063 return; 2064 2065 for (i = 0; i < idle_max_cpu; ++i) { 2066 if ((idle_threads[i])) 2067 thread__delete(idle_threads[i]); 2068 } 2069 2070 free(idle_threads); 2071 } 2072 2073 static struct thread *get_idle_thread(int cpu) 2074 { 2075 /* 2076 * expand/allocate array of pointers to local thread 2077 * structs if needed 2078 */ 2079 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2080 int i, j = __roundup_pow_of_two(cpu+1); 2081 void *p; 2082 2083 p = realloc(idle_threads, j * sizeof(struct thread *)); 2084 if (!p) 2085 return NULL; 2086 2087 idle_threads = (struct thread **) p; 2088 for (i = idle_max_cpu; i < j; ++i) 2089 idle_threads[i] = NULL; 2090 2091 idle_max_cpu = j; 2092 } 2093 2094 /* allocate a new thread struct if needed */ 2095 if (idle_threads[cpu] == NULL) { 2096 idle_threads[cpu] = thread__new(0, 0); 2097 2098 if (idle_threads[cpu]) { 2099 if (init_idle_thread(idle_threads[cpu]) < 0) 2100 return NULL; 2101 } 2102 } 2103 2104 return idle_threads[cpu]; 2105 } 2106 2107 static void save_idle_callchain(struct idle_thread_runtime *itr, 2108 struct perf_sample *sample) 2109 { 2110 if (!symbol_conf.use_callchain || sample->callchain == NULL) 2111 return; 2112 2113 callchain_cursor__copy(&itr->cursor, &callchain_cursor); 2114 } 2115 2116 /* 2117 * handle runtime stats saved per thread 2118 */ 2119 static struct 
thread_runtime *thread__init_runtime(struct thread *thread) 2120 { 2121 struct thread_runtime *r; 2122 2123 r = zalloc(sizeof(struct thread_runtime)); 2124 if (!r) 2125 return NULL; 2126 2127 init_stats(&r->run_stats); 2128 thread__set_priv(thread, r); 2129 2130 return r; 2131 } 2132 2133 static struct thread_runtime *thread__get_runtime(struct thread *thread) 2134 { 2135 struct thread_runtime *tr; 2136 2137 tr = thread__priv(thread); 2138 if (tr == NULL) { 2139 tr = thread__init_runtime(thread); 2140 if (tr == NULL) 2141 pr_debug("Failed to malloc memory for runtime data.\n"); 2142 } 2143 2144 return tr; 2145 } 2146 2147 static struct thread *timehist_get_thread(struct perf_sched *sched, 2148 struct perf_sample *sample, 2149 struct machine *machine, 2150 struct perf_evsel *evsel) 2151 { 2152 struct thread *thread; 2153 2154 if (is_idle_sample(sample, evsel)) { 2155 thread = get_idle_thread(sample->cpu); 2156 if (thread == NULL) 2157 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2158 2159 } else { 2160 /* there were samples with tid 0 but non-zero pid */ 2161 thread = machine__findnew_thread(machine, sample->pid, 2162 sample->tid ?: sample->pid); 2163 if (thread == NULL) { 2164 pr_debug("Failed to get thread for tid %d. skipping sample.\n", 2165 sample->tid); 2166 } 2167 2168 save_task_callchain(sched, sample, evsel, machine); 2169 if (sched->idle_hist) { 2170 struct thread *idle; 2171 struct idle_thread_runtime *itr; 2172 2173 idle = get_idle_thread(sample->cpu); 2174 if (idle == NULL) { 2175 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2176 return NULL; 2177 } 2178 2179 itr = thread__priv(idle); 2180 if (itr == NULL) 2181 return NULL; 2182 2183 itr->last_thread = thread; 2184 2185 /* copy task callchain when entering to idle */ 2186 if (perf_evsel__intval(evsel, sample, "next_pid") == 0) 2187 save_idle_callchain(itr, sample); 2188 } 2189 } 2190 2191 return thread; 2192 } 2193 2194 static bool timehist_skip_sample(struct perf_sched *sched, 2195 struct thread *thread, 2196 struct perf_evsel *evsel, 2197 struct perf_sample *sample) 2198 { 2199 bool rc = false; 2200 2201 if (thread__is_filtered(thread)) { 2202 rc = true; 2203 sched->skipped_samples++; 2204 } 2205 2206 if (sched->idle_hist) { 2207 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) 2208 rc = true; 2209 else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && 2210 perf_evsel__intval(evsel, sample, "next_pid") != 0) 2211 rc = true; 2212 } 2213 2214 return rc; 2215 } 2216 2217 static void timehist_print_wakeup_event(struct perf_sched *sched, 2218 struct perf_evsel *evsel, 2219 struct perf_sample *sample, 2220 struct machine *machine, 2221 struct thread *awakened) 2222 { 2223 struct thread *thread; 2224 char tstr[64]; 2225 2226 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2227 if (thread == NULL) 2228 return; 2229 2230 /* show wakeup unless both awakee and awaker are filtered */ 2231 if (timehist_skip_sample(sched, thread, evsel, sample) && 2232 timehist_skip_sample(sched, awakened, evsel, sample)) { 2233 return; 2234 } 2235 2236 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2237 printf("%15s [%04d] ", tstr, sample->cpu); 2238 if (sched->show_cpu_visual) 2239 printf(" %*s ", sched->max_cpu + 1, ""); 2240 2241 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2242 2243 /* dt spacer */ 2244 printf(" %9s %9s %9s ", "", "", ""); 2245 2246 printf("awakened: %s", timehist_get_commstr(awakened)); 2247 2248 printf("\n"); 2249 } 2250 2251 
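/*
 * Handler for sched:sched_wakeup and sched:sched_wakeup_new (see the
 * handlers table in perf_sched__timehist()): remember when the awakened
 * task became runnable so dt_delay can be computed at its next
 * sched_switch, and print the wakeup when --wakeups was requested.
 */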
static int timehist_sched_wakeup_event(struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of awakened task not pid in sample */
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	if (tr->ready_to_run == 0)
		tr->ready_to_run = sample->time;

	/* show wakeups if requested */
	if (sched->show_wakeups &&
	    !perf_time__skip_sample(&sched->ptime, sample->time))
		timehist_print_wakeup_event(sched, evsel, sample, machine, thread);

	return 0;
}

static void timehist_print_migration_event(struct perf_sched *sched,
					   struct perf_evsel *evsel,
					   struct perf_sample *sample,
					   struct machine *machine,
					   struct thread *migrated)
{
	struct thread *thread;
	char tstr[64];
	u32 max_cpus = sched->max_cpu + 1;
	u32 ocpu, dcpu;

	if (sched->summary_only)
		return;

	ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
	dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	if (timehist_skip_sample(sched, thread, evsel, sample) &&
	    timehist_skip_sample(sched, migrated, evsel, sample)) {
		return;
	}

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			c = (i == sample->cpu) ? 'm' : ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf(" %9s %9s %9s ", "", "", "");

	printf("migrated: %s", timehist_get_commstr(migrated));
	printf(" cpu %d => %d", ocpu, dcpu);

	printf("\n");
}

static int timehist_migrate_task_event(struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of migrated task not pid in sample */
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	tr->migrations++;

	/* show migrations if requested */
	timehist_print_migration_event(sched, evsel, sample, machine, thread);

	return 0;
}

static int timehist_sched_change_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct perf_time_interval *ptime = &sched->ptime;
	struct addr_location al;
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	u64 tprev, t = sample->time;
	int rc = 0;

	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("problem processing %d event. skipping it\n",
		       event->header.type);
		rc = -1;
		goto out;
	}

	thread = timehist_get_thread(sched, sample, machine, evsel);
	if (thread == NULL) {
		rc = -1;
		goto out;
	}

	if (timehist_skip_sample(sched, thread, evsel, sample))
		goto out;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		rc = -1;
		goto out;
	}

	tprev = perf_evsel__get_time(evsel, sample->cpu);

	/*
	 * If a start time was given:
	 * - sample time is before the window the user cares about - skip the sample
	 * - tprev is before the window - reset it to the start of the window
	 */
	if (ptime->start && ptime->start > t)
		goto out;

	if (tprev && ptime->start > tprev)
		tprev = ptime->start;

	/*
	 * If an end time was given:
	 * - previous sched event is outside the window - we are done
	 * - sample time is beyond the window - clamp it to the window end
	 *   so the stats for the time window of interest are closed out
	 */
	if (ptime->end) {
		if (tprev > ptime->end)
			goto out;

		if (t > ptime->end)
			t = ptime->end;
	}

	if (!sched->idle_hist || thread->tid == 0) {
		timehist_update_runtime_stats(tr, t, tprev);

		if (sched->idle_hist) {
			struct idle_thread_runtime *itr = (void *)tr;
			struct thread_runtime *last_tr;

			BUG_ON(thread->tid != 0);

			if (itr->last_thread == NULL)
				goto out;

			/* add current idle time as last thread's runtime */
			last_tr = thread__get_runtime(itr->last_thread);
			if (last_tr == NULL)
				goto out;

			timehist_update_runtime_stats(last_tr, t, tprev);
			/*
			 * remove the delta times of the last thread as they are
			 * not updated otherwise and would show stale values next
			 * time; we only care about its total run time and run stats.
			 */
			last_tr->dt_run = 0;
			last_tr->dt_wait = 0;
			last_tr->dt_delay = 0;

			if (itr->cursor.nr)
				callchain_append(&itr->callchain, &itr->cursor, t - tprev);

			itr->last_thread = NULL;
		}
	}

	if (!sched->summary_only)
		timehist_print_sample(sched, sample, &al, thread, t);

out:
	if (sched->hist_time.start == 0 && t >= ptime->start)
		sched->hist_time.start = t;
	if (ptime->end == 0 || t <= ptime->end)
		sched->hist_time.end = t;

	if (tr) {
		/* time of this sched_switch event becomes last time task seen */
		tr->last_time = sample->time;

		/* sched out event for task so reset ready to run time */
		tr->ready_to_run = 0;
	}

	perf_evsel__save_time(evsel, sample->time, sample->cpu);

	return rc;
}

static int timehist_sched_switch_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine __maybe_unused)
{
	return timehist_sched_change_event(tool, event, evsel, sample, machine);
}

static int process_lost(struct perf_tool *tool __maybe_unused,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine __maybe_unused)
{
	char tstr[64];

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s ", tstr);
	printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);

	return 0;
}


static void print_thread_runtime(struct thread *t,
				 struct thread_runtime *r)
{
	double mean = avg_stats(&r->run_stats);
	float stddev;

	printf("%*s %5d %9" PRIu64 " ",
	       comm_width, timehist_get_commstr(t), t->ppid,
	       (u64) r->run_stats.n);

	print_sched_time(r->total_run_time, 8);
	stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
	print_sched_time(r->run_stats.min, 6);
	printf(" ");
	print_sched_time((u64) mean, 6);
	printf(" ");
	print_sched_time(r->run_stats.max, 6);
	printf(" ");
	printf("%5.2f", stddev);
	printf(" %5" PRIu64, r->migrations);
	printf("\n");
}

struct total_run_stats {
	u64 sched_count;
	u64 task_count;
	u64 total_run_time;
};

static int __show_thread_runtime(struct thread *t, void *priv)
{
	struct total_run_stats *stats = priv;
	struct thread_runtime *r;

	if (thread__is_filtered(t))
		return 0;

	r = thread__priv(t);
	if (r && r->run_stats.n) {
		stats->task_count++;
		stats->sched_count += r->run_stats.n;
		stats->total_run_time += r->total_run_time;
		print_thread_runtime(t, r);
	}

	return 0;
}

static int show_thread_runtime(struct thread *t, void *priv)
{
	if (t->dead)
		return 0;

	return __show_thread_runtime(t, priv);
}

static int show_deadthread_runtime(struct thread *t, void *priv)
{
	if (!t->dead)
		return 0;

	return __show_thread_runtime(t, priv);
}

static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
	const char *sep = " <- ";
	struct callchain_list *chain;
	size_t ret = 0;
	char bf[1024];
	bool first;

	if (node == NULL)
		return 0;

	ret = callchain__fprintf_folded(fp, node->parent);
	first = (ret == 0);
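	/*
	 * The recursive call above has already emitted the parent nodes;
	 * what follows appends this node's own entries separated by
	 * " <- ", skipping context markers and symbols flagged as ignored.
	 */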

	list_for_each_entry(chain, &node->val, list) {
		if (chain->ip >= PERF_CONTEXT_MAX)
			continue;
		if (chain->ms.sym && chain->ms.sym->ignore)
			continue;
		ret += fprintf(fp, "%s%s", first ? "" : sep,
			       callchain_list__sym_name(chain, bf, sizeof(bf),
							false));
		first = false;
	}

	return ret;
}

static size_t timehist_print_idlehist_callchain(struct rb_root *root)
{
	size_t ret = 0;
	FILE *fp = stdout;
	struct callchain_node *chain;
	struct rb_node *rb_node = rb_first(root);

	printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
	printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
	       graph_dotted_line);

	while (rb_node) {
		chain = rb_entry(rb_node, struct callchain_node, rb_node);
		rb_node = rb_next(rb_node);

		ret += fprintf(fp, " ");
		print_sched_time(chain->hit, 12);
		ret += 16;	/* print_sched_time returns 2nd arg + 4 */
		ret += fprintf(fp, " %8d ", chain->count);
		ret += callchain__fprintf_folded(fp, chain);
		ret += fprintf(fp, "\n");
	}

	return ret;
}

static void timehist_print_summary(struct perf_sched *sched,
				   struct perf_session *session)
{
	struct machine *m = &session->machines.host;
	struct total_run_stats totals;
	u64 task_count;
	struct thread *t;
	struct thread_runtime *r;
	int i;
	u64 hist_time = sched->hist_time.end - sched->hist_time.start;

	memset(&totals, 0, sizeof(totals));

	if (sched->idle_hist) {
		printf("\nIdle-time summary\n");
		printf("%*s parent sched-out ", comm_width, "comm");
		printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
	} else {
		printf("\nRuntime summary\n");
		printf("%*s parent sched-in ", comm_width, "comm");
		printf(" run-time min-run avg-run max-run stddev migrations\n");
	}
	printf("%*s (count) ", comm_width, "");
	printf(" (msec) (msec) (msec) (msec) %%\n");
	printf("%.117s\n", graph_dotted_line);

	machine__for_each_thread(m, show_thread_runtime, &totals);
	task_count = totals.task_count;
	if (!task_count)
		printf("<no still running tasks>\n");

	printf("\nTerminated tasks:\n");
	machine__for_each_thread(m, show_deadthread_runtime, &totals);
	if (task_count == totals.task_count)
		printf("<no terminated tasks>\n");

	/* CPU idle stats not tracked when samples were skipped */
	if (sched->skipped_samples && !sched->idle_hist)
		return;

	printf("\nIdle stats:\n");
	for (i = 0; i < idle_max_cpu; ++i) {
		t = idle_threads[i];
		if (!t)
			continue;

		r = thread__priv(t);
		if (r && r->run_stats.n) {
			totals.sched_count += r->run_stats.n;
			printf(" CPU %2d idle for ", i);
			print_sched_time(r->total_run_time, 6);
			printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
		} else
			printf(" CPU %2d idle entire time window\n", i);
	}

	if (sched->idle_hist && symbol_conf.use_callchain) {
		callchain_param.mode = CHAIN_FOLDED;
		callchain_param.value = CCVAL_PERIOD;

		callchain_register_param(&callchain_param);

		printf("\nIdle stats by callchain:\n");
		for (i = 0; i < idle_max_cpu; ++i) {
			struct idle_thread_runtime *itr;

			t = idle_threads[i];
			if (!t)
				continue;

			itr = thread__priv(t);
			if (itr == NULL)
				continue;

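			/*
			 * Sort this CPU's accumulated idle callchains
			 * (folded mode, weighted by idle period as set up
			 * above) before printing them.
			 */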
callchain_param.sort(&itr->sorted_root, &itr->callchain, 2699 0, &callchain_param); 2700 2701 printf(" CPU %2d:", i); 2702 print_sched_time(itr->tr.total_run_time, 6); 2703 printf(" msec\n"); 2704 timehist_print_idlehist_callchain(&itr->sorted_root); 2705 printf("\n"); 2706 } 2707 } 2708 2709 printf("\n" 2710 " Total number of unique tasks: %" PRIu64 "\n" 2711 "Total number of context switches: %" PRIu64 "\n", 2712 totals.task_count, totals.sched_count); 2713 2714 printf(" Total run time (msec): "); 2715 print_sched_time(totals.total_run_time, 2); 2716 printf("\n"); 2717 2718 printf(" Total scheduling time (msec): "); 2719 print_sched_time(hist_time, 2); 2720 printf(" (x %d)\n", sched->max_cpu); 2721 } 2722 2723 typedef int (*sched_handler)(struct perf_tool *tool, 2724 union perf_event *event, 2725 struct perf_evsel *evsel, 2726 struct perf_sample *sample, 2727 struct machine *machine); 2728 2729 static int perf_timehist__process_sample(struct perf_tool *tool, 2730 union perf_event *event, 2731 struct perf_sample *sample, 2732 struct perf_evsel *evsel, 2733 struct machine *machine) 2734 { 2735 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2736 int err = 0; 2737 int this_cpu = sample->cpu; 2738 2739 if (this_cpu > sched->max_cpu) 2740 sched->max_cpu = this_cpu; 2741 2742 if (evsel->handler != NULL) { 2743 sched_handler f = evsel->handler; 2744 2745 err = f(tool, event, evsel, sample, machine); 2746 } 2747 2748 return err; 2749 } 2750 2751 static int timehist_check_attr(struct perf_sched *sched, 2752 struct perf_evlist *evlist) 2753 { 2754 struct perf_evsel *evsel; 2755 struct evsel_runtime *er; 2756 2757 list_for_each_entry(evsel, &evlist->entries, node) { 2758 er = perf_evsel__get_runtime(evsel); 2759 if (er == NULL) { 2760 pr_err("Failed to allocate memory for evsel runtime data\n"); 2761 return -1; 2762 } 2763 2764 if (sched->show_callchain && 2765 !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { 2766 pr_info("Samples do not have callchains.\n"); 2767 sched->show_callchain = 0; 2768 symbol_conf.use_callchain = 0; 2769 } 2770 } 2771 2772 return 0; 2773 } 2774 2775 static int perf_sched__timehist(struct perf_sched *sched) 2776 { 2777 const struct perf_evsel_str_handler handlers[] = { 2778 { "sched:sched_switch", timehist_sched_switch_event, }, 2779 { "sched:sched_wakeup", timehist_sched_wakeup_event, }, 2780 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, 2781 }; 2782 const struct perf_evsel_str_handler migrate_handlers[] = { 2783 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2784 }; 2785 struct perf_data_file file = { 2786 .path = input_name, 2787 .mode = PERF_DATA_MODE_READ, 2788 .force = sched->force, 2789 }; 2790 2791 struct perf_session *session; 2792 struct perf_evlist *evlist; 2793 int err = -1; 2794 2795 /* 2796 * event handlers for timehist option 2797 */ 2798 sched->tool.sample = perf_timehist__process_sample; 2799 sched->tool.mmap = perf_event__process_mmap; 2800 sched->tool.comm = perf_event__process_comm; 2801 sched->tool.exit = perf_event__process_exit; 2802 sched->tool.fork = perf_event__process_fork; 2803 sched->tool.lost = process_lost; 2804 sched->tool.attr = perf_event__process_attr; 2805 sched->tool.tracing_data = perf_event__process_tracing_data; 2806 sched->tool.build_id = perf_event__process_build_id; 2807 2808 sched->tool.ordered_events = true; 2809 sched->tool.ordering_requires_timestamps = true; 2810 2811 symbol_conf.use_callchain = sched->show_callchain; 2812 2813 session = perf_session__new(&file, false, 
&sched->tool); 2814 if (session == NULL) 2815 return -ENOMEM; 2816 2817 evlist = session->evlist; 2818 2819 symbol__init(&session->header.env); 2820 2821 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { 2822 pr_err("Invalid time string\n"); 2823 return -EINVAL; 2824 } 2825 2826 if (timehist_check_attr(sched, evlist) != 0) 2827 goto out; 2828 2829 setup_pager(); 2830 2831 /* setup per-evsel handlers */ 2832 if (perf_session__set_tracepoints_handlers(session, handlers)) 2833 goto out; 2834 2835 /* sched_switch event at a minimum needs to exist */ 2836 if (!perf_evlist__find_tracepoint_by_name(session->evlist, 2837 "sched:sched_switch")) { 2838 pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); 2839 goto out; 2840 } 2841 2842 if (sched->show_migrations && 2843 perf_session__set_tracepoints_handlers(session, migrate_handlers)) 2844 goto out; 2845 2846 /* pre-allocate struct for per-CPU idle stats */ 2847 sched->max_cpu = session->header.env.nr_cpus_online; 2848 if (sched->max_cpu == 0) 2849 sched->max_cpu = 4; 2850 if (init_idle_threads(sched->max_cpu)) 2851 goto out; 2852 2853 /* summary_only implies summary option, but don't overwrite summary if set */ 2854 if (sched->summary_only) 2855 sched->summary = sched->summary_only; 2856 2857 if (!sched->summary_only) 2858 timehist_header(sched); 2859 2860 err = perf_session__process_events(session); 2861 if (err) { 2862 pr_err("Failed to process events, error %d", err); 2863 goto out; 2864 } 2865 2866 sched->nr_events = evlist->stats.nr_events[0]; 2867 sched->nr_lost_events = evlist->stats.total_lost; 2868 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; 2869 2870 if (sched->summary) 2871 timehist_print_summary(sched, session); 2872 2873 out: 2874 free_idle_threads(); 2875 perf_session__delete(session); 2876 2877 return err; 2878 } 2879 2880 2881 static void print_bad_events(struct perf_sched *sched) 2882 { 2883 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 2884 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 2885 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, 2886 sched->nr_unordered_timestamps, sched->nr_timestamps); 2887 } 2888 if (sched->nr_lost_events && sched->nr_events) { 2889 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 2890 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 2891 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 2892 } 2893 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 2894 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 2895 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 2896 sched->nr_context_switch_bugs, sched->nr_timestamps); 2897 if (sched->nr_lost_events) 2898 printf(" (due to lost events?)"); 2899 printf("\n"); 2900 } 2901 } 2902 2903 static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) 2904 { 2905 struct rb_node **new = &(root->rb_node), *parent = NULL; 2906 struct work_atoms *this; 2907 const char *comm = thread__comm_str(data->thread), *this_comm; 2908 2909 while (*new) { 2910 int cmp; 2911 2912 this = container_of(*new, struct work_atoms, node); 2913 parent = *new; 2914 2915 this_comm = thread__comm_str(this->thread); 2916 cmp = strcmp(comm, this_comm); 2917 if (cmp > 0) { 2918 new = &((*new)->rb_left); 2919 } else if (cmp < 0) { 2920 new = &((*new)->rb_right); 2921 } else { 2922 this->num_merged++; 2923 this->total_runtime += data->total_runtime; 2924 
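			/* same comm: fold the remaining counters, latencies and atoms of @data into this node */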
this->nb_atoms += data->nb_atoms; 2925 this->total_lat += data->total_lat; 2926 list_splice(&data->work_list, &this->work_list); 2927 if (this->max_lat < data->max_lat) { 2928 this->max_lat = data->max_lat; 2929 this->max_lat_at = data->max_lat_at; 2930 } 2931 zfree(&data); 2932 return; 2933 } 2934 } 2935 2936 data->num_merged++; 2937 rb_link_node(&data->node, parent, new); 2938 rb_insert_color(&data->node, root); 2939 } 2940 2941 static void perf_sched__merge_lat(struct perf_sched *sched) 2942 { 2943 struct work_atoms *data; 2944 struct rb_node *node; 2945 2946 if (sched->skip_merge) 2947 return; 2948 2949 while ((node = rb_first(&sched->atom_root))) { 2950 rb_erase(node, &sched->atom_root); 2951 data = rb_entry(node, struct work_atoms, node); 2952 __merge_work_atoms(&sched->merged_atom_root, data); 2953 } 2954 } 2955 2956 static int perf_sched__lat(struct perf_sched *sched) 2957 { 2958 struct rb_node *next; 2959 2960 setup_pager(); 2961 2962 if (perf_sched__read_events(sched)) 2963 return -1; 2964 2965 perf_sched__merge_lat(sched); 2966 perf_sched__sort_lat(sched); 2967 2968 printf("\n -----------------------------------------------------------------------------------------------------------------\n"); 2969 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); 2970 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 2971 2972 next = rb_first(&sched->sorted_atom_root); 2973 2974 while (next) { 2975 struct work_atoms *work_list; 2976 2977 work_list = rb_entry(next, struct work_atoms, node); 2978 output_lat_thread(sched, work_list); 2979 next = rb_next(next); 2980 thread__zput(work_list->thread); 2981 } 2982 2983 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 2984 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 2985 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); 2986 2987 printf(" ---------------------------------------------------\n"); 2988 2989 print_bad_events(sched); 2990 printf("\n"); 2991 2992 return 0; 2993 } 2994 2995 static int setup_map_cpus(struct perf_sched *sched) 2996 { 2997 struct cpu_map *map; 2998 2999 sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); 3000 3001 if (sched->map.comp) { 3002 sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); 3003 if (!sched->map.comp_cpus) 3004 return -1; 3005 } 3006 3007 if (!sched->map.cpus_str) 3008 return 0; 3009 3010 map = cpu_map__new(sched->map.cpus_str); 3011 if (!map) { 3012 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); 3013 return -1; 3014 } 3015 3016 sched->map.cpus = map; 3017 return 0; 3018 } 3019 3020 static int setup_color_pids(struct perf_sched *sched) 3021 { 3022 struct thread_map *map; 3023 3024 if (!sched->map.color_pids_str) 3025 return 0; 3026 3027 map = thread_map__new_by_tid_str(sched->map.color_pids_str); 3028 if (!map) { 3029 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); 3030 return -1; 3031 } 3032 3033 sched->map.color_pids = map; 3034 return 0; 3035 } 3036 3037 static int setup_color_cpus(struct perf_sched *sched) 3038 { 3039 struct cpu_map *map; 3040 3041 if (!sched->map.color_cpus_str) 3042 return 0; 3043 3044 map = cpu_map__new(sched->map.color_cpus_str); 3045 if (!map) { 3046 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); 3047 return -1; 3048 } 3049 3050 sched->map.color_cpus = map; 3051 return 0; 3052 } 3053 3054 
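/*
 * 'perf sched map' entry point: set up the CPU and PID maps used for
 * filtering/colouring (from --cpus, --color-pids and --color-cpus, parsed
 * below in cmd_sched()), then read the recorded events and print the
 * per-CPU task map followed by any bad-event statistics.
 * Illustrative invocation (flags as defined in map_options below), e.g.:
 *   perf sched map --compact --color-pids 1234 --cpus 0-3
 */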
static int perf_sched__map(struct perf_sched *sched) 3055 { 3056 if (setup_map_cpus(sched)) 3057 return -1; 3058 3059 if (setup_color_pids(sched)) 3060 return -1; 3061 3062 if (setup_color_cpus(sched)) 3063 return -1; 3064 3065 setup_pager(); 3066 if (perf_sched__read_events(sched)) 3067 return -1; 3068 print_bad_events(sched); 3069 return 0; 3070 } 3071 3072 static int perf_sched__replay(struct perf_sched *sched) 3073 { 3074 unsigned long i; 3075 3076 calibrate_run_measurement_overhead(sched); 3077 calibrate_sleep_measurement_overhead(sched); 3078 3079 test_calibrations(sched); 3080 3081 if (perf_sched__read_events(sched)) 3082 return -1; 3083 3084 printf("nr_run_events: %ld\n", sched->nr_run_events); 3085 printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); 3086 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); 3087 3088 if (sched->targetless_wakeups) 3089 printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3090 if (sched->multitarget_wakeups) 3091 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3092 if (sched->nr_run_events_optimized) 3093 printf("run atoms optimized: %ld\n", 3094 sched->nr_run_events_optimized); 3095 3096 print_task_traces(sched); 3097 add_cross_task_wakeups(sched); 3098 3099 create_tasks(sched); 3100 printf("------------------------------------------------------------\n"); 3101 for (i = 0; i < sched->replay_repeat; i++) 3102 run_one_test(sched); 3103 3104 return 0; 3105 } 3106 3107 static void setup_sorting(struct perf_sched *sched, const struct option *options, 3108 const char * const usage_msg[]) 3109 { 3110 char *tmp, *tok, *str = strdup(sched->sort_order); 3111 3112 for (tok = strtok_r(str, ", ", &tmp); 3113 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3114 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3115 usage_with_options_msg(usage_msg, options, 3116 "Unknown --sort key: `%s'", tok); 3117 } 3118 } 3119 3120 free(str); 3121 3122 sort_dimension__add("pid", &sched->cmp_pid); 3123 } 3124 3125 static int __cmd_record(int argc, const char **argv) 3126 { 3127 unsigned int rec_argc, i, j; 3128 const char **rec_argv; 3129 const char * const record_args[] = { 3130 "record", 3131 "-a", 3132 "-R", 3133 "-m", "1024", 3134 "-c", "1", 3135 "-e", "sched:sched_switch", 3136 "-e", "sched:sched_stat_wait", 3137 "-e", "sched:sched_stat_sleep", 3138 "-e", "sched:sched_stat_iowait", 3139 "-e", "sched:sched_stat_runtime", 3140 "-e", "sched:sched_process_fork", 3141 "-e", "sched:sched_wakeup", 3142 "-e", "sched:sched_wakeup_new", 3143 "-e", "sched:sched_migrate_task", 3144 }; 3145 3146 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 3147 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 3148 3149 if (rec_argv == NULL) 3150 return -ENOMEM; 3151 3152 for (i = 0; i < ARRAY_SIZE(record_args); i++) 3153 rec_argv[i] = strdup(record_args[i]); 3154 3155 for (j = 1; j < (unsigned int)argc; j++, i++) 3156 rec_argv[i] = argv[j]; 3157 3158 BUG_ON(i != rec_argc); 3159 3160 return cmd_record(i, rec_argv, NULL); 3161 } 3162 3163 int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) 3164 { 3165 const char default_sort_order[] = "avg, max, switch, runtime"; 3166 struct perf_sched sched = { 3167 .tool = { 3168 .sample = perf_sched__process_tracepoint_sample, 3169 .comm = perf_event__process_comm, 3170 .lost = perf_event__process_lost, 3171 .fork = perf_sched__process_fork_event, 3172 .ordered_events = true, 3173 }, 3174 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), 3175 .sort_list = LIST_HEAD_INIT(sched.sort_list), 3176 
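		/* the remaining fields below are the built-in defaults for the sub-commands */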
		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
		.sort_order	      = default_sort_order,
		.replay_repeat	      = 10,
		.profile_cpu	      = -1,
		.next_shortname1      = 'A',
		.next_shortname2      = '0',
		.skip_merge	      = 0,
		.show_callchain	      = 1,
		.max_stack	      = 5,
	};
	const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
	OPT_END()
	};
	const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INTEGER('C', "CPU", &sched.profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('p', "pids", &sched.skip_merge,
		    "latency stats per pid instead of per comm"),
	OPT_PARENT(sched_options)
	};
	const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_PARENT(sched_options)
	};
	const struct option map_options[] = {
	OPT_BOOLEAN(0, "compact", &sched.map.comp,
		    "map output in compact mode"),
	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
		   "highlight given pids in map"),
	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
		   "highlight given CPUs in map"),
	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
		   "display given CPUs in map"),
	OPT_PARENT(sched_options)
	};
	const struct option timehist_options[] = {
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
		    "Display call chains if present (default on)"),
	OPT_UINTEGER(0, "max-stack", &sched.max_stack,
		     "Maximum number of functions to display in a backtrace"),
	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
		   "Look for files with symbols relative to this directory"),
	OPT_BOOLEAN('s', "summary", &sched.summary_only,
		    "Show only a summary of the scheduling activity with statistics"),
	OPT_BOOLEAN('S', "with-summary", &sched.summary,
		    "Show all scheduling events and a summary with statistics"),
	OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
	OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
	OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
	OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
	OPT_STRING(0, "time", &sched.time_str, "str",
		   "Time span for analysis (start,stop)"),
	OPT_PARENT(sched_options)
	};

	const char * const latency_usage[] = {
		"perf sched latency [<options>]",
		NULL
	};
	const char * const replay_usage[] = {
		"perf sched replay [<options>]",
		NULL
	};
	const char * const map_usage[] = {
		"perf sched map [<options>]",
		NULL
	};
	const char * const timehist_usage[] = {
		"perf sched timehist [<options>]",
		NULL
	};
	const char *const sched_subcommands[] = { "record", "latency", "map",
						  "replay", "script",
						  "timehist", NULL };
	const char *sched_usage[] = {
		NULL,
		NULL
	};
	struct trace_sched_handler lat_ops  = {
		.wakeup_event	    = latency_wakeup_event,
		.switch_event	    = latency_switch_event,
		.runtime_event	    = latency_runtime_event,
		.migrate_task_event = latency_migrate_task_event,
	};
	struct trace_sched_handler map_ops  = {
		.switch_event	    = map_switch_event,
	};
	struct trace_sched_handler replay_ops  = {
		.wakeup_event	    = replay_wakeup_event,
		.switch_event	    = replay_switch_event,
		.fork_event	    = replay_fork_event,
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
		sched.curr_pid[i] = -1;

	argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
					sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		sched.tp_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__lat(&sched);
	} else if (!strcmp(argv[0], "map")) {
		if (argc) {
			argc = parse_options(argc, argv, map_options, map_usage, 0);
			if (argc)
				usage_with_options(map_usage, map_options);
		}
		sched.tp_handler = &map_ops;
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__map(&sched);
	} else if (!strncmp(argv[0], "rep", 3)) {
		sched.tp_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		return perf_sched__replay(&sched);
	} else if (!strcmp(argv[0], "timehist")) {
		if (argc) {
			argc = parse_options(argc, argv, timehist_options,
					     timehist_usage, 0);
			if (argc)
				usage_with_options(timehist_usage, timehist_options);
		}
		if (sched.show_wakeups && sched.summary_only) {
			pr_err(" Error: -s and -w are mutually exclusive.\n");
			parse_options_usage(timehist_usage, timehist_options, "s", true);
			parse_options_usage(NULL, timehist_options, "w", true);
			return -EINVAL;
		}

		return perf_sched__timehist(&sched);
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}