1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/evlist.h" 6 #include "util/cache.h" 7 #include "util/evsel.h" 8 #include "util/symbol.h" 9 #include "util/thread.h" 10 #include "util/header.h" 11 #include "util/session.h" 12 #include "util/tool.h" 13 #include "util/cloexec.h" 14 #include "util/thread_map.h" 15 #include "util/color.h" 16 #include "util/stat.h" 17 #include "util/callchain.h" 18 #include "util/time-utils.h" 19 20 #include <subcmd/parse-options.h> 21 #include "util/trace-event.h" 22 23 #include "util/debug.h" 24 25 #include <linux/log2.h> 26 #include <sys/prctl.h> 27 #include <sys/resource.h> 28 29 #include <semaphore.h> 30 #include <pthread.h> 31 #include <math.h> 32 #include <api/fs/fs.h> 33 #include <linux/time64.h> 34 35 #define PR_SET_NAME 15 /* Set process name */ 36 #define MAX_CPUS 4096 37 #define COMM_LEN 20 38 #define SYM_LEN 129 39 #define MAX_PID 1024000 40 41 struct sched_atom; 42 43 struct task_desc { 44 unsigned long nr; 45 unsigned long pid; 46 char comm[COMM_LEN]; 47 48 unsigned long nr_events; 49 unsigned long curr_event; 50 struct sched_atom **atoms; 51 52 pthread_t thread; 53 sem_t sleep_sem; 54 55 sem_t ready_for_work; 56 sem_t work_done_sem; 57 58 u64 cpu_usage; 59 }; 60 61 enum sched_event_type { 62 SCHED_EVENT_RUN, 63 SCHED_EVENT_SLEEP, 64 SCHED_EVENT_WAKEUP, 65 SCHED_EVENT_MIGRATION, 66 }; 67 68 struct sched_atom { 69 enum sched_event_type type; 70 int specific_wait; 71 u64 timestamp; 72 u64 duration; 73 unsigned long nr; 74 sem_t *wait_sem; 75 struct task_desc *wakee; 76 }; 77 78 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" 79 80 enum thread_state { 81 THREAD_SLEEPING = 0, 82 THREAD_WAIT_CPU, 83 THREAD_SCHED_IN, 84 THREAD_IGNORE 85 }; 86 87 struct work_atom { 88 struct list_head list; 89 enum thread_state state; 90 u64 sched_out_time; 91 u64 wake_up_time; 92 u64 sched_in_time; 93 u64 runtime; 94 }; 95 96 struct work_atoms { 97 struct list_head work_list; 98 struct thread *thread; 99 struct rb_node node; 100 u64 max_lat; 101 u64 max_lat_at; 102 u64 total_lat; 103 u64 nb_atoms; 104 u64 total_runtime; 105 int num_merged; 106 }; 107 108 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 109 110 struct perf_sched; 111 112 struct trace_sched_handler { 113 int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel, 114 struct perf_sample *sample, struct machine *machine); 115 116 int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel, 117 struct perf_sample *sample, struct machine *machine); 118 119 int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel, 120 struct perf_sample *sample, struct machine *machine); 121 122 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 123 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 124 struct machine *machine); 125 126 int (*migrate_task_event)(struct perf_sched *sched, 127 struct perf_evsel *evsel, 128 struct perf_sample *sample, 129 struct machine *machine); 130 }; 131 132 #define COLOR_PIDS PERF_COLOR_BLUE 133 #define COLOR_CPUS PERF_COLOR_BG_RED 134 135 struct perf_sched_map { 136 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 137 int *comp_cpus; 138 bool comp; 139 struct thread_map *color_pids; 140 const char *color_pids_str; 141 struct cpu_map *color_cpus; 142 const char *color_cpus_str; 143 struct cpu_map *cpus; 144 const char *cpus_str; 145 }; 146 147 struct perf_sched { 148 struct perf_tool tool; 149 const char *sort_order; 150 unsigned long nr_tasks; 151 struct task_desc 
**pid_to_task; 152 struct task_desc **tasks; 153 const struct trace_sched_handler *tp_handler; 154 pthread_mutex_t start_work_mutex; 155 pthread_mutex_t work_done_wait_mutex; 156 int profile_cpu; 157 /* 158 * Track the current task - that way we can know whether there's any 159 * weird events, such as a task being switched away that is not current. 160 */ 161 int max_cpu; 162 u32 curr_pid[MAX_CPUS]; 163 struct thread *curr_thread[MAX_CPUS]; 164 char next_shortname1; 165 char next_shortname2; 166 unsigned int replay_repeat; 167 unsigned long nr_run_events; 168 unsigned long nr_sleep_events; 169 unsigned long nr_wakeup_events; 170 unsigned long nr_sleep_corrections; 171 unsigned long nr_run_events_optimized; 172 unsigned long targetless_wakeups; 173 unsigned long multitarget_wakeups; 174 unsigned long nr_runs; 175 unsigned long nr_timestamps; 176 unsigned long nr_unordered_timestamps; 177 unsigned long nr_context_switch_bugs; 178 unsigned long nr_events; 179 unsigned long nr_lost_chunks; 180 unsigned long nr_lost_events; 181 u64 run_measurement_overhead; 182 u64 sleep_measurement_overhead; 183 u64 start_time; 184 u64 cpu_usage; 185 u64 runavg_cpu_usage; 186 u64 parent_cpu_usage; 187 u64 runavg_parent_cpu_usage; 188 u64 sum_runtime; 189 u64 sum_fluct; 190 u64 run_avg; 191 u64 all_runtime; 192 u64 all_count; 193 u64 cpu_last_switched[MAX_CPUS]; 194 struct rb_root atom_root, sorted_atom_root, merged_atom_root; 195 struct list_head sort_list, cmp_pid; 196 bool force; 197 bool skip_merge; 198 struct perf_sched_map map; 199 200 /* options for timehist command */ 201 bool summary; 202 bool summary_only; 203 bool idle_hist; 204 bool show_callchain; 205 unsigned int max_stack; 206 bool show_cpu_visual; 207 bool show_wakeups; 208 bool show_migrations; 209 u64 skipped_samples; 210 const char *time_str; 211 struct perf_time_interval ptime; 212 }; 213 214 /* per thread run time data */ 215 struct thread_runtime { 216 u64 last_time; /* time of previous sched in/out event */ 217 u64 dt_run; /* run time */ 218 u64 dt_wait; /* time between CPU access (off cpu) */ 219 u64 dt_delay; /* time between wakeup and sched-in */ 220 u64 ready_to_run; /* time of wakeup */ 221 222 struct stats run_stats; 223 u64 total_run_time; 224 225 u64 migrations; 226 }; 227 228 /* per event run time data */ 229 struct evsel_runtime { 230 u64 *last_time; /* time this event was last seen per cpu */ 231 u32 ncpu; /* highest cpu slot allocated */ 232 }; 233 234 /* per cpu idle time data */ 235 struct idle_thread_runtime { 236 struct thread_runtime tr; 237 struct thread *last_thread; 238 struct rb_root sorted_root; 239 struct callchain_root callchain; 240 struct callchain_cursor cursor; 241 }; 242 243 /* track idle times per cpu */ 244 static struct thread **idle_threads; 245 static int idle_max_cpu; 246 static char idle_comm[] = "<idle>"; 247 248 static u64 get_nsecs(void) 249 { 250 struct timespec ts; 251 252 clock_gettime(CLOCK_MONOTONIC, &ts); 253 254 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 255 } 256 257 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 258 { 259 u64 T0 = get_nsecs(), T1; 260 261 do { 262 T1 = get_nsecs(); 263 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 264 } 265 266 static void sleep_nsecs(u64 nsecs) 267 { 268 struct timespec ts; 269 270 ts.tv_nsec = nsecs % 999999999; 271 ts.tv_sec = nsecs / 999999999; 272 273 nanosleep(&ts, NULL); 274 } 275 276 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 277 { 278 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 279 int i; 
280 281 for (i = 0; i < 10; i++) { 282 T0 = get_nsecs(); 283 burn_nsecs(sched, 0); 284 T1 = get_nsecs(); 285 delta = T1-T0; 286 min_delta = min(min_delta, delta); 287 } 288 sched->run_measurement_overhead = min_delta; 289 290 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 291 } 292 293 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 294 { 295 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 296 int i; 297 298 for (i = 0; i < 10; i++) { 299 T0 = get_nsecs(); 300 sleep_nsecs(10000); 301 T1 = get_nsecs(); 302 delta = T1-T0; 303 min_delta = min(min_delta, delta); 304 } 305 min_delta -= 10000; 306 sched->sleep_measurement_overhead = min_delta; 307 308 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 309 } 310 311 static struct sched_atom * 312 get_new_event(struct task_desc *task, u64 timestamp) 313 { 314 struct sched_atom *event = zalloc(sizeof(*event)); 315 unsigned long idx = task->nr_events; 316 size_t size; 317 318 event->timestamp = timestamp; 319 event->nr = idx; 320 321 task->nr_events++; 322 size = sizeof(struct sched_atom *) * task->nr_events; 323 task->atoms = realloc(task->atoms, size); 324 BUG_ON(!task->atoms); 325 326 task->atoms[idx] = event; 327 328 return event; 329 } 330 331 static struct sched_atom *last_event(struct task_desc *task) 332 { 333 if (!task->nr_events) 334 return NULL; 335 336 return task->atoms[task->nr_events - 1]; 337 } 338 339 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 340 u64 timestamp, u64 duration) 341 { 342 struct sched_atom *event, *curr_event = last_event(task); 343 344 /* 345 * optimize an existing RUN event by merging this one 346 * to it: 347 */ 348 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 349 sched->nr_run_events_optimized++; 350 curr_event->duration += duration; 351 return; 352 } 353 354 event = get_new_event(task, timestamp); 355 356 event->type = SCHED_EVENT_RUN; 357 event->duration = duration; 358 359 sched->nr_run_events++; 360 } 361 362 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 363 u64 timestamp, struct task_desc *wakee) 364 { 365 struct sched_atom *event, *wakee_event; 366 367 event = get_new_event(task, timestamp); 368 event->type = SCHED_EVENT_WAKEUP; 369 event->wakee = wakee; 370 371 wakee_event = last_event(wakee); 372 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 373 sched->targetless_wakeups++; 374 return; 375 } 376 if (wakee_event->wait_sem) { 377 sched->multitarget_wakeups++; 378 return; 379 } 380 381 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 382 sem_init(wakee_event->wait_sem, 0, 0); 383 wakee_event->specific_wait = 1; 384 event->wait_sem = wakee_event->wait_sem; 385 386 sched->nr_wakeup_events++; 387 } 388 389 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 390 u64 timestamp, u64 task_state __maybe_unused) 391 { 392 struct sched_atom *event = get_new_event(task, timestamp); 393 394 event->type = SCHED_EVENT_SLEEP; 395 396 sched->nr_sleep_events++; 397 } 398 399 static struct task_desc *register_pid(struct perf_sched *sched, 400 unsigned long pid, const char *comm) 401 { 402 struct task_desc *task; 403 static int pid_max; 404 405 if (sched->pid_to_task == NULL) { 406 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 407 pid_max = MAX_PID; 408 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 409 } 410 if (pid >= (unsigned long)pid_max) { 411 
BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 412 sizeof(struct task_desc *))) == NULL); 413 while (pid >= (unsigned long)pid_max) 414 sched->pid_to_task[pid_max++] = NULL; 415 } 416 417 task = sched->pid_to_task[pid]; 418 419 if (task) 420 return task; 421 422 task = zalloc(sizeof(*task)); 423 task->pid = pid; 424 task->nr = sched->nr_tasks; 425 strcpy(task->comm, comm); 426 /* 427 * every task starts in sleeping state - this gets ignored 428 * if there's no wakeup pointing to this sleep state: 429 */ 430 add_sched_event_sleep(sched, task, 0, 0); 431 432 sched->pid_to_task[pid] = task; 433 sched->nr_tasks++; 434 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 435 BUG_ON(!sched->tasks); 436 sched->tasks[task->nr] = task; 437 438 if (verbose) 439 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 440 441 return task; 442 } 443 444 445 static void print_task_traces(struct perf_sched *sched) 446 { 447 struct task_desc *task; 448 unsigned long i; 449 450 for (i = 0; i < sched->nr_tasks; i++) { 451 task = sched->tasks[i]; 452 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 453 task->nr, task->comm, task->pid, task->nr_events); 454 } 455 } 456 457 static void add_cross_task_wakeups(struct perf_sched *sched) 458 { 459 struct task_desc *task1, *task2; 460 unsigned long i, j; 461 462 for (i = 0; i < sched->nr_tasks; i++) { 463 task1 = sched->tasks[i]; 464 j = i + 1; 465 if (j == sched->nr_tasks) 466 j = 0; 467 task2 = sched->tasks[j]; 468 add_sched_event_wakeup(sched, task1, 0, task2); 469 } 470 } 471 472 static void perf_sched__process_event(struct perf_sched *sched, 473 struct sched_atom *atom) 474 { 475 int ret = 0; 476 477 switch (atom->type) { 478 case SCHED_EVENT_RUN: 479 burn_nsecs(sched, atom->duration); 480 break; 481 case SCHED_EVENT_SLEEP: 482 if (atom->wait_sem) 483 ret = sem_wait(atom->wait_sem); 484 BUG_ON(ret); 485 break; 486 case SCHED_EVENT_WAKEUP: 487 if (atom->wait_sem) 488 ret = sem_post(atom->wait_sem); 489 BUG_ON(ret); 490 break; 491 case SCHED_EVENT_MIGRATION: 492 break; 493 default: 494 BUG_ON(1); 495 } 496 } 497 498 static u64 get_cpu_usage_nsec_parent(void) 499 { 500 struct rusage ru; 501 u64 sum; 502 int err; 503 504 err = getrusage(RUSAGE_SELF, &ru); 505 BUG_ON(err); 506 507 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 508 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 509 510 return sum; 511 } 512 513 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 514 { 515 struct perf_event_attr attr; 516 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 517 int fd; 518 struct rlimit limit; 519 bool need_privilege = false; 520 521 memset(&attr, 0, sizeof(attr)); 522 523 attr.type = PERF_TYPE_SOFTWARE; 524 attr.config = PERF_COUNT_SW_TASK_CLOCK; 525 526 force_again: 527 fd = sys_perf_event_open(&attr, 0, -1, -1, 528 perf_event_open_cloexec_flag()); 529 530 if (fd < 0) { 531 if (errno == EMFILE) { 532 if (sched->force) { 533 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 534 limit.rlim_cur += sched->nr_tasks - cur_task; 535 if (limit.rlim_cur > limit.rlim_max) { 536 limit.rlim_max = limit.rlim_cur; 537 need_privilege = true; 538 } 539 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 540 if (need_privilege && errno == EPERM) 541 strcpy(info, "Need privilege\n"); 542 } else 543 goto force_again; 544 } else 545 strcpy(info, "Have a try with -f option\n"); 546 } 547 pr_err("Error: sys_perf_event_open() syscall 
returned " 548 "with %d (%s)\n%s", fd, 549 str_error_r(errno, sbuf, sizeof(sbuf)), info); 550 exit(EXIT_FAILURE); 551 } 552 return fd; 553 } 554 555 static u64 get_cpu_usage_nsec_self(int fd) 556 { 557 u64 runtime; 558 int ret; 559 560 ret = read(fd, &runtime, sizeof(runtime)); 561 BUG_ON(ret != sizeof(runtime)); 562 563 return runtime; 564 } 565 566 struct sched_thread_parms { 567 struct task_desc *task; 568 struct perf_sched *sched; 569 int fd; 570 }; 571 572 static void *thread_func(void *ctx) 573 { 574 struct sched_thread_parms *parms = ctx; 575 struct task_desc *this_task = parms->task; 576 struct perf_sched *sched = parms->sched; 577 u64 cpu_usage_0, cpu_usage_1; 578 unsigned long i, ret; 579 char comm2[22]; 580 int fd = parms->fd; 581 582 zfree(&parms); 583 584 sprintf(comm2, ":%s", this_task->comm); 585 prctl(PR_SET_NAME, comm2); 586 if (fd < 0) 587 return NULL; 588 again: 589 ret = sem_post(&this_task->ready_for_work); 590 BUG_ON(ret); 591 ret = pthread_mutex_lock(&sched->start_work_mutex); 592 BUG_ON(ret); 593 ret = pthread_mutex_unlock(&sched->start_work_mutex); 594 BUG_ON(ret); 595 596 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 597 598 for (i = 0; i < this_task->nr_events; i++) { 599 this_task->curr_event = i; 600 perf_sched__process_event(sched, this_task->atoms[i]); 601 } 602 603 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 604 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 605 ret = sem_post(&this_task->work_done_sem); 606 BUG_ON(ret); 607 608 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 609 BUG_ON(ret); 610 ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); 611 BUG_ON(ret); 612 613 goto again; 614 } 615 616 static void create_tasks(struct perf_sched *sched) 617 { 618 struct task_desc *task; 619 pthread_attr_t attr; 620 unsigned long i; 621 int err; 622 623 err = pthread_attr_init(&attr); 624 BUG_ON(err); 625 err = pthread_attr_setstacksize(&attr, 626 (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); 627 BUG_ON(err); 628 err = pthread_mutex_lock(&sched->start_work_mutex); 629 BUG_ON(err); 630 err = pthread_mutex_lock(&sched->work_done_wait_mutex); 631 BUG_ON(err); 632 for (i = 0; i < sched->nr_tasks; i++) { 633 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 634 BUG_ON(parms == NULL); 635 parms->task = task = sched->tasks[i]; 636 parms->sched = sched; 637 parms->fd = self_open_counters(sched, i); 638 sem_init(&task->sleep_sem, 0, 0); 639 sem_init(&task->ready_for_work, 0, 0); 640 sem_init(&task->work_done_sem, 0, 0); 641 task->curr_event = 0; 642 err = pthread_create(&task->thread, &attr, thread_func, parms); 643 BUG_ON(err); 644 } 645 } 646 647 static void wait_for_tasks(struct perf_sched *sched) 648 { 649 u64 cpu_usage_0, cpu_usage_1; 650 struct task_desc *task; 651 unsigned long i, ret; 652 653 sched->start_time = get_nsecs(); 654 sched->cpu_usage = 0; 655 pthread_mutex_unlock(&sched->work_done_wait_mutex); 656 657 for (i = 0; i < sched->nr_tasks; i++) { 658 task = sched->tasks[i]; 659 ret = sem_wait(&task->ready_for_work); 660 BUG_ON(ret); 661 sem_init(&task->ready_for_work, 0, 0); 662 } 663 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 664 BUG_ON(ret); 665 666 cpu_usage_0 = get_cpu_usage_nsec_parent(); 667 668 pthread_mutex_unlock(&sched->start_work_mutex); 669 670 for (i = 0; i < sched->nr_tasks; i++) { 671 task = sched->tasks[i]; 672 ret = sem_wait(&task->work_done_sem); 673 BUG_ON(ret); 674 sem_init(&task->work_done_sem, 0, 0); 675 sched->cpu_usage += task->cpu_usage; 676 task->cpu_usage = 0; 677 } 678 679 cpu_usage_1 = 
get_cpu_usage_nsec_parent(); 680 if (!sched->runavg_cpu_usage) 681 sched->runavg_cpu_usage = sched->cpu_usage; 682 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 683 684 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 685 if (!sched->runavg_parent_cpu_usage) 686 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 687 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 688 sched->parent_cpu_usage)/sched->replay_repeat; 689 690 ret = pthread_mutex_lock(&sched->start_work_mutex); 691 BUG_ON(ret); 692 693 for (i = 0; i < sched->nr_tasks; i++) { 694 task = sched->tasks[i]; 695 sem_init(&task->sleep_sem, 0, 0); 696 task->curr_event = 0; 697 } 698 } 699 700 static void run_one_test(struct perf_sched *sched) 701 { 702 u64 T0, T1, delta, avg_delta, fluct; 703 704 T0 = get_nsecs(); 705 wait_for_tasks(sched); 706 T1 = get_nsecs(); 707 708 delta = T1 - T0; 709 sched->sum_runtime += delta; 710 sched->nr_runs++; 711 712 avg_delta = sched->sum_runtime / sched->nr_runs; 713 if (delta < avg_delta) 714 fluct = avg_delta - delta; 715 else 716 fluct = delta - avg_delta; 717 sched->sum_fluct += fluct; 718 if (!sched->run_avg) 719 sched->run_avg = delta; 720 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 721 722 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 723 724 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 725 726 printf("cpu: %0.2f / %0.2f", 727 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 728 729 #if 0 730 /* 731 * rusage statistics done by the parent, these are less 732 * accurate than the sched->sum_exec_runtime based statistics: 733 */ 734 printf(" [%0.2f / %0.2f]", 735 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 736 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 737 #endif 738 739 printf("\n"); 740 741 if (sched->nr_sleep_corrections) 742 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 743 sched->nr_sleep_corrections = 0; 744 } 745 746 static void test_calibrations(struct perf_sched *sched) 747 { 748 u64 T0, T1; 749 750 T0 = get_nsecs(); 751 burn_nsecs(sched, NSEC_PER_MSEC); 752 T1 = get_nsecs(); 753 754 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 755 756 T0 = get_nsecs(); 757 sleep_nsecs(NSEC_PER_MSEC); 758 T1 = get_nsecs(); 759 760 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 761 } 762 763 static int 764 replay_wakeup_event(struct perf_sched *sched, 765 struct perf_evsel *evsel, struct perf_sample *sample, 766 struct machine *machine __maybe_unused) 767 { 768 const char *comm = perf_evsel__strval(evsel, sample, "comm"); 769 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 770 struct task_desc *waker, *wakee; 771 772 if (verbose) { 773 printf("sched_wakeup event %p\n", evsel); 774 775 printf(" ... 
pid %d woke up %s/%d\n", sample->tid, comm, pid); 776 } 777 778 waker = register_pid(sched, sample->tid, "<unknown>"); 779 wakee = register_pid(sched, pid, comm); 780 781 add_sched_event_wakeup(sched, waker, sample->time, wakee); 782 return 0; 783 } 784 785 static int replay_switch_event(struct perf_sched *sched, 786 struct perf_evsel *evsel, 787 struct perf_sample *sample, 788 struct machine *machine __maybe_unused) 789 { 790 const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), 791 *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); 792 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 793 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 794 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 795 struct task_desc *prev, __maybe_unused *next; 796 u64 timestamp0, timestamp = sample->time; 797 int cpu = sample->cpu; 798 s64 delta; 799 800 if (verbose) 801 printf("sched_switch event %p\n", evsel); 802 803 if (cpu >= MAX_CPUS || cpu < 0) 804 return 0; 805 806 timestamp0 = sched->cpu_last_switched[cpu]; 807 if (timestamp0) 808 delta = timestamp - timestamp0; 809 else 810 delta = 0; 811 812 if (delta < 0) { 813 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 814 return -1; 815 } 816 817 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 818 prev_comm, prev_pid, next_comm, next_pid, delta); 819 820 prev = register_pid(sched, prev_pid, prev_comm); 821 next = register_pid(sched, next_pid, next_comm); 822 823 sched->cpu_last_switched[cpu] = timestamp; 824 825 add_sched_event_run(sched, prev, timestamp, delta); 826 add_sched_event_sleep(sched, prev, timestamp, prev_state); 827 828 return 0; 829 } 830 831 static int replay_fork_event(struct perf_sched *sched, 832 union perf_event *event, 833 struct machine *machine) 834 { 835 struct thread *child, *parent; 836 837 child = machine__findnew_thread(machine, event->fork.pid, 838 event->fork.tid); 839 parent = machine__findnew_thread(machine, event->fork.ppid, 840 event->fork.ptid); 841 842 if (child == NULL || parent == NULL) { 843 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 844 child, parent); 845 goto out_put; 846 } 847 848 if (verbose) { 849 printf("fork event\n"); 850 printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); 851 printf("... 
child: %s/%d\n", thread__comm_str(child), child->tid);
	}

	register_pid(sched, parent->tid, thread__comm_str(parent));
	register_pid(sched, child->tid, thread__comm_str(child));
out_put:
	thread__put(child);
	thread__put(parent);
	return 0;
}

struct sort_dimension {
	const char		*name;
	sort_fn_t		cmp;
	struct list_head	list;
};

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms) {
		pr_err("No memory at %s\n", __func__);
		return -1;
	}

	atoms->thread = thread__get(thread);
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
	return 0;
}

static char sched_out_state(u64 prev_state)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[prev_state];
}

static int
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom) {
		pr_err("No memory at %s\n", __func__);
		return -1;
	}

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
	return 0;
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta,
		  u64 timestamp __maybe_unused)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

static int latency_switch_event(struct perf_sched *sched,
				struct perf_evsel *evsel,
				struct perf_sample *sample,
				struct machine *machine)
{
	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0, timestamp = sample->time;
	int cpu = sample->cpu, err = -1;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = sched->cpu_last_switched[cpu];
	sched->cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0) {
		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
		return -1;
	}

	sched_out = machine__findnew_thread(machine, -1, prev_pid);
	sched_in = machine__findnew_thread(machine, -1, next_pid);
	if (sched_out == NULL || sched_in == NULL)
		goto out_put;

	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
	if (!out_events) {
		if (thread_atoms_insert(sched, sched_out))
			goto out_put;
		out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
		if (!out_events) {
			pr_err("out-event: Internal tree error");
			goto out_put;
		}
	}
	if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
		return -1;

	in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
	if (!in_events) {
		if (thread_atoms_insert(sched, sched_in))
			goto out_put;
		in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
		if (!in_events) {
			pr_err("in-event: Internal tree error");
			goto out_put;
		}
		/*
		 * A task came in that we have not heard about yet:
		 * add an initial atom in runnable state:
		 */
		if (add_sched_out_event(in_events, 'R', timestamp))
			goto out_put;
	}
	add_sched_in_event(in_events, timestamp);
	err = 0;
out_put:
	thread__put(sched_out);
	thread__put(sched_in);
	return err;
}

static int latency_runtime_event(struct perf_sched *sched,
				 struct perf_evsel *evsel,
				 struct perf_sample *sample,
				 struct machine *machine)
{
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
	const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	struct thread *thread = machine__findnew_thread(machine, -1, pid);
	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
	u64 timestamp = sample->time;
	int cpu = sample->cpu, err = -1;

	if (thread == NULL)
		return -1;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		if (thread_atoms_insert(sched, thread))
			goto out_put;
		atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
		if (!atoms) {
			pr_err("in-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'R', timestamp))
			goto out_put;
	}

	add_runtime_event(atoms, runtime, timestamp);
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int latency_wakeup_event(struct perf_sched *sched,
				struct perf_evsel *evsel,
				struct perf_sample *sample,
				struct machine *machine)
{
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;
	u64 timestamp = sample->time;
	int err = -1;

	wakee = machine__findnew_thread(machine, -1, pid);
	if (wakee == NULL)
		return -1;
	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
	if (!atoms) {
		if (thread_atoms_insert(sched, wakee))
			goto out_put;
		atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
		if (!atoms) {
			pr_err("wakeup-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'S', timestamp))
			goto out_put;
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * A wakeup event is not guaranteed to arrive only while the task
	 * is off the run queue: it may also fire while the task is still
	 * on the run queue and merely sets ->state back to TASK_RUNNING.
	 * In that case do not update ->wake_up_time for the task, since
	 * it is already runnable.
	 *
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are only looking at one, so don't
	 * skip in this case.
	 */
	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		goto out_ok;

	sched->nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		sched->nr_unordered_timestamps++;
		goto out_ok;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
out_ok:
	err = 0;
out_put:
	thread__put(wakee);
	return err;
}

static int latency_migrate_task_event(struct perf_sched *sched,
				      struct perf_evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
	u64 timestamp = sample->time;
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;
	int err = -1;

	/*
	 * Only need to worry about migration when profiling one CPU.
1204 */ 1205 if (sched->profile_cpu == -1) 1206 return 0; 1207 1208 migrant = machine__findnew_thread(machine, -1, pid); 1209 if (migrant == NULL) 1210 return -1; 1211 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1212 if (!atoms) { 1213 if (thread_atoms_insert(sched, migrant)) 1214 goto out_put; 1215 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1216 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1217 if (!atoms) { 1218 pr_err("migration-event: Internal tree error"); 1219 goto out_put; 1220 } 1221 if (add_sched_out_event(atoms, 'R', timestamp)) 1222 goto out_put; 1223 } 1224 1225 BUG_ON(list_empty(&atoms->work_list)); 1226 1227 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1228 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1229 1230 sched->nr_timestamps++; 1231 1232 if (atom->sched_out_time > timestamp) 1233 sched->nr_unordered_timestamps++; 1234 err = 0; 1235 out_put: 1236 thread__put(migrant); 1237 return err; 1238 } 1239 1240 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1241 { 1242 int i; 1243 int ret; 1244 u64 avg; 1245 char max_lat_at[32]; 1246 1247 if (!work_list->nb_atoms) 1248 return; 1249 /* 1250 * Ignore idle threads: 1251 */ 1252 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1253 return; 1254 1255 sched->all_runtime += work_list->total_runtime; 1256 sched->all_count += work_list->nb_atoms; 1257 1258 if (work_list->num_merged > 1) 1259 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged); 1260 else 1261 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); 1262 1263 for (i = 0; i < 24 - ret; i++) 1264 printf(" "); 1265 1266 avg = work_list->total_lat / work_list->nb_atoms; 1267 timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); 1268 1269 printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", 1270 (double)work_list->total_runtime / NSEC_PER_MSEC, 1271 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1272 (double)work_list->max_lat / NSEC_PER_MSEC, 1273 max_lat_at); 1274 } 1275 1276 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1277 { 1278 if (l->thread == r->thread) 1279 return 0; 1280 if (l->thread->tid < r->thread->tid) 1281 return -1; 1282 if (l->thread->tid > r->thread->tid) 1283 return 1; 1284 return (int)(l->thread - r->thread); 1285 } 1286 1287 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1288 { 1289 u64 avgl, avgr; 1290 1291 if (!l->nb_atoms) 1292 return -1; 1293 1294 if (!r->nb_atoms) 1295 return 1; 1296 1297 avgl = l->total_lat / l->nb_atoms; 1298 avgr = r->total_lat / r->nb_atoms; 1299 1300 if (avgl < avgr) 1301 return -1; 1302 if (avgl > avgr) 1303 return 1; 1304 1305 return 0; 1306 } 1307 1308 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1309 { 1310 if (l->max_lat < r->max_lat) 1311 return -1; 1312 if (l->max_lat > r->max_lat) 1313 return 1; 1314 1315 return 0; 1316 } 1317 1318 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1319 { 1320 if (l->nb_atoms < r->nb_atoms) 1321 return -1; 1322 if (l->nb_atoms > r->nb_atoms) 1323 return 1; 1324 1325 return 0; 1326 } 1327 1328 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1329 { 1330 if (l->total_runtime < r->total_runtime) 1331 return -1; 1332 if (l->total_runtime > r->total_runtime) 1333 return 1; 1334 1335 return 0; 1336 } 1337 1338 
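
/*
 * Note on the comparators above: sort_dimension__add() below maps the
 * textual sort keys "pid", "avg", "max", "switch" and "runtime" to
 * pid_cmp(), avg_cmp(), max_cmp(), switch_cmp() and runtime_cmp(), and
 * appends the selected dimensions to a sort list. thread_lat_cmp()
 * walks that list in order, so the first key that yields a non-zero
 * comparison decides how two work_atoms entries are ordered.
 */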
static int sort_dimension__add(const char *tok, struct list_head *list) 1339 { 1340 size_t i; 1341 static struct sort_dimension avg_sort_dimension = { 1342 .name = "avg", 1343 .cmp = avg_cmp, 1344 }; 1345 static struct sort_dimension max_sort_dimension = { 1346 .name = "max", 1347 .cmp = max_cmp, 1348 }; 1349 static struct sort_dimension pid_sort_dimension = { 1350 .name = "pid", 1351 .cmp = pid_cmp, 1352 }; 1353 static struct sort_dimension runtime_sort_dimension = { 1354 .name = "runtime", 1355 .cmp = runtime_cmp, 1356 }; 1357 static struct sort_dimension switch_sort_dimension = { 1358 .name = "switch", 1359 .cmp = switch_cmp, 1360 }; 1361 struct sort_dimension *available_sorts[] = { 1362 &pid_sort_dimension, 1363 &avg_sort_dimension, 1364 &max_sort_dimension, 1365 &switch_sort_dimension, 1366 &runtime_sort_dimension, 1367 }; 1368 1369 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1370 if (!strcmp(available_sorts[i]->name, tok)) { 1371 list_add_tail(&available_sorts[i]->list, list); 1372 1373 return 0; 1374 } 1375 } 1376 1377 return -1; 1378 } 1379 1380 static void perf_sched__sort_lat(struct perf_sched *sched) 1381 { 1382 struct rb_node *node; 1383 struct rb_root *root = &sched->atom_root; 1384 again: 1385 for (;;) { 1386 struct work_atoms *data; 1387 node = rb_first(root); 1388 if (!node) 1389 break; 1390 1391 rb_erase(node, root); 1392 data = rb_entry(node, struct work_atoms, node); 1393 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1394 } 1395 if (root == &sched->atom_root) { 1396 root = &sched->merged_atom_root; 1397 goto again; 1398 } 1399 } 1400 1401 static int process_sched_wakeup_event(struct perf_tool *tool, 1402 struct perf_evsel *evsel, 1403 struct perf_sample *sample, 1404 struct machine *machine) 1405 { 1406 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1407 1408 if (sched->tp_handler->wakeup_event) 1409 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1410 1411 return 0; 1412 } 1413 1414 union map_priv { 1415 void *ptr; 1416 bool color; 1417 }; 1418 1419 static bool thread__has_color(struct thread *thread) 1420 { 1421 union map_priv priv = { 1422 .ptr = thread__priv(thread), 1423 }; 1424 1425 return priv.color; 1426 } 1427 1428 static struct thread* 1429 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1430 { 1431 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1432 union map_priv priv = { 1433 .color = false, 1434 }; 1435 1436 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1437 return thread; 1438 1439 if (thread_map__has(sched->map.color_pids, tid)) 1440 priv.color = true; 1441 1442 thread__set_priv(thread, priv.ptr); 1443 return thread; 1444 } 1445 1446 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, 1447 struct perf_sample *sample, struct machine *machine) 1448 { 1449 const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1450 struct thread *sched_in; 1451 int new_shortname; 1452 u64 timestamp0, timestamp = sample->time; 1453 s64 delta; 1454 int i, this_cpu = sample->cpu; 1455 int cpus_nr; 1456 bool new_cpu = false; 1457 const char *color = PERF_COLOR_NORMAL; 1458 char stimestamp[32]; 1459 1460 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1461 1462 if (this_cpu > sched->max_cpu) 1463 sched->max_cpu = this_cpu; 1464 1465 if (sched->map.comp) { 1466 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1467 if (!test_and_set_bit(this_cpu, 
sched->map.comp_cpus_mask)) { 1468 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1469 new_cpu = true; 1470 } 1471 } else 1472 cpus_nr = sched->max_cpu; 1473 1474 timestamp0 = sched->cpu_last_switched[this_cpu]; 1475 sched->cpu_last_switched[this_cpu] = timestamp; 1476 if (timestamp0) 1477 delta = timestamp - timestamp0; 1478 else 1479 delta = 0; 1480 1481 if (delta < 0) { 1482 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1483 return -1; 1484 } 1485 1486 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1487 if (sched_in == NULL) 1488 return -1; 1489 1490 sched->curr_thread[this_cpu] = thread__get(sched_in); 1491 1492 printf(" "); 1493 1494 new_shortname = 0; 1495 if (!sched_in->shortname[0]) { 1496 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1497 /* 1498 * Don't allocate a letter-number for swapper:0 1499 * as a shortname. Instead, we use '.' for it. 1500 */ 1501 sched_in->shortname[0] = '.'; 1502 sched_in->shortname[1] = ' '; 1503 } else { 1504 sched_in->shortname[0] = sched->next_shortname1; 1505 sched_in->shortname[1] = sched->next_shortname2; 1506 1507 if (sched->next_shortname1 < 'Z') { 1508 sched->next_shortname1++; 1509 } else { 1510 sched->next_shortname1 = 'A'; 1511 if (sched->next_shortname2 < '9') 1512 sched->next_shortname2++; 1513 else 1514 sched->next_shortname2 = '0'; 1515 } 1516 } 1517 new_shortname = 1; 1518 } 1519 1520 for (i = 0; i < cpus_nr; i++) { 1521 int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; 1522 struct thread *curr_thread = sched->curr_thread[cpu]; 1523 const char *pid_color = color; 1524 const char *cpu_color = color; 1525 1526 if (curr_thread && thread__has_color(curr_thread)) 1527 pid_color = COLOR_PIDS; 1528 1529 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) 1530 continue; 1531 1532 if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) 1533 cpu_color = COLOR_CPUS; 1534 1535 if (cpu != this_cpu) 1536 color_fprintf(stdout, color, " "); 1537 else 1538 color_fprintf(stdout, cpu_color, "*"); 1539 1540 if (sched->curr_thread[cpu]) 1541 color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname); 1542 else 1543 color_fprintf(stdout, color, " "); 1544 } 1545 1546 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) 1547 goto out; 1548 1549 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1550 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1551 if (new_shortname || (verbose && sched_in->tid)) { 1552 const char *pid_color = color; 1553 1554 if (thread__has_color(sched_in)) 1555 pid_color = COLOR_PIDS; 1556 1557 color_fprintf(stdout, pid_color, "%s => %s:%d", 1558 sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); 1559 } 1560 1561 if (sched->map.comp && new_cpu) 1562 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1563 1564 out: 1565 color_fprintf(stdout, color, "\n"); 1566 1567 thread__put(sched_in); 1568 1569 return 0; 1570 } 1571 1572 static int process_sched_switch_event(struct perf_tool *tool, 1573 struct perf_evsel *evsel, 1574 struct perf_sample *sample, 1575 struct machine *machine) 1576 { 1577 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1578 int this_cpu = sample->cpu, err = 0; 1579 u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1580 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1581 1582 if (sched->curr_pid[this_cpu] != (u32)-1) { 1583 /* 1584 * Are we trying to switch away a PID that is 1585 * not current? 
		 */
		if (sched->curr_pid[this_cpu] != prev_pid)
			sched->nr_context_switch_bugs++;
	}

	if (sched->tp_handler->switch_event)
		err = sched->tp_handler->switch_event(sched, evsel, sample, machine);

	sched->curr_pid[this_cpu] = next_pid;
	return err;
}

static int process_sched_runtime_event(struct perf_tool *tool,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	if (sched->tp_handler->runtime_event)
		return sched->tp_handler->runtime_event(sched, evsel, sample, machine);

	return 0;
}

static int perf_sched__process_fork_event(struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	/* run the fork event through the perf machinery */
	perf_event__process_fork(tool, event, sample, machine);

	/* and then run additional processing needed for this command */
	if (sched->tp_handler->fork_event)
		return sched->tp_handler->fork_event(sched, event, machine);

	return 0;
}

static int process_sched_migrate_task_event(struct perf_tool *tool,
					    struct perf_evsel *evsel,
					    struct perf_sample *sample,
					    struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	if (sched->tp_handler->migrate_task_event)
		return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);

	return 0;
}

typedef int (*tracepoint_handler)(struct perf_tool *tool,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample,
				  struct machine *machine);

static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
						 union perf_event *event __maybe_unused,
						 struct perf_sample *sample,
						 struct perf_evsel *evsel,
						 struct machine *machine)
{
	int err = 0;

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(tool, evsel, sample, machine);
	}

	return err;
}

static int perf_sched__read_events(struct perf_sched *sched)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "sched:sched_switch", process_sched_switch_event, },
		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
		{ "sched:sched_wakeup", process_sched_wakeup_event, },
		{ "sched:sched_wakeup_new", process_sched_wakeup_event, },
		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
	};
	struct perf_session *session;
	struct perf_data_file file = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = sched->force,
	};
	int rc = -1;

	session = perf_session__new(&file, false, &sched->tool);
	if (session == NULL) {
		pr_debug("No memory for session\n");
		return -1;
	}

	symbol__init(&session->header.env);

	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out_delete;

	if (perf_session__has_traces(session, "record -R")) {
		int err = perf_session__process_events(session);
		if (err) {
			pr_err("Failed to process events, error %d", err);
			goto out_delete;
		}

		sched->nr_events = session->evlist->stats.nr_events[0];
		sched->nr_lost_events =
session->evlist->stats.total_lost; 1699 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1700 } 1701 1702 rc = 0; 1703 out_delete: 1704 perf_session__delete(session); 1705 return rc; 1706 } 1707 1708 /* 1709 * scheduling times are printed as msec.usec 1710 */ 1711 static inline void print_sched_time(unsigned long long nsecs, int width) 1712 { 1713 unsigned long msecs; 1714 unsigned long usecs; 1715 1716 msecs = nsecs / NSEC_PER_MSEC; 1717 nsecs -= msecs * NSEC_PER_MSEC; 1718 usecs = nsecs / NSEC_PER_USEC; 1719 printf("%*lu.%03lu ", width, msecs, usecs); 1720 } 1721 1722 /* 1723 * returns runtime data for event, allocating memory for it the 1724 * first time it is used. 1725 */ 1726 static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) 1727 { 1728 struct evsel_runtime *r = evsel->priv; 1729 1730 if (r == NULL) { 1731 r = zalloc(sizeof(struct evsel_runtime)); 1732 evsel->priv = r; 1733 } 1734 1735 return r; 1736 } 1737 1738 /* 1739 * save last time event was seen per cpu 1740 */ 1741 static void perf_evsel__save_time(struct perf_evsel *evsel, 1742 u64 timestamp, u32 cpu) 1743 { 1744 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1745 1746 if (r == NULL) 1747 return; 1748 1749 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1750 int i, n = __roundup_pow_of_two(cpu+1); 1751 void *p = r->last_time; 1752 1753 p = realloc(r->last_time, n * sizeof(u64)); 1754 if (!p) 1755 return; 1756 1757 r->last_time = p; 1758 for (i = r->ncpu; i < n; ++i) 1759 r->last_time[i] = (u64) 0; 1760 1761 r->ncpu = n; 1762 } 1763 1764 r->last_time[cpu] = timestamp; 1765 } 1766 1767 /* returns last time this event was seen on the given cpu */ 1768 static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) 1769 { 1770 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1771 1772 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 1773 return 0; 1774 1775 return r->last_time[cpu]; 1776 } 1777 1778 static int comm_width = 30; 1779 1780 static char *timehist_get_commstr(struct thread *thread) 1781 { 1782 static char str[32]; 1783 const char *comm = thread__comm_str(thread); 1784 pid_t tid = thread->tid; 1785 pid_t pid = thread->pid_; 1786 int n; 1787 1788 if (pid == 0) 1789 n = scnprintf(str, sizeof(str), "%s", comm); 1790 1791 else if (tid != pid) 1792 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 1793 1794 else 1795 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 1796 1797 if (n > comm_width) 1798 comm_width = n; 1799 1800 return str; 1801 } 1802 1803 static void timehist_header(struct perf_sched *sched) 1804 { 1805 u32 ncpus = sched->max_cpu + 1; 1806 u32 i, j; 1807 1808 printf("%15s %6s ", "time", "cpu"); 1809 1810 if (sched->show_cpu_visual) { 1811 printf(" "); 1812 for (i = 0, j = 0; i < ncpus; ++i) { 1813 printf("%x", j++); 1814 if (j > 15) 1815 j = 0; 1816 } 1817 printf(" "); 1818 } 1819 1820 printf(" %-*s %9s %9s %9s", comm_width, 1821 "task name", "wait time", "sch delay", "run time"); 1822 1823 printf("\n"); 1824 1825 /* 1826 * units row 1827 */ 1828 printf("%15s %-6s ", "", ""); 1829 1830 if (sched->show_cpu_visual) 1831 printf(" %*s ", ncpus, ""); 1832 1833 printf(" %-*s %9s %9s %9s\n", comm_width, 1834 "[tid/pid]", "(msec)", "(msec)", "(msec)"); 1835 1836 /* 1837 * separator 1838 */ 1839 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 1840 1841 if (sched->show_cpu_visual) 1842 printf(" %.*s ", ncpus, graph_dotted_line); 1843 1844 printf(" %.*s %.9s %.9s %.9s", comm_width, 1845 
graph_dotted_line, graph_dotted_line, graph_dotted_line, 1846 graph_dotted_line); 1847 1848 printf("\n"); 1849 } 1850 1851 static void timehist_print_sample(struct perf_sched *sched, 1852 struct perf_sample *sample, 1853 struct addr_location *al, 1854 struct thread *thread, 1855 u64 t) 1856 { 1857 struct thread_runtime *tr = thread__priv(thread); 1858 u32 max_cpus = sched->max_cpu + 1; 1859 char tstr[64]; 1860 1861 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 1862 printf("%15s [%04d] ", tstr, sample->cpu); 1863 1864 if (sched->show_cpu_visual) { 1865 u32 i; 1866 char c; 1867 1868 printf(" "); 1869 for (i = 0; i < max_cpus; ++i) { 1870 /* flag idle times with 'i'; others are sched events */ 1871 if (i == sample->cpu) 1872 c = (thread->tid == 0) ? 'i' : 's'; 1873 else 1874 c = ' '; 1875 printf("%c", c); 1876 } 1877 printf(" "); 1878 } 1879 1880 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 1881 1882 print_sched_time(tr->dt_wait, 6); 1883 print_sched_time(tr->dt_delay, 6); 1884 print_sched_time(tr->dt_run, 6); 1885 1886 if (sched->show_wakeups) 1887 printf(" %-*s", comm_width, ""); 1888 1889 if (thread->tid == 0) 1890 goto out; 1891 1892 if (sched->show_callchain) 1893 printf(" "); 1894 1895 sample__fprintf_sym(sample, al, 0, 1896 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 1897 EVSEL__PRINT_CALLCHAIN_ARROW | 1898 EVSEL__PRINT_SKIP_IGNORED, 1899 &callchain_cursor, stdout); 1900 1901 out: 1902 printf("\n"); 1903 } 1904 1905 /* 1906 * Explanation of delta-time stats: 1907 * 1908 * t = time of current schedule out event 1909 * tprev = time of previous sched out event 1910 * also time of schedule-in event for current task 1911 * last_time = time of last sched change event for current task 1912 * (i.e, time process was last scheduled out) 1913 * ready_to_run = time of wakeup for current task 1914 * 1915 * -----|------------|------------|------------|------ 1916 * last ready tprev t 1917 * time to run 1918 * 1919 * |-------- dt_wait --------| 1920 * |- dt_delay -|-- dt_run --| 1921 * 1922 * dt_run = run time of current task 1923 * dt_wait = time between last schedule out event for task and tprev 1924 * represents time spent off the cpu 1925 * dt_delay = time between wakeup and schedule-in of task 1926 */ 1927 1928 static void timehist_update_runtime_stats(struct thread_runtime *r, 1929 u64 t, u64 tprev) 1930 { 1931 r->dt_delay = 0; 1932 r->dt_wait = 0; 1933 r->dt_run = 0; 1934 if (tprev) { 1935 r->dt_run = t - tprev; 1936 if (r->ready_to_run) { 1937 if (r->ready_to_run > tprev) 1938 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 1939 else 1940 r->dt_delay = tprev - r->ready_to_run; 1941 } 1942 1943 if (r->last_time > tprev) 1944 pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); 1945 else if (r->last_time) 1946 r->dt_wait = tprev - r->last_time; 1947 } 1948 1949 update_stats(&r->run_stats, r->dt_run); 1950 r->total_run_time += r->dt_run; 1951 } 1952 1953 static bool is_idle_sample(struct perf_sample *sample, 1954 struct perf_evsel *evsel) 1955 { 1956 /* pid 0 == swapper == idle task */ 1957 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) 1958 return perf_evsel__intval(evsel, sample, "prev_pid") == 0; 1959 1960 return sample->pid == 0; 1961 } 1962 1963 static void save_task_callchain(struct perf_sched *sched, 1964 struct perf_sample *sample, 1965 struct perf_evsel *evsel, 1966 struct machine *machine) 1967 { 1968 struct callchain_cursor *cursor = &callchain_cursor; 1969 struct thread *thread; 1970 
1971 /* want main thread for process - has maps */ 1972 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 1973 if (thread == NULL) { 1974 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 1975 return; 1976 } 1977 1978 if (!symbol_conf.use_callchain || sample->callchain == NULL) 1979 return; 1980 1981 if (thread__resolve_callchain(thread, cursor, evsel, sample, 1982 NULL, NULL, sched->max_stack + 2) != 0) { 1983 if (verbose) 1984 error("Failed to resolve callchain. Skipping\n"); 1985 1986 return; 1987 } 1988 1989 callchain_cursor_commit(cursor); 1990 1991 while (true) { 1992 struct callchain_cursor_node *node; 1993 struct symbol *sym; 1994 1995 node = callchain_cursor_current(cursor); 1996 if (node == NULL) 1997 break; 1998 1999 sym = node->sym; 2000 if (sym && sym->name) { 2001 if (!strcmp(sym->name, "schedule") || 2002 !strcmp(sym->name, "__schedule") || 2003 !strcmp(sym->name, "preempt_schedule")) 2004 sym->ignore = 1; 2005 } 2006 2007 callchain_cursor_advance(cursor); 2008 } 2009 } 2010 2011 static int init_idle_thread(struct thread *thread) 2012 { 2013 struct idle_thread_runtime *itr; 2014 2015 thread__set_comm(thread, idle_comm, 0); 2016 2017 itr = zalloc(sizeof(*itr)); 2018 if (itr == NULL) 2019 return -ENOMEM; 2020 2021 init_stats(&itr->tr.run_stats); 2022 callchain_init(&itr->callchain); 2023 callchain_cursor_reset(&itr->cursor); 2024 thread__set_priv(thread, itr); 2025 2026 return 0; 2027 } 2028 2029 /* 2030 * Track idle stats per cpu by maintaining a local thread 2031 * struct for the idle task on each cpu. 2032 */ 2033 static int init_idle_threads(int ncpu) 2034 { 2035 int i, ret; 2036 2037 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2038 if (!idle_threads) 2039 return -ENOMEM; 2040 2041 idle_max_cpu = ncpu; 2042 2043 /* allocate the actual thread struct if needed */ 2044 for (i = 0; i < ncpu; ++i) { 2045 idle_threads[i] = thread__new(0, 0); 2046 if (idle_threads[i] == NULL) 2047 return -ENOMEM; 2048 2049 ret = init_idle_thread(idle_threads[i]); 2050 if (ret < 0) 2051 return ret; 2052 } 2053 2054 return 0; 2055 } 2056 2057 static void free_idle_threads(void) 2058 { 2059 int i; 2060 2061 if (idle_threads == NULL) 2062 return; 2063 2064 for (i = 0; i < idle_max_cpu; ++i) { 2065 if ((idle_threads[i])) 2066 thread__delete(idle_threads[i]); 2067 } 2068 2069 free(idle_threads); 2070 } 2071 2072 static struct thread *get_idle_thread(int cpu) 2073 { 2074 /* 2075 * expand/allocate array of pointers to local thread 2076 * structs if needed 2077 */ 2078 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2079 int i, j = __roundup_pow_of_two(cpu+1); 2080 void *p; 2081 2082 p = realloc(idle_threads, j * sizeof(struct thread *)); 2083 if (!p) 2084 return NULL; 2085 2086 idle_threads = (struct thread **) p; 2087 for (i = idle_max_cpu; i < j; ++i) 2088 idle_threads[i] = NULL; 2089 2090 idle_max_cpu = j; 2091 } 2092 2093 /* allocate a new thread struct if needed */ 2094 if (idle_threads[cpu] == NULL) { 2095 idle_threads[cpu] = thread__new(0, 0); 2096 2097 if (idle_threads[cpu]) { 2098 if (init_idle_thread(idle_threads[cpu]) < 0) 2099 return NULL; 2100 } 2101 } 2102 2103 return idle_threads[cpu]; 2104 } 2105 2106 static void save_idle_callchain(struct idle_thread_runtime *itr, 2107 struct perf_sample *sample) 2108 { 2109 if (!symbol_conf.use_callchain || sample->callchain == NULL) 2110 return; 2111 2112 callchain_cursor__copy(&itr->cursor, &callchain_cursor); 2113 } 2114 2115 /* 2116 * handle runtime stats saved per thread 2117 */ 2118 static struct 
thread_runtime *thread__init_runtime(struct thread *thread) 2119 { 2120 struct thread_runtime *r; 2121 2122 r = zalloc(sizeof(struct thread_runtime)); 2123 if (!r) 2124 return NULL; 2125 2126 init_stats(&r->run_stats); 2127 thread__set_priv(thread, r); 2128 2129 return r; 2130 } 2131 2132 static struct thread_runtime *thread__get_runtime(struct thread *thread) 2133 { 2134 struct thread_runtime *tr; 2135 2136 tr = thread__priv(thread); 2137 if (tr == NULL) { 2138 tr = thread__init_runtime(thread); 2139 if (tr == NULL) 2140 pr_debug("Failed to malloc memory for runtime data.\n"); 2141 } 2142 2143 return tr; 2144 } 2145 2146 static struct thread *timehist_get_thread(struct perf_sched *sched, 2147 struct perf_sample *sample, 2148 struct machine *machine, 2149 struct perf_evsel *evsel) 2150 { 2151 struct thread *thread; 2152 2153 if (is_idle_sample(sample, evsel)) { 2154 thread = get_idle_thread(sample->cpu); 2155 if (thread == NULL) 2156 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2157 2158 } else { 2159 /* there were samples with tid 0 but non-zero pid */ 2160 thread = machine__findnew_thread(machine, sample->pid, 2161 sample->tid ?: sample->pid); 2162 if (thread == NULL) { 2163 pr_debug("Failed to get thread for tid %d. skipping sample.\n", 2164 sample->tid); 2165 } 2166 2167 save_task_callchain(sched, sample, evsel, machine); 2168 if (sched->idle_hist) { 2169 struct thread *idle; 2170 struct idle_thread_runtime *itr; 2171 2172 idle = get_idle_thread(sample->cpu); 2173 if (idle == NULL) { 2174 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2175 return NULL; 2176 } 2177 2178 itr = thread__priv(idle); 2179 if (itr == NULL) 2180 return NULL; 2181 2182 itr->last_thread = thread; 2183 2184 /* copy task callchain when entering to idle */ 2185 if (perf_evsel__intval(evsel, sample, "next_pid") == 0) 2186 save_idle_callchain(itr, sample); 2187 } 2188 } 2189 2190 return thread; 2191 } 2192 2193 static bool timehist_skip_sample(struct perf_sched *sched, 2194 struct thread *thread, 2195 struct perf_evsel *evsel, 2196 struct perf_sample *sample) 2197 { 2198 bool rc = false; 2199 2200 if (thread__is_filtered(thread)) { 2201 rc = true; 2202 sched->skipped_samples++; 2203 } 2204 2205 if (sched->idle_hist) { 2206 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) 2207 rc = true; 2208 else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && 2209 perf_evsel__intval(evsel, sample, "next_pid") != 0) 2210 rc = true; 2211 } 2212 2213 return rc; 2214 } 2215 2216 static void timehist_print_wakeup_event(struct perf_sched *sched, 2217 struct perf_evsel *evsel, 2218 struct perf_sample *sample, 2219 struct machine *machine, 2220 struct thread *awakened) 2221 { 2222 struct thread *thread; 2223 char tstr[64]; 2224 2225 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2226 if (thread == NULL) 2227 return; 2228 2229 /* show wakeup unless both awakee and awaker are filtered */ 2230 if (timehist_skip_sample(sched, thread, evsel, sample) && 2231 timehist_skip_sample(sched, awakened, evsel, sample)) { 2232 return; 2233 } 2234 2235 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2236 printf("%15s [%04d] ", tstr, sample->cpu); 2237 if (sched->show_cpu_visual) 2238 printf(" %*s ", sched->max_cpu + 1, ""); 2239 2240 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2241 2242 /* dt spacer */ 2243 printf(" %9s %9s %9s ", "", "", ""); 2244 2245 printf("awakened: %s", timehist_get_commstr(awakened)); 2246 2247 printf("\n"); 2248 } 2249 2250 
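/*
 * Worked example of the delta-time stats described in the diagram above
 * (the numbers are purely illustrative): suppose a task was last switched
 * out at time 100, woken at time 150 (ready_to_run, recorded by
 * timehist_sched_wakeup_event() below), scheduled in at time 180 (tprev)
 * and switched out again at time 230 (t). timehist_update_runtime_stats()
 * then computes:
 *
 *      dt_wait  = tprev - last_time    = 180 - 100 = 80   (time off cpu)
 *      dt_delay = tprev - ready_to_run = 180 - 150 = 30   (wakeup latency)
 *      dt_run   = t - tprev            = 230 - 180 = 50   (time on cpu)
 */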
static int timehist_sched_wakeup_event(struct perf_tool *tool, 2251 union perf_event *event __maybe_unused, 2252 struct perf_evsel *evsel, 2253 struct perf_sample *sample, 2254 struct machine *machine) 2255 { 2256 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2257 struct thread *thread; 2258 struct thread_runtime *tr = NULL; 2259 /* want pid of awakened task not pid in sample */ 2260 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 2261 2262 thread = machine__findnew_thread(machine, 0, pid); 2263 if (thread == NULL) 2264 return -1; 2265 2266 tr = thread__get_runtime(thread); 2267 if (tr == NULL) 2268 return -1; 2269 2270 if (tr->ready_to_run == 0) 2271 tr->ready_to_run = sample->time; 2272 2273 /* show wakeups if requested */ 2274 if (sched->show_wakeups && 2275 !perf_time__skip_sample(&sched->ptime, sample->time)) 2276 timehist_print_wakeup_event(sched, evsel, sample, machine, thread); 2277 2278 return 0; 2279 } 2280 2281 static void timehist_print_migration_event(struct perf_sched *sched, 2282 struct perf_evsel *evsel, 2283 struct perf_sample *sample, 2284 struct machine *machine, 2285 struct thread *migrated) 2286 { 2287 struct thread *thread; 2288 char tstr[64]; 2289 u32 max_cpus = sched->max_cpu + 1; 2290 u32 ocpu, dcpu; 2291 2292 if (sched->summary_only) 2293 return; 2294 2295 max_cpus = sched->max_cpu + 1; 2296 ocpu = perf_evsel__intval(evsel, sample, "orig_cpu"); 2297 dcpu = perf_evsel__intval(evsel, sample, "dest_cpu"); 2298 2299 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2300 if (thread == NULL) 2301 return; 2302 2303 if (timehist_skip_sample(sched, thread, evsel, sample) && 2304 timehist_skip_sample(sched, migrated, evsel, sample)) { 2305 return; 2306 } 2307 2308 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2309 printf("%15s [%04d] ", tstr, sample->cpu); 2310 2311 if (sched->show_cpu_visual) { 2312 u32 i; 2313 char c; 2314 2315 printf(" "); 2316 for (i = 0; i < max_cpus; ++i) { 2317 c = (i == sample->cpu) ? 
'm' : ' '; 2318 printf("%c", c); 2319 } 2320 printf(" "); 2321 } 2322 2323 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2324 2325 /* dt spacer */ 2326 printf(" %9s %9s %9s ", "", "", ""); 2327 2328 printf("migrated: %s", timehist_get_commstr(migrated)); 2329 printf(" cpu %d => %d", ocpu, dcpu); 2330 2331 printf("\n"); 2332 } 2333 2334 static int timehist_migrate_task_event(struct perf_tool *tool, 2335 union perf_event *event __maybe_unused, 2336 struct perf_evsel *evsel, 2337 struct perf_sample *sample, 2338 struct machine *machine) 2339 { 2340 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2341 struct thread *thread; 2342 struct thread_runtime *tr = NULL; 2343 /* want pid of migrated task not pid in sample */ 2344 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 2345 2346 thread = machine__findnew_thread(machine, 0, pid); 2347 if (thread == NULL) 2348 return -1; 2349 2350 tr = thread__get_runtime(thread); 2351 if (tr == NULL) 2352 return -1; 2353 2354 tr->migrations++; 2355 2356 /* show migrations if requested */ 2357 timehist_print_migration_event(sched, evsel, sample, machine, thread); 2358 2359 return 0; 2360 } 2361 2362 static int timehist_sched_change_event(struct perf_tool *tool, 2363 union perf_event *event, 2364 struct perf_evsel *evsel, 2365 struct perf_sample *sample, 2366 struct machine *machine) 2367 { 2368 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2369 struct perf_time_interval *ptime = &sched->ptime; 2370 struct addr_location al; 2371 struct thread *thread; 2372 struct thread_runtime *tr = NULL; 2373 u64 tprev, t = sample->time; 2374 int rc = 0; 2375 2376 if (machine__resolve(machine, &al, sample) < 0) { 2377 pr_err("problem processing %d event. skipping it\n", 2378 event->header.type); 2379 rc = -1; 2380 goto out; 2381 } 2382 2383 thread = timehist_get_thread(sched, sample, machine, evsel); 2384 if (thread == NULL) { 2385 rc = -1; 2386 goto out; 2387 } 2388 2389 if (timehist_skip_sample(sched, thread, evsel, sample)) 2390 goto out; 2391 2392 tr = thread__get_runtime(thread); 2393 if (tr == NULL) { 2394 rc = -1; 2395 goto out; 2396 } 2397 2398 tprev = perf_evsel__get_time(evsel, sample->cpu); 2399 2400 /* 2401 * If start time given: 2402 * - sample time is under window user cares about - skip sample 2403 * - tprev is under window user cares about - reset to start of window 2404 */ 2405 if (ptime->start && ptime->start > t) 2406 goto out; 2407 2408 if (tprev && ptime->start > tprev) 2409 tprev = ptime->start; 2410 2411 /* 2412 * If end time given: 2413 * - previous sched event is out of window - we are done 2414 * - sample time is beyond window user cares about - reset it 2415 * to close out stats for time window interest 2416 */ 2417 if (ptime->end) { 2418 if (tprev > ptime->end) 2419 goto out; 2420 2421 if (t > ptime->end) 2422 t = ptime->end; 2423 } 2424 2425 if (!sched->idle_hist || thread->tid == 0) { 2426 timehist_update_runtime_stats(tr, t, tprev); 2427 2428 if (sched->idle_hist) { 2429 struct idle_thread_runtime *itr = (void *)tr; 2430 struct thread_runtime *last_tr; 2431 2432 BUG_ON(thread->tid != 0); 2433 2434 if (itr->last_thread == NULL) 2435 goto out; 2436 2437 /* add current idle time as last thread's runtime */ 2438 last_tr = thread__get_runtime(itr->last_thread); 2439 if (last_tr == NULL) 2440 goto out; 2441 2442 timehist_update_runtime_stats(last_tr, t, tprev); 2443 /* 2444 * remove delta time of last thread as it's not updated 2445 * and otherwise it will show an invalid value 
next 2446 * time. we only care total run time and run stat. 2447 */ 2448 last_tr->dt_run = 0; 2449 last_tr->dt_wait = 0; 2450 last_tr->dt_delay = 0; 2451 2452 if (itr->cursor.nr) 2453 callchain_append(&itr->callchain, &itr->cursor, t - tprev); 2454 2455 itr->last_thread = NULL; 2456 } 2457 } 2458 2459 if (!sched->summary_only) 2460 timehist_print_sample(sched, sample, &al, thread, t); 2461 2462 out: 2463 if (tr) { 2464 /* time of this sched_switch event becomes last time task seen */ 2465 tr->last_time = sample->time; 2466 2467 /* sched out event for task so reset ready to run time */ 2468 tr->ready_to_run = 0; 2469 } 2470 2471 perf_evsel__save_time(evsel, sample->time, sample->cpu); 2472 2473 return rc; 2474 } 2475 2476 static int timehist_sched_switch_event(struct perf_tool *tool, 2477 union perf_event *event, 2478 struct perf_evsel *evsel, 2479 struct perf_sample *sample, 2480 struct machine *machine __maybe_unused) 2481 { 2482 return timehist_sched_change_event(tool, event, evsel, sample, machine); 2483 } 2484 2485 static int process_lost(struct perf_tool *tool __maybe_unused, 2486 union perf_event *event, 2487 struct perf_sample *sample, 2488 struct machine *machine __maybe_unused) 2489 { 2490 char tstr[64]; 2491 2492 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2493 printf("%15s ", tstr); 2494 printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu); 2495 2496 return 0; 2497 } 2498 2499 2500 static void print_thread_runtime(struct thread *t, 2501 struct thread_runtime *r) 2502 { 2503 double mean = avg_stats(&r->run_stats); 2504 float stddev; 2505 2506 printf("%*s %5d %9" PRIu64 " ", 2507 comm_width, timehist_get_commstr(t), t->ppid, 2508 (u64) r->run_stats.n); 2509 2510 print_sched_time(r->total_run_time, 8); 2511 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); 2512 print_sched_time(r->run_stats.min, 6); 2513 printf(" "); 2514 print_sched_time((u64) mean, 6); 2515 printf(" "); 2516 print_sched_time(r->run_stats.max, 6); 2517 printf(" "); 2518 printf("%5.2f", stddev); 2519 printf(" %5" PRIu64, r->migrations); 2520 printf("\n"); 2521 } 2522 2523 struct total_run_stats { 2524 u64 sched_count; 2525 u64 task_count; 2526 u64 total_run_time; 2527 }; 2528 2529 static int __show_thread_runtime(struct thread *t, void *priv) 2530 { 2531 struct total_run_stats *stats = priv; 2532 struct thread_runtime *r; 2533 2534 if (thread__is_filtered(t)) 2535 return 0; 2536 2537 r = thread__priv(t); 2538 if (r && r->run_stats.n) { 2539 stats->task_count++; 2540 stats->sched_count += r->run_stats.n; 2541 stats->total_run_time += r->total_run_time; 2542 print_thread_runtime(t, r); 2543 } 2544 2545 return 0; 2546 } 2547 2548 static int show_thread_runtime(struct thread *t, void *priv) 2549 { 2550 if (t->dead) 2551 return 0; 2552 2553 return __show_thread_runtime(t, priv); 2554 } 2555 2556 static int show_deadthread_runtime(struct thread *t, void *priv) 2557 { 2558 if (!t->dead) 2559 return 0; 2560 2561 return __show_thread_runtime(t, priv); 2562 } 2563 2564 static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) 2565 { 2566 const char *sep = " <- "; 2567 struct callchain_list *chain; 2568 size_t ret = 0; 2569 char bf[1024]; 2570 bool first; 2571 2572 if (node == NULL) 2573 return 0; 2574 2575 ret = callchain__fprintf_folded(fp, node->parent); 2576 first = (ret == 0); 2577 2578 list_for_each_entry(chain, &node->val, list) { 2579 if (chain->ip >= PERF_CONTEXT_MAX) 2580 continue; 2581 if (chain->ms.sym && chain->ms.sym->ignore) 2582 
continue; 2583 ret += fprintf(fp, "%s%s", first ? "" : sep, 2584 callchain_list__sym_name(chain, bf, sizeof(bf), 2585 false)); 2586 first = false; 2587 } 2588 2589 return ret; 2590 } 2591 2592 static size_t timehist_print_idlehist_callchain(struct rb_root *root) 2593 { 2594 size_t ret = 0; 2595 FILE *fp = stdout; 2596 struct callchain_node *chain; 2597 struct rb_node *rb_node = rb_first(root); 2598 2599 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); 2600 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, 2601 graph_dotted_line); 2602 2603 while (rb_node) { 2604 chain = rb_entry(rb_node, struct callchain_node, rb_node); 2605 rb_node = rb_next(rb_node); 2606 2607 ret += fprintf(fp, " "); 2608 print_sched_time(chain->hit, 12); 2609 ret += 16; /* print_sched_time returns 2nd arg + 4 */ 2610 ret += fprintf(fp, " %8d ", chain->count); 2611 ret += callchain__fprintf_folded(fp, chain); 2612 ret += fprintf(fp, "\n"); 2613 } 2614 2615 return ret; 2616 } 2617 2618 static void timehist_print_summary(struct perf_sched *sched, 2619 struct perf_session *session) 2620 { 2621 struct machine *m = &session->machines.host; 2622 struct total_run_stats totals; 2623 u64 task_count; 2624 struct thread *t; 2625 struct thread_runtime *r; 2626 int i; 2627 2628 memset(&totals, 0, sizeof(totals)); 2629 2630 if (sched->idle_hist) { 2631 printf("\nIdle-time summary\n"); 2632 printf("%*s parent sched-out ", comm_width, "comm"); 2633 printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); 2634 } else { 2635 printf("\nRuntime summary\n"); 2636 printf("%*s parent sched-in ", comm_width, "comm"); 2637 printf(" run-time min-run avg-run max-run stddev migrations\n"); 2638 } 2639 printf("%*s (count) ", comm_width, ""); 2640 printf(" (msec) (msec) (msec) (msec) %%\n"); 2641 printf("%.117s\n", graph_dotted_line); 2642 2643 machine__for_each_thread(m, show_thread_runtime, &totals); 2644 task_count = totals.task_count; 2645 if (!task_count) 2646 printf("<no still running tasks>\n"); 2647 2648 printf("\nTerminated tasks:\n"); 2649 machine__for_each_thread(m, show_deadthread_runtime, &totals); 2650 if (task_count == totals.task_count) 2651 printf("<no terminated tasks>\n"); 2652 2653 /* CPU idle stats not tracked when samples were skipped */ 2654 if (sched->skipped_samples && !sched->idle_hist) 2655 return; 2656 2657 printf("\nIdle stats:\n"); 2658 for (i = 0; i < idle_max_cpu; ++i) { 2659 t = idle_threads[i]; 2660 if (!t) 2661 continue; 2662 2663 r = thread__priv(t); 2664 if (r && r->run_stats.n) { 2665 totals.sched_count += r->run_stats.n; 2666 printf(" CPU %2d idle for ", i); 2667 print_sched_time(r->total_run_time, 6); 2668 printf(" msec\n"); 2669 } else 2670 printf(" CPU %2d idle entire time window\n", i); 2671 } 2672 2673 if (sched->idle_hist && symbol_conf.use_callchain) { 2674 callchain_param.mode = CHAIN_FOLDED; 2675 callchain_param.value = CCVAL_PERIOD; 2676 2677 callchain_register_param(&callchain_param); 2678 2679 printf("\nIdle stats by callchain:\n"); 2680 for (i = 0; i < idle_max_cpu; ++i) { 2681 struct idle_thread_runtime *itr; 2682 2683 t = idle_threads[i]; 2684 if (!t) 2685 continue; 2686 2687 itr = thread__priv(t); 2688 if (itr == NULL) 2689 continue; 2690 2691 callchain_param.sort(&itr->sorted_root, &itr->callchain, 2692 0, &callchain_param); 2693 2694 printf(" CPU %2d:", i); 2695 print_sched_time(itr->tr.total_run_time, 6); 2696 printf(" msec\n"); 2697 timehist_print_idlehist_callchain(&itr->sorted_root); 2698 printf("\n"); 2699 } 2700 } 2701 2702 printf("\n" 
2703 " Total number of unique tasks: %" PRIu64 "\n" 2704 "Total number of context switches: %" PRIu64 "\n" 2705 " Total run time (msec): ", 2706 totals.task_count, totals.sched_count); 2707 2708 print_sched_time(totals.total_run_time, 2); 2709 printf("\n"); 2710 } 2711 2712 typedef int (*sched_handler)(struct perf_tool *tool, 2713 union perf_event *event, 2714 struct perf_evsel *evsel, 2715 struct perf_sample *sample, 2716 struct machine *machine); 2717 2718 static int perf_timehist__process_sample(struct perf_tool *tool, 2719 union perf_event *event, 2720 struct perf_sample *sample, 2721 struct perf_evsel *evsel, 2722 struct machine *machine) 2723 { 2724 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2725 int err = 0; 2726 int this_cpu = sample->cpu; 2727 2728 if (this_cpu > sched->max_cpu) 2729 sched->max_cpu = this_cpu; 2730 2731 if (evsel->handler != NULL) { 2732 sched_handler f = evsel->handler; 2733 2734 err = f(tool, event, evsel, sample, machine); 2735 } 2736 2737 return err; 2738 } 2739 2740 static int timehist_check_attr(struct perf_sched *sched, 2741 struct perf_evlist *evlist) 2742 { 2743 struct perf_evsel *evsel; 2744 struct evsel_runtime *er; 2745 2746 list_for_each_entry(evsel, &evlist->entries, node) { 2747 er = perf_evsel__get_runtime(evsel); 2748 if (er == NULL) { 2749 pr_err("Failed to allocate memory for evsel runtime data\n"); 2750 return -1; 2751 } 2752 2753 if (sched->show_callchain && 2754 !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { 2755 pr_info("Samples do not have callchains.\n"); 2756 sched->show_callchain = 0; 2757 symbol_conf.use_callchain = 0; 2758 } 2759 } 2760 2761 return 0; 2762 } 2763 2764 static int perf_sched__timehist(struct perf_sched *sched) 2765 { 2766 const struct perf_evsel_str_handler handlers[] = { 2767 { "sched:sched_switch", timehist_sched_switch_event, }, 2768 { "sched:sched_wakeup", timehist_sched_wakeup_event, }, 2769 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, 2770 }; 2771 const struct perf_evsel_str_handler migrate_handlers[] = { 2772 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2773 }; 2774 struct perf_data_file file = { 2775 .path = input_name, 2776 .mode = PERF_DATA_MODE_READ, 2777 .force = sched->force, 2778 }; 2779 2780 struct perf_session *session; 2781 struct perf_evlist *evlist; 2782 int err = -1; 2783 2784 /* 2785 * event handlers for timehist option 2786 */ 2787 sched->tool.sample = perf_timehist__process_sample; 2788 sched->tool.mmap = perf_event__process_mmap; 2789 sched->tool.comm = perf_event__process_comm; 2790 sched->tool.exit = perf_event__process_exit; 2791 sched->tool.fork = perf_event__process_fork; 2792 sched->tool.lost = process_lost; 2793 sched->tool.attr = perf_event__process_attr; 2794 sched->tool.tracing_data = perf_event__process_tracing_data; 2795 sched->tool.build_id = perf_event__process_build_id; 2796 2797 sched->tool.ordered_events = true; 2798 sched->tool.ordering_requires_timestamps = true; 2799 2800 symbol_conf.use_callchain = sched->show_callchain; 2801 2802 session = perf_session__new(&file, false, &sched->tool); 2803 if (session == NULL) 2804 return -ENOMEM; 2805 2806 evlist = session->evlist; 2807 2808 symbol__init(&session->header.env); 2809 2810 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { 2811 pr_err("Invalid time string\n"); 2812 return -EINVAL; 2813 } 2814 2815 if (timehist_check_attr(sched, evlist) != 0) 2816 goto out; 2817 2818 setup_pager(); 2819 2820 /* setup per-evsel handlers */ 2821 if 
(perf_session__set_tracepoints_handlers(session, handlers)) 2822 goto out; 2823 2824 /* sched_switch event at a minimum needs to exist */ 2825 if (!perf_evlist__find_tracepoint_by_name(session->evlist, 2826 "sched:sched_switch")) { 2827 pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); 2828 goto out; 2829 } 2830 2831 if (sched->show_migrations && 2832 perf_session__set_tracepoints_handlers(session, migrate_handlers)) 2833 goto out; 2834 2835 /* pre-allocate struct for per-CPU idle stats */ 2836 sched->max_cpu = session->header.env.nr_cpus_online; 2837 if (sched->max_cpu == 0) 2838 sched->max_cpu = 4; 2839 if (init_idle_threads(sched->max_cpu)) 2840 goto out; 2841 2842 /* summary_only implies summary option, but don't overwrite summary if set */ 2843 if (sched->summary_only) 2844 sched->summary = sched->summary_only; 2845 2846 if (!sched->summary_only) 2847 timehist_header(sched); 2848 2849 err = perf_session__process_events(session); 2850 if (err) { 2851 pr_err("Failed to process events, error %d", err); 2852 goto out; 2853 } 2854 2855 sched->nr_events = evlist->stats.nr_events[0]; 2856 sched->nr_lost_events = evlist->stats.total_lost; 2857 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; 2858 2859 if (sched->summary) 2860 timehist_print_summary(sched, session); 2861 2862 out: 2863 free_idle_threads(); 2864 perf_session__delete(session); 2865 2866 return err; 2867 } 2868 2869 2870 static void print_bad_events(struct perf_sched *sched) 2871 { 2872 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 2873 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 2874 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, 2875 sched->nr_unordered_timestamps, sched->nr_timestamps); 2876 } 2877 if (sched->nr_lost_events && sched->nr_events) { 2878 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 2879 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 2880 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 2881 } 2882 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 2883 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 2884 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 2885 sched->nr_context_switch_bugs, sched->nr_timestamps); 2886 if (sched->nr_lost_events) 2887 printf(" (due to lost events?)"); 2888 printf("\n"); 2889 } 2890 } 2891 2892 static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) 2893 { 2894 struct rb_node **new = &(root->rb_node), *parent = NULL; 2895 struct work_atoms *this; 2896 const char *comm = thread__comm_str(data->thread), *this_comm; 2897 2898 while (*new) { 2899 int cmp; 2900 2901 this = container_of(*new, struct work_atoms, node); 2902 parent = *new; 2903 2904 this_comm = thread__comm_str(this->thread); 2905 cmp = strcmp(comm, this_comm); 2906 if (cmp > 0) { 2907 new = &((*new)->rb_left); 2908 } else if (cmp < 0) { 2909 new = &((*new)->rb_right); 2910 } else { 2911 this->num_merged++; 2912 this->total_runtime += data->total_runtime; 2913 this->nb_atoms += data->nb_atoms; 2914 this->total_lat += data->total_lat; 2915 list_splice(&data->work_list, &this->work_list); 2916 if (this->max_lat < data->max_lat) { 2917 this->max_lat = data->max_lat; 2918 this->max_lat_at = data->max_lat_at; 2919 } 2920 zfree(&data); 2921 return; 2922 } 2923 } 2924 2925 data->num_merged++; 2926 rb_link_node(&data->node, parent, new); 2927 rb_insert_color(&data->node, root); 2928 } 2929 2930 static 
void perf_sched__merge_lat(struct perf_sched *sched) 2931 { 2932 struct work_atoms *data; 2933 struct rb_node *node; 2934 2935 if (sched->skip_merge) 2936 return; 2937 2938 while ((node = rb_first(&sched->atom_root))) { 2939 rb_erase(node, &sched->atom_root); 2940 data = rb_entry(node, struct work_atoms, node); 2941 __merge_work_atoms(&sched->merged_atom_root, data); 2942 } 2943 } 2944 2945 static int perf_sched__lat(struct perf_sched *sched) 2946 { 2947 struct rb_node *next; 2948 2949 setup_pager(); 2950 2951 if (perf_sched__read_events(sched)) 2952 return -1; 2953 2954 perf_sched__merge_lat(sched); 2955 perf_sched__sort_lat(sched); 2956 2957 printf("\n -----------------------------------------------------------------------------------------------------------------\n"); 2958 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); 2959 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 2960 2961 next = rb_first(&sched->sorted_atom_root); 2962 2963 while (next) { 2964 struct work_atoms *work_list; 2965 2966 work_list = rb_entry(next, struct work_atoms, node); 2967 output_lat_thread(sched, work_list); 2968 next = rb_next(next); 2969 thread__zput(work_list->thread); 2970 } 2971 2972 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 2973 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 2974 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); 2975 2976 printf(" ---------------------------------------------------\n"); 2977 2978 print_bad_events(sched); 2979 printf("\n"); 2980 2981 return 0; 2982 } 2983 2984 static int setup_map_cpus(struct perf_sched *sched) 2985 { 2986 struct cpu_map *map; 2987 2988 sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); 2989 2990 if (sched->map.comp) { 2991 sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); 2992 if (!sched->map.comp_cpus) 2993 return -1; 2994 } 2995 2996 if (!sched->map.cpus_str) 2997 return 0; 2998 2999 map = cpu_map__new(sched->map.cpus_str); 3000 if (!map) { 3001 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); 3002 return -1; 3003 } 3004 3005 sched->map.cpus = map; 3006 return 0; 3007 } 3008 3009 static int setup_color_pids(struct perf_sched *sched) 3010 { 3011 struct thread_map *map; 3012 3013 if (!sched->map.color_pids_str) 3014 return 0; 3015 3016 map = thread_map__new_by_tid_str(sched->map.color_pids_str); 3017 if (!map) { 3018 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); 3019 return -1; 3020 } 3021 3022 sched->map.color_pids = map; 3023 return 0; 3024 } 3025 3026 static int setup_color_cpus(struct perf_sched *sched) 3027 { 3028 struct cpu_map *map; 3029 3030 if (!sched->map.color_cpus_str) 3031 return 0; 3032 3033 map = cpu_map__new(sched->map.color_cpus_str); 3034 if (!map) { 3035 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); 3036 return -1; 3037 } 3038 3039 sched->map.color_cpus = map; 3040 return 0; 3041 } 3042 3043 static int perf_sched__map(struct perf_sched *sched) 3044 { 3045 if (setup_map_cpus(sched)) 3046 return -1; 3047 3048 if (setup_color_pids(sched)) 3049 return -1; 3050 3051 if (setup_color_cpus(sched)) 3052 return -1; 3053 3054 setup_pager(); 3055 if (perf_sched__read_events(sched)) 3056 return -1; 3057 print_bad_events(sched); 3058 return 0; 3059 } 3060 3061 static int perf_sched__replay(struct perf_sched *sched) 3062 { 3063 unsigned long i; 
3064 3065 calibrate_run_measurement_overhead(sched); 3066 calibrate_sleep_measurement_overhead(sched); 3067 3068 test_calibrations(sched); 3069 3070 if (perf_sched__read_events(sched)) 3071 return -1; 3072 3073 printf("nr_run_events: %ld\n", sched->nr_run_events); 3074 printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); 3075 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); 3076 3077 if (sched->targetless_wakeups) 3078 printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3079 if (sched->multitarget_wakeups) 3080 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3081 if (sched->nr_run_events_optimized) 3082 printf("run atoms optimized: %ld\n", 3083 sched->nr_run_events_optimized); 3084 3085 print_task_traces(sched); 3086 add_cross_task_wakeups(sched); 3087 3088 create_tasks(sched); 3089 printf("------------------------------------------------------------\n"); 3090 for (i = 0; i < sched->replay_repeat; i++) 3091 run_one_test(sched); 3092 3093 return 0; 3094 } 3095 3096 static void setup_sorting(struct perf_sched *sched, const struct option *options, 3097 const char * const usage_msg[]) 3098 { 3099 char *tmp, *tok, *str = strdup(sched->sort_order); 3100 3101 for (tok = strtok_r(str, ", ", &tmp); 3102 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3103 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3104 usage_with_options_msg(usage_msg, options, 3105 "Unknown --sort key: `%s'", tok); 3106 } 3107 } 3108 3109 free(str); 3110 3111 sort_dimension__add("pid", &sched->cmp_pid); 3112 } 3113 3114 static int __cmd_record(int argc, const char **argv) 3115 { 3116 unsigned int rec_argc, i, j; 3117 const char **rec_argv; 3118 const char * const record_args[] = { 3119 "record", 3120 "-a", 3121 "-R", 3122 "-m", "1024", 3123 "-c", "1", 3124 "-e", "sched:sched_switch", 3125 "-e", "sched:sched_stat_wait", 3126 "-e", "sched:sched_stat_sleep", 3127 "-e", "sched:sched_stat_iowait", 3128 "-e", "sched:sched_stat_runtime", 3129 "-e", "sched:sched_process_fork", 3130 "-e", "sched:sched_wakeup", 3131 "-e", "sched:sched_wakeup_new", 3132 "-e", "sched:sched_migrate_task", 3133 }; 3134 3135 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 3136 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 3137 3138 if (rec_argv == NULL) 3139 return -ENOMEM; 3140 3141 for (i = 0; i < ARRAY_SIZE(record_args); i++) 3142 rec_argv[i] = strdup(record_args[i]); 3143 3144 for (j = 1; j < (unsigned int)argc; j++, i++) 3145 rec_argv[i] = argv[j]; 3146 3147 BUG_ON(i != rec_argc); 3148 3149 return cmd_record(i, rec_argv, NULL); 3150 } 3151 3152 int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) 3153 { 3154 const char default_sort_order[] = "avg, max, switch, runtime"; 3155 struct perf_sched sched = { 3156 .tool = { 3157 .sample = perf_sched__process_tracepoint_sample, 3158 .comm = perf_event__process_comm, 3159 .lost = perf_event__process_lost, 3160 .fork = perf_sched__process_fork_event, 3161 .ordered_events = true, 3162 }, 3163 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), 3164 .sort_list = LIST_HEAD_INIT(sched.sort_list), 3165 .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, 3166 .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, 3167 .sort_order = default_sort_order, 3168 .replay_repeat = 10, 3169 .profile_cpu = -1, 3170 .next_shortname1 = 'A', 3171 .next_shortname2 = '0', 3172 .skip_merge = 0, 3173 .show_callchain = 1, 3174 .max_stack = 5, 3175 }; 3176 const struct option sched_options[] = { 3177 OPT_STRING('i', "input", &input_name, "file", 3178 "input file name"), 3179 
OPT_INCR('v', "verbose", &verbose, 3180 "be more verbose (show symbol address, etc)"), 3181 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 3182 "dump raw trace in ASCII"), 3183 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 3184 OPT_END() 3185 }; 3186 const struct option latency_options[] = { 3187 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 3188 "sort by key(s): runtime, switch, avg, max"), 3189 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 3190 "CPU to profile on"), 3191 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 3192 "latency stats per pid instead of per comm"), 3193 OPT_PARENT(sched_options) 3194 }; 3195 const struct option replay_options[] = { 3196 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 3197 "repeat the workload replay N times (-1: infinite)"), 3198 OPT_PARENT(sched_options) 3199 }; 3200 const struct option map_options[] = { 3201 OPT_BOOLEAN(0, "compact", &sched.map.comp, 3202 "map output in compact mode"), 3203 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 3204 "highlight given pids in map"), 3205 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 3206 "highlight given CPUs in map"), 3207 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 3208 "display given CPUs in map"), 3209 OPT_PARENT(sched_options) 3210 }; 3211 const struct option timehist_options[] = { 3212 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 3213 "file", "vmlinux pathname"), 3214 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 3215 "file", "kallsyms pathname"), 3216 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 3217 "Display call chains if present (default on)"), 3218 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 3219 "Maximum number of functions to display backtrace."), 3220 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 3221 "Look for files with symbols relative to this directory"), 3222 OPT_BOOLEAN('s', "summary", &sched.summary_only, 3223 "Show only syscall summary with statistics"), 3224 OPT_BOOLEAN('S', "with-summary", &sched.summary, 3225 "Show all syscalls and summary with statistics"), 3226 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 3227 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 3228 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 3229 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 3230 OPT_STRING(0, "time", &sched.time_str, "str", 3231 "Time span for analysis (start,stop)"), 3232 OPT_PARENT(sched_options) 3233 }; 3234 3235 const char * const latency_usage[] = { 3236 "perf sched latency [<options>]", 3237 NULL 3238 }; 3239 const char * const replay_usage[] = { 3240 "perf sched replay [<options>]", 3241 NULL 3242 }; 3243 const char * const map_usage[] = { 3244 "perf sched map [<options>]", 3245 NULL 3246 }; 3247 const char * const timehist_usage[] = { 3248 "perf sched timehist [<options>]", 3249 NULL 3250 }; 3251 const char *const sched_subcommands[] = { "record", "latency", "map", 3252 "replay", "script", 3253 "timehist", NULL }; 3254 const char *sched_usage[] = { 3255 NULL, 3256 NULL 3257 }; 3258 struct trace_sched_handler lat_ops = { 3259 .wakeup_event = latency_wakeup_event, 3260 .switch_event = latency_switch_event, 3261 .runtime_event = latency_runtime_event, 3262 .migrate_task_event = latency_migrate_task_event, 3263 }; 3264 struct trace_sched_handler map_ops = { 3265 .switch_event = map_switch_event, 3266 }; 3267 struct trace_sched_handler replay_ops = { 3268 .wakeup_event = 
replay_wakeup_event, 3269 .switch_event = replay_switch_event, 3270 .fork_event = replay_fork_event, 3271 }; 3272 unsigned int i; 3273 3274 for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) 3275 sched.curr_pid[i] = -1; 3276 3277 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, 3278 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3279 if (!argc) 3280 usage_with_options(sched_usage, sched_options); 3281 3282 /* 3283 * Aliased to 'perf script' for now: 3284 */ 3285 if (!strcmp(argv[0], "script")) 3286 return cmd_script(argc, argv, prefix); 3287 3288 if (!strncmp(argv[0], "rec", 3)) { 3289 return __cmd_record(argc, argv); 3290 } else if (!strncmp(argv[0], "lat", 3)) { 3291 sched.tp_handler = &lat_ops; 3292 if (argc > 1) { 3293 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 3294 if (argc) 3295 usage_with_options(latency_usage, latency_options); 3296 } 3297 setup_sorting(&sched, latency_options, latency_usage); 3298 return perf_sched__lat(&sched); 3299 } else if (!strcmp(argv[0], "map")) { 3300 if (argc) { 3301 argc = parse_options(argc, argv, map_options, map_usage, 0); 3302 if (argc) 3303 usage_with_options(map_usage, map_options); 3304 } 3305 sched.tp_handler = &map_ops; 3306 setup_sorting(&sched, latency_options, latency_usage); 3307 return perf_sched__map(&sched); 3308 } else if (!strncmp(argv[0], "rep", 3)) { 3309 sched.tp_handler = &replay_ops; 3310 if (argc) { 3311 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 3312 if (argc) 3313 usage_with_options(replay_usage, replay_options); 3314 } 3315 return perf_sched__replay(&sched); 3316 } else if (!strcmp(argv[0], "timehist")) { 3317 if (argc) { 3318 argc = parse_options(argc, argv, timehist_options, 3319 timehist_usage, 0); 3320 if (argc) 3321 usage_with_options(timehist_usage, timehist_options); 3322 } 3323 if (sched.show_wakeups && sched.summary_only) { 3324 pr_err(" Error: -s and -w are mutually exclusive.\n"); 3325 parse_options_usage(timehist_usage, timehist_options, "s", true); 3326 parse_options_usage(NULL, timehist_options, "w", true); 3327 return -EINVAL; 3328 } 3329 3330 return perf_sched__timehist(&sched); 3331 } else { 3332 usage_with_options(sched_usage, sched_options); 3333 } 3334 3335 return 0; 3336 } 3337
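/*
 * Typical invocations, sketched from the subcommands and options wired up
 * in cmd_sched() above (see 'perf sched -h' for the authoritative list):
 *
 *      perf sched record -- sleep 1     # capture the sched_* tracepoints
 *      perf sched latency --sort max    # per-task scheduling latency table
 *      perf sched map --compact         # per-cpu context-switch map
 *      perf sched replay -r 10          # replay the recorded workload 10 times
 *      perf sched timehist -w -M        # time history incl. wakeups/migrations
 *      perf sched timehist --summary    # summary-only runtime statistics
 *
 * Note that timehist's -s (summary only) and -w (wakeups) are rejected as
 * mutually exclusive by cmd_sched(), so they are shown separately here.
 */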