1 #include "builtin.h" 2 #include "util/color.h" 3 #include "util/evlist.h" 4 #include "util/machine.h" 5 #include "util/thread.h" 6 #include "util/parse-options.h" 7 #include "util/thread_map.h" 8 #include "event-parse.h" 9 10 #include <libaudit.h> 11 #include <stdlib.h> 12 13 static struct syscall_fmt { 14 const char *name; 15 const char *alias; 16 bool errmsg; 17 bool timeout; 18 } syscall_fmts[] = { 19 { .name = "access", .errmsg = true, }, 20 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 21 { .name = "fstat", .errmsg = true, .alias = "newfstat", }, 22 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, 23 { .name = "futex", .errmsg = true, }, 24 { .name = "open", .errmsg = true, }, 25 { .name = "poll", .errmsg = true, .timeout = true, }, 26 { .name = "ppoll", .errmsg = true, .timeout = true, }, 27 { .name = "read", .errmsg = true, }, 28 { .name = "recvfrom", .errmsg = true, }, 29 { .name = "select", .errmsg = true, .timeout = true, }, 30 { .name = "socket", .errmsg = true, }, 31 { .name = "stat", .errmsg = true, .alias = "newstat", }, 32 }; 33 34 static int syscall_fmt__cmp(const void *name, const void *fmtp) 35 { 36 const struct syscall_fmt *fmt = fmtp; 37 return strcmp(name, fmt->name); 38 } 39 40 static struct syscall_fmt *syscall_fmt__find(const char *name) 41 { 42 const int nmemb = ARRAY_SIZE(syscall_fmts); 43 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 44 } 45 46 struct syscall { 47 struct event_format *tp_format; 48 const char *name; 49 struct syscall_fmt *fmt; 50 }; 51 52 static size_t fprintf_duration(unsigned long t, FILE *fp) 53 { 54 double duration = (double)t / NSEC_PER_MSEC; 55 size_t printed = fprintf(fp, "("); 56 57 if (duration >= 1.0) 58 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 59 else if (duration >= 0.01) 60 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 61 else 62 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 63 return printed + fprintf(stdout, "): "); 64 } 65 66 struct thread_trace { 67 u64 entry_time; 68 u64 exit_time; 69 bool entry_pending; 70 unsigned long nr_events; 71 char *entry_str; 72 double runtime_ms; 73 }; 74 75 static struct thread_trace *thread_trace__new(void) 76 { 77 return zalloc(sizeof(struct thread_trace)); 78 } 79 80 static struct thread_trace *thread__trace(struct thread *thread) 81 { 82 struct thread_trace *ttrace; 83 84 if (thread == NULL) 85 goto fail; 86 87 if (thread->priv == NULL) 88 thread->priv = thread_trace__new(); 89 90 if (thread->priv == NULL) 91 goto fail; 92 93 ttrace = thread->priv; 94 ++ttrace->nr_events; 95 96 return ttrace; 97 fail: 98 color_fprintf(stdout, PERF_COLOR_RED, 99 "WARNING: not enough memory, dropping samples!\n"); 100 return NULL; 101 } 102 103 struct trace { 104 int audit_machine; 105 struct { 106 int max; 107 struct syscall *table; 108 } syscalls; 109 struct perf_record_opts opts; 110 struct machine host; 111 u64 base_time; 112 unsigned long nr_events; 113 bool sched; 114 bool multiple_threads; 115 double duration_filter; 116 double runtime_ms; 117 }; 118 119 static bool trace__filter_duration(struct trace *trace, double t) 120 { 121 return t < (trace->duration_filter * NSEC_PER_MSEC); 122 } 123 124 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 125 { 126 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 127 128 return fprintf(fp, "%10.3f ", ts); 129 } 130 131 static bool done = false; 132 133 static void sig_handler(int sig __maybe_unused) 134 { 135 done = true; 136 } 137 138 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 139 u64 duration, u64 tstamp, FILE *fp) 140 { 141 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 142 printed += fprintf_duration(duration, fp); 143 144 if (trace->multiple_threads) 145 printed += fprintf(fp, "%d ", thread->pid); 146 147 return printed; 148 } 149 150 static int trace__process_event(struct machine *machine, union perf_event *event) 151 { 152 int ret = 0; 153 154 switch (event->header.type) { 155 case PERF_RECORD_LOST: 156 color_fprintf(stdout, PERF_COLOR_RED, 157 "LOST %" PRIu64 " events!\n", event->lost.lost); 158 ret = machine__process_lost_event(machine, event); 159 default: 160 ret = machine__process_event(machine, event); 161 break; 162 } 163 164 return ret; 165 } 166 167 static int trace__tool_process(struct perf_tool *tool __maybe_unused, 168 union perf_event *event, 169 struct perf_sample *sample __maybe_unused, 170 struct machine *machine) 171 { 172 return trace__process_event(machine, event); 173 } 174 175 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 176 { 177 int err = symbol__init(); 178 179 if (err) 180 return err; 181 182 machine__init(&trace->host, "", HOST_KERNEL_ID); 183 machine__create_kernel_maps(&trace->host); 184 185 if (perf_target__has_task(&trace->opts.target)) { 186 err = perf_event__synthesize_thread_map(NULL, evlist->threads, 187 trace__tool_process, 188 &trace->host); 189 } else { 190 err = perf_event__synthesize_threads(NULL, trace__tool_process, 191 &trace->host); 192 } 193 194 if (err) 195 symbol__exit(); 196 197 return err; 198 } 199 200 static int trace__read_syscall_info(struct trace *trace, int id) 201 { 202 char tp_name[128]; 203 struct syscall *sc; 204 const char *name = audit_syscall_to_name(id, trace->audit_machine); 205 206 if (name == NULL) 207 return -1; 208 209 if (id > trace->syscalls.max) { 210 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 211 212 if (nsyscalls == NULL) 213 return -1; 214 215 if (trace->syscalls.max != -1) { 216 memset(nsyscalls + trace->syscalls.max + 1, 0, 217 (id - trace->syscalls.max) * sizeof(*sc)); 218 } else { 219 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 220 } 221 222 trace->syscalls.table = nsyscalls; 223 trace->syscalls.max = id; 224 } 225 226 sc = trace->syscalls.table + id; 227 sc->name = name; 228 sc->fmt = syscall_fmt__find(sc->name); 229 230 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 231 sc->tp_format = event_format__new("syscalls", tp_name); 232 233 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 234 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 235 sc->tp_format = event_format__new("syscalls", tp_name); 236 } 237 238 return sc->tp_format != NULL ? 0 : -1; 239 } 240 241 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 242 unsigned long *args) 243 { 244 int i = 0; 245 size_t printed = 0; 246 247 if (sc->tp_format != NULL) { 248 struct format_field *field; 249 250 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 251 printed += scnprintf(bf + printed, size - printed, 252 "%s%s: %ld", printed ? ", " : "", 253 field->name, args[i++]); 254 } 255 } else { 256 while (i < 6) { 257 printed += scnprintf(bf + printed, size - printed, 258 "%sarg%d: %ld", 259 printed ? ", " : "", i, args[i]); 260 ++i; 261 } 262 } 263 264 return printed; 265 } 266 267 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 268 struct perf_sample *sample); 269 270 static struct syscall *trace__syscall_info(struct trace *trace, 271 struct perf_evsel *evsel, 272 struct perf_sample *sample) 273 { 274 int id = perf_evsel__intval(evsel, sample, "id"); 275 276 if (id < 0) { 277 printf("Invalid syscall %d id, skipping...\n", id); 278 return NULL; 279 } 280 281 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 282 trace__read_syscall_info(trace, id)) 283 goto out_cant_read; 284 285 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 286 goto out_cant_read; 287 288 return &trace->syscalls.table[id]; 289 290 out_cant_read: 291 printf("Problems reading syscall %d", id); 292 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 293 printf("(%s)", trace->syscalls.table[id].name); 294 puts(" information"); 295 return NULL; 296 } 297 298 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 299 struct perf_sample *sample) 300 { 301 char *msg; 302 void *args; 303 size_t printed = 0; 304 struct thread *thread = machine__findnew_thread(&trace->host, sample->tid); 305 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 306 struct thread_trace *ttrace = thread__trace(thread); 307 308 if (ttrace == NULL || sc == NULL) 309 return -1; 310 311 args = perf_evsel__rawptr(evsel, sample, "args"); 312 if (args == NULL) { 313 printf("Problems reading syscall arguments\n"); 314 return -1; 315 } 316 317 ttrace = thread->priv; 318 319 if (ttrace->entry_str == NULL) { 320 ttrace->entry_str = malloc(1024); 321 if (!ttrace->entry_str) 322 return -1; 323 } 324 325 ttrace->entry_time = sample->time; 326 msg = ttrace->entry_str; 327 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 328 329 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args); 330 331 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { 332 if (!trace->duration_filter) { 333 trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout); 334 printf("%-70s\n", ttrace->entry_str); 335 } 336 } else 337 ttrace->entry_pending = true; 338 339 return 0; 340 } 341 342 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 343 struct perf_sample *sample) 344 { 345 int ret; 346 u64 duration = 0; 347 struct thread *thread = machine__findnew_thread(&trace->host, sample->tid); 348 struct thread_trace *ttrace = thread__trace(thread); 349 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 350 351 if (ttrace == NULL || sc == NULL) 352 return -1; 353 354 ret = perf_evsel__intval(evsel, sample, "ret"); 355 356 ttrace = thread->priv; 357 358 ttrace->exit_time = sample->time; 359 360 if (ttrace->entry_time) { 361 duration = sample->time - ttrace->entry_time; 362 if (trace__filter_duration(trace, duration)) 363 goto out; 364 } else if (trace->duration_filter) 365 goto out; 366 367 trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout); 368 369 if (ttrace->entry_pending) { 370 printf("%-70s", ttrace->entry_str); 371 } else { 372 printf(" ... ["); 373 color_fprintf(stdout, PERF_COLOR_YELLOW, "continued"); 374 printf("]: %s()", sc->name); 375 } 376 377 if (ret < 0 && sc->fmt && sc->fmt->errmsg) { 378 char bf[256]; 379 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 380 *e = audit_errno_to_name(-ret); 381 382 printf(") = -1 %s %s", e, emsg); 383 } else if (ret == 0 && sc->fmt && sc->fmt->timeout) 384 printf(") = 0 Timeout"); 385 else 386 printf(") = %d", ret); 387 388 putchar('\n'); 389 out: 390 ttrace->entry_pending = false; 391 392 return 0; 393 } 394 395 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 396 struct perf_sample *sample) 397 { 398 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 399 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 400 struct thread *thread = machine__findnew_thread(&trace->host, sample->tid); 401 struct thread_trace *ttrace = thread__trace(thread); 402 403 if (ttrace == NULL) 404 goto out_dump; 405 406 ttrace->runtime_ms += runtime_ms; 407 trace->runtime_ms += runtime_ms; 408 return 0; 409 410 out_dump: 411 printf("%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 412 evsel->name, 413 perf_evsel__strval(evsel, sample, "comm"), 414 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 415 runtime, 416 perf_evsel__intval(evsel, sample, "vruntime")); 417 return 0; 418 } 419 420 static int trace__run(struct trace *trace, int argc, const char **argv) 421 { 422 struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); 423 struct perf_evsel *evsel; 424 int err = -1, i; 425 unsigned long before; 426 const bool forks = argc > 0; 427 428 if (evlist == NULL) { 429 printf("Not enough memory to run!\n"); 430 goto out; 431 } 432 433 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || 434 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { 435 printf("Couldn't read the raw_syscalls tracepoints information!\n"); 436 goto out_delete_evlist; 437 } 438 439 if (trace->sched && 440 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 441 trace__sched_stat_runtime)) { 442 printf("Couldn't read the sched_stat_runtime tracepoint information!\n"); 443 goto out_delete_evlist; 444 } 445 446 err = perf_evlist__create_maps(evlist, &trace->opts.target); 447 if (err < 0) { 448 printf("Problems parsing the target to trace, check your options!\n"); 449 goto out_delete_evlist; 450 } 451 452 err = trace__symbols_init(trace, evlist); 453 if (err < 0) { 454 printf("Problems initializing symbol libraries!\n"); 455 goto out_delete_evlist; 456 } 457 458 perf_evlist__config_attrs(evlist, &trace->opts); 459 460 signal(SIGCHLD, sig_handler); 461 signal(SIGINT, sig_handler); 462 463 if (forks) { 464 err = perf_evlist__prepare_workload(evlist, &trace->opts, argv); 465 if (err < 0) { 466 printf("Couldn't run the workload!\n"); 467 goto out_delete_evlist; 468 } 469 } 470 471 err = perf_evlist__open(evlist); 472 if (err < 0) { 473 printf("Couldn't create the events: %s\n", strerror(errno)); 474 goto out_delete_evlist; 475 } 476 477 err = perf_evlist__mmap(evlist, UINT_MAX, false); 478 if (err < 0) { 479 printf("Couldn't mmap the events: %s\n", strerror(errno)); 480 goto out_delete_evlist; 481 } 482 483 perf_evlist__enable(evlist); 484 485 if (forks) 486 perf_evlist__start_workload(evlist); 487 488 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; 489 again: 490 before = trace->nr_events; 491 492 for (i = 0; i < evlist->nr_mmaps; i++) { 493 union perf_event *event; 494 495 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 496 const u32 type = event->header.type; 497 tracepoint_handler handler; 498 struct perf_sample sample; 499 500 ++trace->nr_events; 501 502 err = perf_evlist__parse_sample(evlist, event, &sample); 503 if (err) { 504 printf("Can't parse sample, err = %d, skipping...\n", err); 505 continue; 506 } 507 508 if (trace->base_time == 0) 509 trace->base_time = sample.time; 510 511 if (type != PERF_RECORD_SAMPLE) { 512 trace__process_event(&trace->host, event); 513 continue; 514 } 515 516 evsel = perf_evlist__id2evsel(evlist, sample.id); 517 if (evsel == NULL) { 518 printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 519 continue; 520 } 521 522 if (sample.raw_data == NULL) { 523 printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 524 perf_evsel__name(evsel), sample.tid, 525 sample.cpu, sample.raw_size); 526 continue; 527 } 528 529 if (sample.raw_data == NULL) { 530 printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 531 perf_evsel__name(evsel), sample.tid, 532 sample.cpu, sample.raw_size); 533 continue; 534 } 535 536 handler = evsel->handler.func; 537 handler(trace, evsel, &sample); 538 } 539 } 540 541 if (trace->nr_events == before) { 542 if (done) 543 goto out_delete_evlist; 544 545 poll(evlist->pollfd, evlist->nr_fds, -1); 546 } 547 548 if (done) 549 perf_evlist__disable(evlist); 550 551 goto again; 552 553 out_delete_evlist: 554 perf_evlist__delete(evlist); 555 out: 556 return err; 557 } 558 559 static size_t trace__fprintf_threads_header(FILE *fp) 560 { 561 size_t printed; 562 563 printed = fprintf(fp, "\n _____________________________________________________________________\n"); 564 printed += fprintf(fp," __) Summary of events (__\n\n"); 565 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); 566 printed += fprintf(fp," _____________________________________________________________________\n\n"); 567 568 return printed; 569 } 570 571 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 572 { 573 size_t printed = trace__fprintf_threads_header(fp); 574 struct rb_node *nd; 575 576 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) { 577 struct thread *thread = rb_entry(nd, struct thread, rb_node); 578 struct thread_trace *ttrace = thread->priv; 579 const char *color; 580 double ratio; 581 582 if (ttrace == NULL) 583 continue; 584 585 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 586 587 color = PERF_COLOR_NORMAL; 588 if (ratio > 50.0) 589 color = PERF_COLOR_RED; 590 else if (ratio > 25.0) 591 color = PERF_COLOR_GREEN; 592 else if (ratio > 5.0) 593 color = PERF_COLOR_YELLOW; 594 595 printed += color_fprintf(fp, color, "%20s", thread->comm); 596 printed += fprintf(fp, " - %-5d :%11lu [", thread->pid, ttrace->nr_events); 597 printed += color_fprintf(fp, color, "%5.1f%%", ratio); 598 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); 599 } 600 601 return printed; 602 } 603 604 static int trace__set_duration(const struct option *opt, const char *str, 605 int unset __maybe_unused) 606 { 607 struct trace *trace = opt->value; 608 609 trace->duration_filter = atof(str); 610 return 0; 611 } 612 613 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 614 { 615 const char * const trace_usage[] = { 616 "perf trace [<options>] [<command>]", 617 "perf trace [<options>] -- <command> [<options>]", 618 NULL 619 }; 620 struct trace trace = { 621 .audit_machine = audit_detect_machine(), 622 .syscalls = { 623 . max = -1, 624 }, 625 .opts = { 626 .target = { 627 .uid = UINT_MAX, 628 .uses_mmap = true, 629 }, 630 .user_freq = UINT_MAX, 631 .user_interval = ULLONG_MAX, 632 .no_delay = true, 633 .mmap_pages = 1024, 634 }, 635 }; 636 const struct option trace_options[] = { 637 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 638 "trace events on existing process id"), 639 OPT_STRING(0, "tid", &trace.opts.target.tid, "tid", 640 "trace events on existing thread id"), 641 OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide, 642 "system-wide collection from all CPUs"), 643 OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu", 644 "list of cpus to monitor"), 645 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 646 "child tasks do not inherit counters"), 647 OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages, 648 "number of mmap data pages"), 649 OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user", 650 "user to profile"), 651 OPT_CALLBACK(0, "duration", &trace, "float", 652 "show only events with duration > N.M ms", 653 trace__set_duration), 654 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 655 OPT_END() 656 }; 657 int err; 658 char bf[BUFSIZ]; 659 660 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 661 662 err = perf_target__validate(&trace.opts.target); 663 if (err) { 664 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 665 printf("%s", bf); 666 return err; 667 } 668 669 err = perf_target__parse_uid(&trace.opts.target); 670 if (err) { 671 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 672 printf("%s", bf); 673 return err; 674 } 675 676 if (!argc && perf_target__none(&trace.opts.target)) 677 trace.opts.target.system_wide = true; 678 679 err = trace__run(&trace, argc, argv); 680 681 if (trace.sched && !err) 682 trace__fprintf_thread_summary(&trace, stdout); 683 684 return err; 685 } 686