1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 14 #include <libaudit.h> 15 #include <stdlib.h> 16 #include <sys/mman.h> 17 #include <linux/futex.h> 18 19 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 20 unsigned long arg, 21 u8 arg_idx __maybe_unused, 22 u8 *arg_mask __maybe_unused) 23 { 24 return scnprintf(bf, size, "%#lx", arg); 25 } 26 27 #define SCA_HEX syscall_arg__scnprintf_hex 28 29 static size_t syscall_arg__scnprintf_whence(char *bf, size_t size, 30 unsigned long arg, 31 u8 arg_idx __maybe_unused, 32 u8 *arg_mask __maybe_unused) 33 { 34 int whence = arg; 35 36 switch (whence) { 37 #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n) 38 P_WHENCE(SET); 39 P_WHENCE(CUR); 40 P_WHENCE(END); 41 #ifdef SEEK_DATA 42 P_WHENCE(DATA); 43 #endif 44 #ifdef SEEK_HOLE 45 P_WHENCE(HOLE); 46 #endif 47 #undef P_WHENCE 48 default: break; 49 } 50 51 return scnprintf(bf, size, "%#x", whence); 52 } 53 54 #define SCA_WHENCE syscall_arg__scnprintf_whence 55 56 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 57 unsigned long arg, 58 u8 arg_idx __maybe_unused, 59 u8 *arg_mask __maybe_unused) 60 { 61 int printed = 0, prot = arg; 62 63 if (prot == PROT_NONE) 64 return scnprintf(bf, size, "NONE"); 65 #define P_MMAP_PROT(n) \ 66 if (prot & PROT_##n) { \ 67 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 68 prot &= ~PROT_##n; \ 69 } 70 71 P_MMAP_PROT(EXEC); 72 P_MMAP_PROT(READ); 73 P_MMAP_PROT(WRITE); 74 #ifdef PROT_SEM 75 P_MMAP_PROT(SEM); 76 #endif 77 P_MMAP_PROT(GROWSDOWN); 78 P_MMAP_PROT(GROWSUP); 79 #undef P_MMAP_PROT 80 81 if (prot) 82 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 83 84 return printed; 85 } 86 87 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 88 89 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 90 unsigned long arg, u8 arg_idx __maybe_unused, 91 u8 *arg_mask __maybe_unused) 92 { 93 int printed = 0, flags = arg; 94 95 #define P_MMAP_FLAG(n) \ 96 if (flags & MAP_##n) { \ 97 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 98 flags &= ~MAP_##n; \ 99 } 100 101 P_MMAP_FLAG(SHARED); 102 P_MMAP_FLAG(PRIVATE); 103 #ifdef MAP_32BIT 104 P_MMAP_FLAG(32BIT); 105 #endif 106 P_MMAP_FLAG(ANONYMOUS); 107 P_MMAP_FLAG(DENYWRITE); 108 P_MMAP_FLAG(EXECUTABLE); 109 P_MMAP_FLAG(FILE); 110 P_MMAP_FLAG(FIXED); 111 P_MMAP_FLAG(GROWSDOWN); 112 #ifdef MAP_HUGETLB 113 P_MMAP_FLAG(HUGETLB); 114 #endif 115 P_MMAP_FLAG(LOCKED); 116 P_MMAP_FLAG(NONBLOCK); 117 P_MMAP_FLAG(NORESERVE); 118 P_MMAP_FLAG(POPULATE); 119 P_MMAP_FLAG(STACK); 120 #ifdef MAP_UNINITIALIZED 121 P_MMAP_FLAG(UNINITIALIZED); 122 #endif 123 #undef P_MMAP_FLAG 124 125 if (flags) 126 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 127 128 return printed; 129 } 130 131 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 132 133 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 134 unsigned long arg, u8 arg_idx __maybe_unused, 135 u8 *arg_mask __maybe_unused) 136 { 137 int behavior = arg; 138 139 switch (behavior) { 140 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 141 P_MADV_BHV(NORMAL); 142 P_MADV_BHV(RANDOM); 143 P_MADV_BHV(SEQUENTIAL); 144 P_MADV_BHV(WILLNEED); 145 P_MADV_BHV(DONTNEED); 146 P_MADV_BHV(REMOVE); 147 P_MADV_BHV(DONTFORK); 148 P_MADV_BHV(DOFORK); 149 P_MADV_BHV(HWPOISON); 150 #ifdef MADV_SOFT_OFFLINE 151 P_MADV_BHV(SOFT_OFFLINE); 152 #endif 153 P_MADV_BHV(MERGEABLE); 154 P_MADV_BHV(UNMERGEABLE); 155 #ifdef MADV_HUGEPAGE 156 P_MADV_BHV(HUGEPAGE); 157 #endif 158 #ifdef MADV_NOHUGEPAGE 159 P_MADV_BHV(NOHUGEPAGE); 160 #endif 161 #ifdef MADV_DONTDUMP 162 P_MADV_BHV(DONTDUMP); 163 #endif 164 #ifdef MADV_DODUMP 165 P_MADV_BHV(DODUMP); 166 #endif 167 #undef P_MADV_PHV 168 default: break; 169 } 170 171 return scnprintf(bf, size, "%#x", behavior); 172 } 173 174 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 175 176 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg, 177 u8 arg_idx __maybe_unused, u8 *arg_mask) 178 { 179 enum syscall_futex_args { 180 SCF_UADDR = (1 << 0), 181 SCF_OP = (1 << 1), 182 SCF_VAL = (1 << 2), 183 SCF_TIMEOUT = (1 << 3), 184 SCF_UADDR2 = (1 << 4), 185 SCF_VAL3 = (1 << 5), 186 }; 187 int op = arg; 188 int cmd = op & FUTEX_CMD_MASK; 189 size_t printed = 0; 190 191 switch (cmd) { 192 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 193 P_FUTEX_OP(WAIT); *arg_mask |= SCF_VAL3|SCF_UADDR2; break; 194 P_FUTEX_OP(WAKE); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 195 P_FUTEX_OP(FD); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 196 P_FUTEX_OP(REQUEUE); *arg_mask |= SCF_VAL3|SCF_TIMEOUT; break; 197 P_FUTEX_OP(CMP_REQUEUE); *arg_mask |= SCF_TIMEOUT; break; 198 P_FUTEX_OP(CMP_REQUEUE_PI); *arg_mask |= SCF_TIMEOUT; break; 199 P_FUTEX_OP(WAKE_OP); break; 200 P_FUTEX_OP(LOCK_PI); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 201 P_FUTEX_OP(UNLOCK_PI); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 202 P_FUTEX_OP(TRYLOCK_PI); *arg_mask |= SCF_VAL3|SCF_UADDR2; break; 203 P_FUTEX_OP(WAIT_BITSET); *arg_mask |= SCF_UADDR2; break; 204 P_FUTEX_OP(WAKE_BITSET); *arg_mask |= SCF_UADDR2; break; 205 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 206 default: printed = scnprintf(bf, size, "%#x", cmd); break; 207 } 208 209 if (op & FUTEX_PRIVATE_FLAG) 210 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 211 212 if (op & FUTEX_CLOCK_REALTIME) 213 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 214 215 return printed; 216 } 217 218 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 219 220 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 221 unsigned long arg, 222 u8 arg_idx, u8 *arg_mask) 223 { 224 int printed = 0, flags = arg; 225 226 if (!(flags & O_CREAT)) 227 *arg_mask |= 1 << (arg_idx + 1); /* Mask the mode parm */ 228 229 if (flags == 0) 230 return scnprintf(bf, size, "RDONLY"); 231 #define P_FLAG(n) \ 232 if (flags & O_##n) { \ 233 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 234 flags &= ~O_##n; \ 235 } 236 237 P_FLAG(APPEND); 238 P_FLAG(ASYNC); 239 P_FLAG(CLOEXEC); 240 P_FLAG(CREAT); 241 P_FLAG(DIRECT); 242 P_FLAG(DIRECTORY); 243 P_FLAG(EXCL); 244 P_FLAG(LARGEFILE); 245 P_FLAG(NOATIME); 246 P_FLAG(NOCTTY); 247 #ifdef O_NONBLOCK 248 P_FLAG(NONBLOCK); 249 #elif O_NDELAY 250 P_FLAG(NDELAY); 251 #endif 252 #ifdef O_PATH 253 P_FLAG(PATH); 254 #endif 255 P_FLAG(RDWR); 256 #ifdef O_DSYNC 257 if ((flags & O_SYNC) == O_SYNC) 258 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 259 else { 260 P_FLAG(DSYNC); 261 } 262 #else 263 P_FLAG(SYNC); 264 #endif 265 P_FLAG(TRUNC); 266 P_FLAG(WRONLY); 267 #undef P_FLAG 268 269 if (flags) 270 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 271 272 return printed; 273 } 274 275 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 276 277 static struct syscall_fmt { 278 const char *name; 279 const char *alias; 280 size_t (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 arg_idx, u8 *arg_mask); 281 bool errmsg; 282 bool timeout; 283 bool hexret; 284 } syscall_fmts[] = { 285 { .name = "access", .errmsg = true, }, 286 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 287 { .name = "brk", .hexret = true, 288 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 289 { .name = "mmap", .hexret = true, }, 290 { .name = "connect", .errmsg = true, }, 291 { .name = "fstat", .errmsg = true, .alias = "newfstat", }, 292 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, 293 { .name = "futex", .errmsg = true, 294 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 295 { .name = "ioctl", .errmsg = true, 296 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, 297 { .name = "lseek", .errmsg = true, 298 .arg_scnprintf = { [2] = SCA_WHENCE, /* whence */ }, }, 299 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 300 { .name = "madvise", .errmsg = true, 301 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 302 [2] = SCA_MADV_BHV, /* behavior */ }, }, 303 { .name = "mmap", .hexret = true, 304 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 305 [2] = SCA_MMAP_PROT, /* prot */ 306 [3] = SCA_MMAP_FLAGS, /* flags */ }, }, 307 { .name = "mprotect", .errmsg = true, 308 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 309 [2] = SCA_MMAP_PROT, /* prot */ }, }, 310 { .name = "mremap", .hexret = true, 311 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 312 [4] = SCA_HEX, /* new_addr */ }, }, 313 { .name = "munmap", .errmsg = true, 314 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 315 { .name = "open", .errmsg = true, 316 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 317 { .name = "open_by_handle_at", .errmsg = true, 318 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 319 { .name = "openat", .errmsg = true, 320 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 321 { .name = "poll", .errmsg = true, .timeout = true, }, 322 { .name = "ppoll", .errmsg = true, .timeout = true, }, 323 { .name = "pread", .errmsg = true, .alias = "pread64", }, 324 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, 325 { .name = "read", .errmsg = true, }, 326 { .name = "recvfrom", .errmsg = true, }, 327 { .name = "select", .errmsg = true, .timeout = true, }, 328 { .name = "socket", .errmsg = true, }, 329 { .name = "stat", .errmsg = true, .alias = "newstat", }, 330 { .name = "uname", .errmsg = true, .alias = "newuname", }, 331 }; 332 333 static int syscall_fmt__cmp(const void *name, const void *fmtp) 334 { 335 const struct syscall_fmt *fmt = fmtp; 336 return strcmp(name, fmt->name); 337 } 338 339 static struct syscall_fmt *syscall_fmt__find(const char *name) 340 { 341 const int nmemb = ARRAY_SIZE(syscall_fmts); 342 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 343 } 344 345 struct syscall { 346 struct event_format *tp_format; 347 const char *name; 348 bool filtered; 349 struct syscall_fmt *fmt; 350 size_t (**arg_scnprintf)(char *bf, size_t size, 351 unsigned long arg, u8 arg_idx, u8 *args_mask); 352 }; 353 354 static size_t fprintf_duration(unsigned long t, FILE *fp) 355 { 356 double duration = (double)t / NSEC_PER_MSEC; 357 size_t printed = fprintf(fp, "("); 358 359 if (duration >= 1.0) 360 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 361 else if (duration >= 0.01) 362 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 363 else 364 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 365 return printed + fprintf(fp, "): "); 366 } 367 368 struct thread_trace { 369 u64 entry_time; 370 u64 exit_time; 371 bool entry_pending; 372 unsigned long nr_events; 373 char *entry_str; 374 double runtime_ms; 375 }; 376 377 static struct thread_trace *thread_trace__new(void) 378 { 379 return zalloc(sizeof(struct thread_trace)); 380 } 381 382 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 383 { 384 struct thread_trace *ttrace; 385 386 if (thread == NULL) 387 goto fail; 388 389 if (thread->priv == NULL) 390 thread->priv = thread_trace__new(); 391 392 if (thread->priv == NULL) 393 goto fail; 394 395 ttrace = thread->priv; 396 ++ttrace->nr_events; 397 398 return ttrace; 399 fail: 400 color_fprintf(fp, PERF_COLOR_RED, 401 "WARNING: not enough memory, dropping samples!\n"); 402 return NULL; 403 } 404 405 struct trace { 406 struct perf_tool tool; 407 int audit_machine; 408 struct { 409 int max; 410 struct syscall *table; 411 } syscalls; 412 struct perf_record_opts opts; 413 struct machine host; 414 u64 base_time; 415 FILE *output; 416 unsigned long nr_events; 417 struct strlist *ev_qualifier; 418 bool not_ev_qualifier; 419 struct intlist *tid_list; 420 struct intlist *pid_list; 421 bool sched; 422 bool multiple_threads; 423 double duration_filter; 424 double runtime_ms; 425 }; 426 427 static bool trace__filter_duration(struct trace *trace, double t) 428 { 429 return t < (trace->duration_filter * NSEC_PER_MSEC); 430 } 431 432 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 433 { 434 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 435 436 return fprintf(fp, "%10.3f ", ts); 437 } 438 439 static bool done = false; 440 441 static void sig_handler(int sig __maybe_unused) 442 { 443 done = true; 444 } 445 446 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 447 u64 duration, u64 tstamp, FILE *fp) 448 { 449 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 450 printed += fprintf_duration(duration, fp); 451 452 if (trace->multiple_threads) 453 printed += fprintf(fp, "%d ", thread->tid); 454 455 return printed; 456 } 457 458 static int trace__process_event(struct trace *trace, struct machine *machine, 459 union perf_event *event) 460 { 461 int ret = 0; 462 463 switch (event->header.type) { 464 case PERF_RECORD_LOST: 465 color_fprintf(trace->output, PERF_COLOR_RED, 466 "LOST %" PRIu64 " events!\n", event->lost.lost); 467 ret = machine__process_lost_event(machine, event); 468 default: 469 ret = machine__process_event(machine, event); 470 break; 471 } 472 473 return ret; 474 } 475 476 static int trace__tool_process(struct perf_tool *tool, 477 union perf_event *event, 478 struct perf_sample *sample __maybe_unused, 479 struct machine *machine) 480 { 481 struct trace *trace = container_of(tool, struct trace, tool); 482 return trace__process_event(trace, machine, event); 483 } 484 485 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 486 { 487 int err = symbol__init(); 488 489 if (err) 490 return err; 491 492 machine__init(&trace->host, "", HOST_KERNEL_ID); 493 machine__create_kernel_maps(&trace->host); 494 495 if (perf_target__has_task(&trace->opts.target)) { 496 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads, 497 trace__tool_process, 498 &trace->host); 499 } else { 500 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process, 501 &trace->host); 502 } 503 504 if (err) 505 symbol__exit(); 506 507 return err; 508 } 509 510 static int syscall__set_arg_fmts(struct syscall *sc) 511 { 512 struct format_field *field; 513 int idx = 0; 514 515 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); 516 if (sc->arg_scnprintf == NULL) 517 return -1; 518 519 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 520 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 521 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 522 else if (field->flags & FIELD_IS_POINTER) 523 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 524 ++idx; 525 } 526 527 return 0; 528 } 529 530 static int trace__read_syscall_info(struct trace *trace, int id) 531 { 532 char tp_name[128]; 533 struct syscall *sc; 534 const char *name = audit_syscall_to_name(id, trace->audit_machine); 535 536 if (name == NULL) 537 return -1; 538 539 if (id > trace->syscalls.max) { 540 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 541 542 if (nsyscalls == NULL) 543 return -1; 544 545 if (trace->syscalls.max != -1) { 546 memset(nsyscalls + trace->syscalls.max + 1, 0, 547 (id - trace->syscalls.max) * sizeof(*sc)); 548 } else { 549 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 550 } 551 552 trace->syscalls.table = nsyscalls; 553 trace->syscalls.max = id; 554 } 555 556 sc = trace->syscalls.table + id; 557 sc->name = name; 558 559 if (trace->ev_qualifier) { 560 bool in = strlist__find(trace->ev_qualifier, name) != NULL; 561 562 if (!(in ^ trace->not_ev_qualifier)) { 563 sc->filtered = true; 564 /* 565 * No need to do read tracepoint information since this will be 566 * filtered out. 567 */ 568 return 0; 569 } 570 } 571 572 sc->fmt = syscall_fmt__find(sc->name); 573 574 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 575 sc->tp_format = event_format__new("syscalls", tp_name); 576 577 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 578 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 579 sc->tp_format = event_format__new("syscalls", tp_name); 580 } 581 582 if (sc->tp_format == NULL) 583 return -1; 584 585 return syscall__set_arg_fmts(sc); 586 } 587 588 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 589 unsigned long *args) 590 { 591 int i = 0; 592 size_t printed = 0; 593 594 if (sc->tp_format != NULL) { 595 struct format_field *field; 596 u8 mask = 0, bit = 1; 597 598 for (field = sc->tp_format->format.fields->next; field; 599 field = field->next, ++i, bit <<= 1) { 600 if (mask & bit) 601 continue; 602 603 printed += scnprintf(bf + printed, size - printed, 604 "%s%s: ", printed ? ", " : "", field->name); 605 606 if (sc->arg_scnprintf && sc->arg_scnprintf[i]) { 607 printed += sc->arg_scnprintf[i](bf + printed, size - printed, 608 args[i], i, &mask); 609 } else { 610 printed += scnprintf(bf + printed, size - printed, 611 "%ld", args[i]); 612 } 613 } 614 } else { 615 while (i < 6) { 616 printed += scnprintf(bf + printed, size - printed, 617 "%sarg%d: %ld", 618 printed ? ", " : "", i, args[i]); 619 ++i; 620 } 621 } 622 623 return printed; 624 } 625 626 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 627 struct perf_sample *sample); 628 629 static struct syscall *trace__syscall_info(struct trace *trace, 630 struct perf_evsel *evsel, 631 struct perf_sample *sample) 632 { 633 int id = perf_evsel__intval(evsel, sample, "id"); 634 635 if (id < 0) { 636 637 /* 638 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 639 * before that, leaving at a higher verbosity level till that is 640 * explained. Reproduced with plain ftrace with: 641 * 642 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 643 * grep "NR -1 " /t/trace_pipe 644 * 645 * After generating some load on the machine. 646 */ 647 if (verbose > 1) { 648 static u64 n; 649 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 650 id, perf_evsel__name(evsel), ++n); 651 } 652 return NULL; 653 } 654 655 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 656 trace__read_syscall_info(trace, id)) 657 goto out_cant_read; 658 659 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 660 goto out_cant_read; 661 662 return &trace->syscalls.table[id]; 663 664 out_cant_read: 665 if (verbose) { 666 fprintf(trace->output, "Problems reading syscall %d", id); 667 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 668 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 669 fputs(" information\n", trace->output); 670 } 671 return NULL; 672 } 673 674 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 675 struct perf_sample *sample) 676 { 677 char *msg; 678 void *args; 679 size_t printed = 0; 680 struct thread *thread; 681 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 682 struct thread_trace *ttrace; 683 684 if (sc == NULL) 685 return -1; 686 687 if (sc->filtered) 688 return 0; 689 690 thread = machine__findnew_thread(&trace->host, sample->pid, 691 sample->tid); 692 ttrace = thread__trace(thread, trace->output); 693 if (ttrace == NULL) 694 return -1; 695 696 args = perf_evsel__rawptr(evsel, sample, "args"); 697 if (args == NULL) { 698 fprintf(trace->output, "Problems reading syscall arguments\n"); 699 return -1; 700 } 701 702 ttrace = thread->priv; 703 704 if (ttrace->entry_str == NULL) { 705 ttrace->entry_str = malloc(1024); 706 if (!ttrace->entry_str) 707 return -1; 708 } 709 710 ttrace->entry_time = sample->time; 711 msg = ttrace->entry_str; 712 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 713 714 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args); 715 716 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { 717 if (!trace->duration_filter) { 718 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 719 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 720 } 721 } else 722 ttrace->entry_pending = true; 723 724 return 0; 725 } 726 727 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 728 struct perf_sample *sample) 729 { 730 int ret; 731 u64 duration = 0; 732 struct thread *thread; 733 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 734 struct thread_trace *ttrace; 735 736 if (sc == NULL) 737 return -1; 738 739 if (sc->filtered) 740 return 0; 741 742 thread = machine__findnew_thread(&trace->host, sample->pid, 743 sample->tid); 744 ttrace = thread__trace(thread, trace->output); 745 if (ttrace == NULL) 746 return -1; 747 748 ret = perf_evsel__intval(evsel, sample, "ret"); 749 750 ttrace = thread->priv; 751 752 ttrace->exit_time = sample->time; 753 754 if (ttrace->entry_time) { 755 duration = sample->time - ttrace->entry_time; 756 if (trace__filter_duration(trace, duration)) 757 goto out; 758 } else if (trace->duration_filter) 759 goto out; 760 761 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 762 763 if (ttrace->entry_pending) { 764 fprintf(trace->output, "%-70s", ttrace->entry_str); 765 } else { 766 fprintf(trace->output, " ... ["); 767 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 768 fprintf(trace->output, "]: %s()", sc->name); 769 } 770 771 if (sc->fmt == NULL) { 772 signed_print: 773 fprintf(trace->output, ") = %d", ret); 774 } else if (ret < 0 && sc->fmt->errmsg) { 775 char bf[256]; 776 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 777 *e = audit_errno_to_name(-ret); 778 779 fprintf(trace->output, ") = -1 %s %s", e, emsg); 780 } else if (ret == 0 && sc->fmt->timeout) 781 fprintf(trace->output, ") = 0 Timeout"); 782 else if (sc->fmt->hexret) 783 fprintf(trace->output, ") = %#x", ret); 784 else 785 goto signed_print; 786 787 fputc('\n', trace->output); 788 out: 789 ttrace->entry_pending = false; 790 791 return 0; 792 } 793 794 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 795 struct perf_sample *sample) 796 { 797 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 798 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 799 struct thread *thread = machine__findnew_thread(&trace->host, 800 sample->pid, 801 sample->tid); 802 struct thread_trace *ttrace = thread__trace(thread, trace->output); 803 804 if (ttrace == NULL) 805 goto out_dump; 806 807 ttrace->runtime_ms += runtime_ms; 808 trace->runtime_ms += runtime_ms; 809 return 0; 810 811 out_dump: 812 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 813 evsel->name, 814 perf_evsel__strval(evsel, sample, "comm"), 815 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 816 runtime, 817 perf_evsel__intval(evsel, sample, "vruntime")); 818 return 0; 819 } 820 821 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 822 { 823 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 824 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 825 return false; 826 827 if (trace->pid_list || trace->tid_list) 828 return true; 829 830 return false; 831 } 832 833 static int trace__process_sample(struct perf_tool *tool, 834 union perf_event *event __maybe_unused, 835 struct perf_sample *sample, 836 struct perf_evsel *evsel, 837 struct machine *machine __maybe_unused) 838 { 839 struct trace *trace = container_of(tool, struct trace, tool); 840 int err = 0; 841 842 tracepoint_handler handler = evsel->handler.func; 843 844 if (skip_sample(trace, sample)) 845 return 0; 846 847 if (trace->base_time == 0) 848 trace->base_time = sample->time; 849 850 if (handler) 851 handler(trace, evsel, sample); 852 853 return err; 854 } 855 856 static bool 857 perf_session__has_tp(struct perf_session *session, const char *name) 858 { 859 struct perf_evsel *evsel; 860 861 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name); 862 863 return evsel != NULL; 864 } 865 866 static int parse_target_str(struct trace *trace) 867 { 868 if (trace->opts.target.pid) { 869 trace->pid_list = intlist__new(trace->opts.target.pid); 870 if (trace->pid_list == NULL) { 871 pr_err("Error parsing process id string\n"); 872 return -EINVAL; 873 } 874 } 875 876 if (trace->opts.target.tid) { 877 trace->tid_list = intlist__new(trace->opts.target.tid); 878 if (trace->tid_list == NULL) { 879 pr_err("Error parsing thread id string\n"); 880 return -EINVAL; 881 } 882 } 883 884 return 0; 885 } 886 887 static int trace__run(struct trace *trace, int argc, const char **argv) 888 { 889 struct perf_evlist *evlist = perf_evlist__new(); 890 struct perf_evsel *evsel; 891 int err = -1, i; 892 unsigned long before; 893 const bool forks = argc > 0; 894 895 if (evlist == NULL) { 896 fprintf(trace->output, "Not enough memory to run!\n"); 897 goto out; 898 } 899 900 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || 901 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { 902 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n"); 903 goto out_delete_evlist; 904 } 905 906 if (trace->sched && 907 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 908 trace__sched_stat_runtime)) { 909 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n"); 910 goto out_delete_evlist; 911 } 912 913 err = perf_evlist__create_maps(evlist, &trace->opts.target); 914 if (err < 0) { 915 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 916 goto out_delete_evlist; 917 } 918 919 err = trace__symbols_init(trace, evlist); 920 if (err < 0) { 921 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 922 goto out_delete_maps; 923 } 924 925 perf_evlist__config(evlist, &trace->opts); 926 927 signal(SIGCHLD, sig_handler); 928 signal(SIGINT, sig_handler); 929 930 if (forks) { 931 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 932 argv, false, false); 933 if (err < 0) { 934 fprintf(trace->output, "Couldn't run the workload!\n"); 935 goto out_delete_maps; 936 } 937 } 938 939 err = perf_evlist__open(evlist); 940 if (err < 0) { 941 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno)); 942 goto out_delete_maps; 943 } 944 945 err = perf_evlist__mmap(evlist, UINT_MAX, false); 946 if (err < 0) { 947 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); 948 goto out_close_evlist; 949 } 950 951 perf_evlist__enable(evlist); 952 953 if (forks) 954 perf_evlist__start_workload(evlist); 955 956 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; 957 again: 958 before = trace->nr_events; 959 960 for (i = 0; i < evlist->nr_mmaps; i++) { 961 union perf_event *event; 962 963 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 964 const u32 type = event->header.type; 965 tracepoint_handler handler; 966 struct perf_sample sample; 967 968 ++trace->nr_events; 969 970 err = perf_evlist__parse_sample(evlist, event, &sample); 971 if (err) { 972 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 973 continue; 974 } 975 976 if (trace->base_time == 0) 977 trace->base_time = sample.time; 978 979 if (type != PERF_RECORD_SAMPLE) { 980 trace__process_event(trace, &trace->host, event); 981 continue; 982 } 983 984 evsel = perf_evlist__id2evsel(evlist, sample.id); 985 if (evsel == NULL) { 986 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 987 continue; 988 } 989 990 if (sample.raw_data == NULL) { 991 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 992 perf_evsel__name(evsel), sample.tid, 993 sample.cpu, sample.raw_size); 994 continue; 995 } 996 997 handler = evsel->handler.func; 998 handler(trace, evsel, &sample); 999 1000 if (done) 1001 goto out_unmap_evlist; 1002 } 1003 } 1004 1005 if (trace->nr_events == before) { 1006 if (done) 1007 goto out_unmap_evlist; 1008 1009 poll(evlist->pollfd, evlist->nr_fds, -1); 1010 } 1011 1012 if (done) 1013 perf_evlist__disable(evlist); 1014 1015 goto again; 1016 1017 out_unmap_evlist: 1018 perf_evlist__munmap(evlist); 1019 out_close_evlist: 1020 perf_evlist__close(evlist); 1021 out_delete_maps: 1022 perf_evlist__delete_maps(evlist); 1023 out_delete_evlist: 1024 perf_evlist__delete(evlist); 1025 out: 1026 return err; 1027 } 1028 1029 static int trace__replay(struct trace *trace) 1030 { 1031 const struct perf_evsel_str_handler handlers[] = { 1032 { "raw_syscalls:sys_enter", trace__sys_enter, }, 1033 { "raw_syscalls:sys_exit", trace__sys_exit, }, 1034 }; 1035 1036 struct perf_session *session; 1037 int err = -1; 1038 1039 trace->tool.sample = trace__process_sample; 1040 trace->tool.mmap = perf_event__process_mmap; 1041 trace->tool.comm = perf_event__process_comm; 1042 trace->tool.exit = perf_event__process_exit; 1043 trace->tool.fork = perf_event__process_fork; 1044 trace->tool.attr = perf_event__process_attr; 1045 trace->tool.tracing_data = perf_event__process_tracing_data; 1046 trace->tool.build_id = perf_event__process_build_id; 1047 1048 trace->tool.ordered_samples = true; 1049 trace->tool.ordering_requires_timestamps = true; 1050 1051 /* add tid to output */ 1052 trace->multiple_threads = true; 1053 1054 if (symbol__init() < 0) 1055 return -1; 1056 1057 session = perf_session__new(input_name, O_RDONLY, 0, false, 1058 &trace->tool); 1059 if (session == NULL) 1060 return -ENOMEM; 1061 1062 err = perf_session__set_tracepoints_handlers(session, handlers); 1063 if (err) 1064 goto out; 1065 1066 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) { 1067 pr_err("Data file does not have raw_syscalls:sys_enter events\n"); 1068 goto out; 1069 } 1070 1071 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) { 1072 pr_err("Data file does not have raw_syscalls:sys_exit events\n"); 1073 goto out; 1074 } 1075 1076 err = parse_target_str(trace); 1077 if (err != 0) 1078 goto out; 1079 1080 setup_pager(); 1081 1082 err = perf_session__process_events(session, &trace->tool); 1083 if (err) 1084 pr_err("Failed to process events, error %d", err); 1085 1086 out: 1087 perf_session__delete(session); 1088 1089 return err; 1090 } 1091 1092 static size_t trace__fprintf_threads_header(FILE *fp) 1093 { 1094 size_t printed; 1095 1096 printed = fprintf(fp, "\n _____________________________________________________________________\n"); 1097 printed += fprintf(fp," __) Summary of events (__\n\n"); 1098 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); 1099 printed += fprintf(fp," _____________________________________________________________________\n\n"); 1100 1101 return printed; 1102 } 1103 1104 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 1105 { 1106 size_t printed = trace__fprintf_threads_header(fp); 1107 struct rb_node *nd; 1108 1109 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) { 1110 struct thread *thread = rb_entry(nd, struct thread, rb_node); 1111 struct thread_trace *ttrace = thread->priv; 1112 const char *color; 1113 double ratio; 1114 1115 if (ttrace == NULL) 1116 continue; 1117 1118 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 1119 1120 color = PERF_COLOR_NORMAL; 1121 if (ratio > 50.0) 1122 color = PERF_COLOR_RED; 1123 else if (ratio > 25.0) 1124 color = PERF_COLOR_GREEN; 1125 else if (ratio > 5.0) 1126 color = PERF_COLOR_YELLOW; 1127 1128 printed += color_fprintf(fp, color, "%20s", thread->comm); 1129 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); 1130 printed += color_fprintf(fp, color, "%5.1f%%", ratio); 1131 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); 1132 } 1133 1134 return printed; 1135 } 1136 1137 static int trace__set_duration(const struct option *opt, const char *str, 1138 int unset __maybe_unused) 1139 { 1140 struct trace *trace = opt->value; 1141 1142 trace->duration_filter = atof(str); 1143 return 0; 1144 } 1145 1146 static int trace__open_output(struct trace *trace, const char *filename) 1147 { 1148 struct stat st; 1149 1150 if (!stat(filename, &st) && st.st_size) { 1151 char oldname[PATH_MAX]; 1152 1153 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 1154 unlink(oldname); 1155 rename(filename, oldname); 1156 } 1157 1158 trace->output = fopen(filename, "w"); 1159 1160 return trace->output == NULL ? -errno : 0; 1161 } 1162 1163 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 1164 { 1165 const char * const trace_usage[] = { 1166 "perf trace [<options>] [<command>]", 1167 "perf trace [<options>] -- <command> [<options>]", 1168 NULL 1169 }; 1170 struct trace trace = { 1171 .audit_machine = audit_detect_machine(), 1172 .syscalls = { 1173 . max = -1, 1174 }, 1175 .opts = { 1176 .target = { 1177 .uid = UINT_MAX, 1178 .uses_mmap = true, 1179 }, 1180 .user_freq = UINT_MAX, 1181 .user_interval = ULLONG_MAX, 1182 .no_delay = true, 1183 .mmap_pages = 1024, 1184 }, 1185 .output = stdout, 1186 }; 1187 const char *output_name = NULL; 1188 const char *ev_qualifier_str = NULL; 1189 const struct option trace_options[] = { 1190 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 1191 "list of events to trace"), 1192 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1193 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 1194 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 1195 "trace events on existing process id"), 1196 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 1197 "trace events on existing thread id"), 1198 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 1199 "system-wide collection from all CPUs"), 1200 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 1201 "list of cpus to monitor"), 1202 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 1203 "child tasks do not inherit counters"), 1204 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages, 1205 "number of mmap data pages"), 1206 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 1207 "user to profile"), 1208 OPT_CALLBACK(0, "duration", &trace, "float", 1209 "show only events with duration > N.M ms", 1210 trace__set_duration), 1211 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 1212 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 1213 OPT_END() 1214 }; 1215 int err; 1216 char bf[BUFSIZ]; 1217 1218 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 1219 1220 if (output_name != NULL) { 1221 err = trace__open_output(&trace, output_name); 1222 if (err < 0) { 1223 perror("failed to create output file"); 1224 goto out; 1225 } 1226 } 1227 1228 if (ev_qualifier_str != NULL) { 1229 const char *s = ev_qualifier_str; 1230 1231 trace.not_ev_qualifier = *s == '!'; 1232 if (trace.not_ev_qualifier) 1233 ++s; 1234 trace.ev_qualifier = strlist__new(true, s); 1235 if (trace.ev_qualifier == NULL) { 1236 fputs("Not enough memory to parse event qualifier", 1237 trace.output); 1238 err = -ENOMEM; 1239 goto out_close; 1240 } 1241 } 1242 1243 err = perf_target__validate(&trace.opts.target); 1244 if (err) { 1245 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 1246 fprintf(trace.output, "%s", bf); 1247 goto out_close; 1248 } 1249 1250 err = perf_target__parse_uid(&trace.opts.target); 1251 if (err) { 1252 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 1253 fprintf(trace.output, "%s", bf); 1254 goto out_close; 1255 } 1256 1257 if (!argc && perf_target__none(&trace.opts.target)) 1258 trace.opts.target.system_wide = true; 1259 1260 if (input_name) 1261 err = trace__replay(&trace); 1262 else 1263 err = trace__run(&trace, argc, argv); 1264 1265 if (trace.sched && !err) 1266 trace__fprintf_thread_summary(&trace, trace.output); 1267 1268 out_close: 1269 if (output_name != NULL) 1270 fclose(trace.output); 1271 out: 1272 return err; 1273 } 1274