1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 #include "util/stat.h" 14 15 #include <libaudit.h> 16 #include <stdlib.h> 17 #include <sys/eventfd.h> 18 #include <sys/mman.h> 19 #include <linux/futex.h> 20 21 /* For older distros: */ 22 #ifndef MAP_STACK 23 # define MAP_STACK 0x20000 24 #endif 25 26 #ifndef MADV_HWPOISON 27 # define MADV_HWPOISON 100 28 #endif 29 30 #ifndef MADV_MERGEABLE 31 # define MADV_MERGEABLE 12 32 #endif 33 34 #ifndef MADV_UNMERGEABLE 35 # define MADV_UNMERGEABLE 13 36 #endif 37 38 struct tp_field { 39 int offset; 40 union { 41 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 42 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 43 }; 44 }; 45 46 #define TP_UINT_FIELD(bits) \ 47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 48 { \ 49 return *(u##bits *)(sample->raw_data + field->offset); \ 50 } 51 52 TP_UINT_FIELD(8); 53 TP_UINT_FIELD(16); 54 TP_UINT_FIELD(32); 55 TP_UINT_FIELD(64); 56 57 #define TP_UINT_FIELD__SWAPPED(bits) \ 58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 59 { \ 60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ 61 return bswap_##bits(value);\ 62 } 63 64 TP_UINT_FIELD__SWAPPED(16); 65 TP_UINT_FIELD__SWAPPED(32); 66 TP_UINT_FIELD__SWAPPED(64); 67 68 static int tp_field__init_uint(struct tp_field *field, 69 struct format_field *format_field, 70 bool needs_swap) 71 { 72 field->offset = format_field->offset; 73 74 switch (format_field->size) { 75 case 1: 76 field->integer = tp_field__u8; 77 break; 78 case 2: 79 field->integer = needs_swap ? 
tp_field__swapped_u16 : tp_field__u16; 80 break; 81 case 4: 82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 83 break; 84 case 8: 85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 86 break; 87 default: 88 return -1; 89 } 90 91 return 0; 92 } 93 94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 95 { 96 return sample->raw_data + field->offset; 97 } 98 99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 100 { 101 field->offset = format_field->offset; 102 field->pointer = tp_field__ptr; 103 return 0; 104 } 105 106 struct syscall_tp { 107 struct tp_field id; 108 union { 109 struct tp_field args, ret; 110 }; 111 }; 112 113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 114 struct tp_field *field, 115 const char *name) 116 { 117 struct format_field *format_field = perf_evsel__field(evsel, name); 118 119 if (format_field == NULL) 120 return -1; 121 122 return tp_field__init_uint(field, format_field, evsel->needs_swap); 123 } 124 125 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 126 ({ struct syscall_tp *sc = evsel->priv;\ 127 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 128 129 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 130 struct tp_field *field, 131 const char *name) 132 { 133 struct format_field *format_field = perf_evsel__field(evsel, name); 134 135 if (format_field == NULL) 136 return -1; 137 138 return tp_field__init_ptr(field, format_field); 139 } 140 141 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 142 ({ struct syscall_tp *sc = evsel->priv;\ 143 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 144 145 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 146 { 147 free(evsel->priv); 148 evsel->priv = NULL; 149 perf_evsel__delete(evsel); 150 } 151 152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, 153 void *handler, int idx) 
154 { 155 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx); 156 157 if (evsel) { 158 evsel->priv = malloc(sizeof(struct syscall_tp)); 159 160 if (evsel->priv == NULL) 161 goto out_delete; 162 163 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 164 goto out_delete; 165 166 evsel->handler = handler; 167 } 168 169 return evsel; 170 171 out_delete: 172 perf_evsel__delete_priv(evsel); 173 return NULL; 174 } 175 176 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 177 ({ struct syscall_tp *fields = evsel->priv; \ 178 fields->name.integer(&fields->name, sample); }) 179 180 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 181 ({ struct syscall_tp *fields = evsel->priv; \ 182 fields->name.pointer(&fields->name, sample); }) 183 184 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, 185 void *sys_enter_handler, 186 void *sys_exit_handler) 187 { 188 int ret = -1; 189 int idx = evlist->nr_entries; 190 struct perf_evsel *sys_enter, *sys_exit; 191 192 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++); 193 if (sys_enter == NULL) 194 goto out; 195 196 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 197 goto out_delete_sys_enter; 198 199 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++); 200 if (sys_exit == NULL) 201 goto out_delete_sys_enter; 202 203 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 204 goto out_delete_sys_exit; 205 206 perf_evlist__add(evlist, sys_enter); 207 perf_evlist__add(evlist, sys_exit); 208 209 ret = 0; 210 out: 211 return ret; 212 213 out_delete_sys_exit: 214 perf_evsel__delete_priv(sys_exit); 215 out_delete_sys_enter: 216 perf_evsel__delete_priv(sys_enter); 217 goto out; 218 } 219 220 221 struct syscall_arg { 222 unsigned long val; 223 struct thread *thread; 224 struct trace *trace; 225 void *parm; 226 u8 idx; 227 u8 mask; 228 }; 229 230 struct strarray { 231 int offset; 232 int nr_entries; 233 const char **entries; 234 }; 235 236 
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 237 .nr_entries = ARRAY_SIZE(array), \ 238 .entries = array, \ 239 } 240 241 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 242 .offset = off, \ 243 .nr_entries = ARRAY_SIZE(array), \ 244 .entries = array, \ 245 } 246 247 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 248 const char *intfmt, 249 struct syscall_arg *arg) 250 { 251 struct strarray *sa = arg->parm; 252 int idx = arg->val - sa->offset; 253 254 if (idx < 0 || idx >= sa->nr_entries) 255 return scnprintf(bf, size, intfmt, arg->val); 256 257 return scnprintf(bf, size, "%s", sa->entries[idx]); 258 } 259 260 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 261 struct syscall_arg *arg) 262 { 263 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 264 } 265 266 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 267 268 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 269 struct syscall_arg *arg) 270 { 271 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 272 } 273 274 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 275 276 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 277 struct syscall_arg *arg); 278 279 #define SCA_FD syscall_arg__scnprintf_fd 280 281 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 282 struct syscall_arg *arg) 283 { 284 int fd = arg->val; 285 286 if (fd == AT_FDCWD) 287 return scnprintf(bf, size, "CWD"); 288 289 return syscall_arg__scnprintf_fd(bf, size, arg); 290 } 291 292 #define SCA_FDAT syscall_arg__scnprintf_fd_at 293 294 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 295 struct syscall_arg *arg); 296 297 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 298 299 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 300 struct syscall_arg *arg) 301 { 302 return scnprintf(bf, size, "%#lx", arg->val); 303 } 304 
305 #define SCA_HEX syscall_arg__scnprintf_hex 306 307 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 308 struct syscall_arg *arg) 309 { 310 int printed = 0, prot = arg->val; 311 312 if (prot == PROT_NONE) 313 return scnprintf(bf, size, "NONE"); 314 #define P_MMAP_PROT(n) \ 315 if (prot & PROT_##n) { \ 316 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 317 prot &= ~PROT_##n; \ 318 } 319 320 P_MMAP_PROT(EXEC); 321 P_MMAP_PROT(READ); 322 P_MMAP_PROT(WRITE); 323 #ifdef PROT_SEM 324 P_MMAP_PROT(SEM); 325 #endif 326 P_MMAP_PROT(GROWSDOWN); 327 P_MMAP_PROT(GROWSUP); 328 #undef P_MMAP_PROT 329 330 if (prot) 331 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 332 333 return printed; 334 } 335 336 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 337 338 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 339 struct syscall_arg *arg) 340 { 341 int printed = 0, flags = arg->val; 342 343 #define P_MMAP_FLAG(n) \ 344 if (flags & MAP_##n) { \ 345 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 346 flags &= ~MAP_##n; \ 347 } 348 349 P_MMAP_FLAG(SHARED); 350 P_MMAP_FLAG(PRIVATE); 351 #ifdef MAP_32BIT 352 P_MMAP_FLAG(32BIT); 353 #endif 354 P_MMAP_FLAG(ANONYMOUS); 355 P_MMAP_FLAG(DENYWRITE); 356 P_MMAP_FLAG(EXECUTABLE); 357 P_MMAP_FLAG(FILE); 358 P_MMAP_FLAG(FIXED); 359 P_MMAP_FLAG(GROWSDOWN); 360 #ifdef MAP_HUGETLB 361 P_MMAP_FLAG(HUGETLB); 362 #endif 363 P_MMAP_FLAG(LOCKED); 364 P_MMAP_FLAG(NONBLOCK); 365 P_MMAP_FLAG(NORESERVE); 366 P_MMAP_FLAG(POPULATE); 367 P_MMAP_FLAG(STACK); 368 #ifdef MAP_UNINITIALIZED 369 P_MMAP_FLAG(UNINITIALIZED); 370 #endif 371 #undef P_MMAP_FLAG 372 373 if (flags) 374 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", flags); 375 376 return printed; 377 } 378 379 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 380 381 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 382 struct syscall_arg *arg) 383 { 384 int behavior = arg->val; 385 386 switch (behavior) { 387 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 388 P_MADV_BHV(NORMAL); 389 P_MADV_BHV(RANDOM); 390 P_MADV_BHV(SEQUENTIAL); 391 P_MADV_BHV(WILLNEED); 392 P_MADV_BHV(DONTNEED); 393 P_MADV_BHV(REMOVE); 394 P_MADV_BHV(DONTFORK); 395 P_MADV_BHV(DOFORK); 396 P_MADV_BHV(HWPOISON); 397 #ifdef MADV_SOFT_OFFLINE 398 P_MADV_BHV(SOFT_OFFLINE); 399 #endif 400 P_MADV_BHV(MERGEABLE); 401 P_MADV_BHV(UNMERGEABLE); 402 #ifdef MADV_HUGEPAGE 403 P_MADV_BHV(HUGEPAGE); 404 #endif 405 #ifdef MADV_NOHUGEPAGE 406 P_MADV_BHV(NOHUGEPAGE); 407 #endif 408 #ifdef MADV_DONTDUMP 409 P_MADV_BHV(DONTDUMP); 410 #endif 411 #ifdef MADV_DODUMP 412 P_MADV_BHV(DODUMP); 413 #endif 414 #undef P_MADV_PHV 415 default: break; 416 } 417 418 return scnprintf(bf, size, "%#x", behavior); 419 } 420 421 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 422 423 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 424 struct syscall_arg *arg) 425 { 426 int printed = 0, op = arg->val; 427 428 if (op == 0) 429 return scnprintf(bf, size, "NONE"); 430 #define P_CMD(cmd) \ 431 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 433 op &= ~LOCK_##cmd; \ 434 } 435 436 P_CMD(SH); 437 P_CMD(EX); 438 P_CMD(NB); 439 P_CMD(UN); 440 P_CMD(MAND); 441 P_CMD(RW); 442 P_CMD(READ); 443 P_CMD(WRITE); 444 #undef P_OP 445 446 if (op) 447 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", op); 448 449 return printed; 450 } 451 452 #define SCA_FLOCK syscall_arg__scnprintf_flock 453 454 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 455 { 456 enum syscall_futex_args { 457 SCF_UADDR = (1 << 0), 458 SCF_OP = (1 << 1), 459 SCF_VAL = (1 << 2), 460 SCF_TIMEOUT = (1 << 3), 461 SCF_UADDR2 = (1 << 4), 462 SCF_VAL3 = (1 << 5), 463 }; 464 int op = arg->val; 465 int cmd = op & FUTEX_CMD_MASK; 466 size_t printed = 0; 467 468 switch (cmd) { 469 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 470 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 471 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 472 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 473 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 474 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 475 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 476 P_FUTEX_OP(WAKE_OP); break; 477 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 478 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 479 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 480 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 481 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 482 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 483 default: printed = scnprintf(bf, size, "%#x", cmd); break; 484 } 485 486 if (op & FUTEX_PRIVATE_FLAG) 487 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 488 489 if (op & FUTEX_CLOCK_REALTIME) 490 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 491 492 return printed; 493 } 494 495 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 496 497 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 498 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 499 500 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 501 static 
DEFINE_STRARRAY(itimers); 502 503 static const char *whences[] = { "SET", "CUR", "END", 504 #ifdef SEEK_DATA 505 "DATA", 506 #endif 507 #ifdef SEEK_HOLE 508 "HOLE", 509 #endif 510 }; 511 static DEFINE_STRARRAY(whences); 512 513 static const char *fcntl_cmds[] = { 514 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 515 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 516 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 517 "F_GETOWNER_UIDS", 518 }; 519 static DEFINE_STRARRAY(fcntl_cmds); 520 521 static const char *rlimit_resources[] = { 522 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 523 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 524 "RTTIME", 525 }; 526 static DEFINE_STRARRAY(rlimit_resources); 527 528 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 529 static DEFINE_STRARRAY(sighow); 530 531 static const char *clockid[] = { 532 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 533 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 534 }; 535 static DEFINE_STRARRAY(clockid); 536 537 static const char *socket_families[] = { 538 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 539 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 540 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 541 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 542 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 543 "ALG", "NFC", "VSOCK", 544 }; 545 static DEFINE_STRARRAY(socket_families); 546 547 #ifndef SOCK_TYPE_MASK 548 #define SOCK_TYPE_MASK 0xf 549 #endif 550 551 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 552 struct syscall_arg *arg) 553 { 554 size_t printed; 555 int type = arg->val, 556 flags = type & ~SOCK_TYPE_MASK; 557 558 type &= SOCK_TYPE_MASK; 559 /* 560 * Can't use a strarray, MIPS may override for ABI reasons. 
561 */ 562 switch (type) { 563 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 564 P_SK_TYPE(STREAM); 565 P_SK_TYPE(DGRAM); 566 P_SK_TYPE(RAW); 567 P_SK_TYPE(RDM); 568 P_SK_TYPE(SEQPACKET); 569 P_SK_TYPE(DCCP); 570 P_SK_TYPE(PACKET); 571 #undef P_SK_TYPE 572 default: 573 printed = scnprintf(bf, size, "%#x", type); 574 } 575 576 #define P_SK_FLAG(n) \ 577 if (flags & SOCK_##n) { \ 578 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 579 flags &= ~SOCK_##n; \ 580 } 581 582 P_SK_FLAG(CLOEXEC); 583 P_SK_FLAG(NONBLOCK); 584 #undef P_SK_FLAG 585 586 if (flags) 587 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 588 589 return printed; 590 } 591 592 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 593 594 #ifndef MSG_PROBE 595 #define MSG_PROBE 0x10 596 #endif 597 #ifndef MSG_WAITFORONE 598 #define MSG_WAITFORONE 0x10000 599 #endif 600 #ifndef MSG_SENDPAGE_NOTLAST 601 #define MSG_SENDPAGE_NOTLAST 0x20000 602 #endif 603 #ifndef MSG_FASTOPEN 604 #define MSG_FASTOPEN 0x20000000 605 #endif 606 607 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 608 struct syscall_arg *arg) 609 { 610 int printed = 0, flags = arg->val; 611 612 if (flags == 0) 613 return scnprintf(bf, size, "NONE"); 614 #define P_MSG_FLAG(n) \ 615 if (flags & MSG_##n) { \ 616 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 617 flags &= ~MSG_##n; \ 618 } 619 620 P_MSG_FLAG(OOB); 621 P_MSG_FLAG(PEEK); 622 P_MSG_FLAG(DONTROUTE); 623 P_MSG_FLAG(TRYHARD); 624 P_MSG_FLAG(CTRUNC); 625 P_MSG_FLAG(PROBE); 626 P_MSG_FLAG(TRUNC); 627 P_MSG_FLAG(DONTWAIT); 628 P_MSG_FLAG(EOR); 629 P_MSG_FLAG(WAITALL); 630 P_MSG_FLAG(FIN); 631 P_MSG_FLAG(SYN); 632 P_MSG_FLAG(CONFIRM); 633 P_MSG_FLAG(RST); 634 P_MSG_FLAG(ERRQUEUE); 635 P_MSG_FLAG(NOSIGNAL); 636 P_MSG_FLAG(MORE); 637 P_MSG_FLAG(WAITFORONE); 638 P_MSG_FLAG(SENDPAGE_NOTLAST); 639 P_MSG_FLAG(FASTOPEN); 640 P_MSG_FLAG(CMSG_CLOEXEC); 641 #undef P_MSG_FLAG 642 643 if (flags) 644 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 645 646 return printed; 647 } 648 649 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 650 651 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 652 struct syscall_arg *arg) 653 { 654 size_t printed = 0; 655 int mode = arg->val; 656 657 if (mode == F_OK) /* 0 */ 658 return scnprintf(bf, size, "F"); 659 #define P_MODE(n) \ 660 if (mode & n##_OK) { \ 661 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 662 mode &= ~n##_OK; \ 663 } 664 665 P_MODE(R); 666 P_MODE(W); 667 P_MODE(X); 668 #undef P_MODE 669 670 if (mode) 671 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 672 673 return printed; 674 } 675 676 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 677 678 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 679 struct syscall_arg *arg) 680 { 681 int printed = 0, flags = arg->val; 682 683 if (!(flags & O_CREAT)) 684 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 685 686 if (flags == 0) 687 return scnprintf(bf, size, "RDONLY"); 688 #define P_FLAG(n) \ 689 if (flags & O_##n) { \ 690 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 691 flags &= ~O_##n; \ 692 } 693 694 P_FLAG(APPEND); 695 P_FLAG(ASYNC); 696 P_FLAG(CLOEXEC); 697 P_FLAG(CREAT); 698 P_FLAG(DIRECT); 699 P_FLAG(DIRECTORY); 700 P_FLAG(EXCL); 701 P_FLAG(LARGEFILE); 702 P_FLAG(NOATIME); 703 P_FLAG(NOCTTY); 704 #ifdef O_NONBLOCK 705 P_FLAG(NONBLOCK); 706 #elif O_NDELAY 707 P_FLAG(NDELAY); 708 #endif 709 #ifdef O_PATH 710 P_FLAG(PATH); 711 #endif 712 P_FLAG(RDWR); 713 #ifdef O_DSYNC 714 if ((flags & O_SYNC) == O_SYNC) 715 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 716 else { 717 P_FLAG(DSYNC); 718 } 719 #else 720 P_FLAG(SYNC); 721 #endif 722 P_FLAG(TRUNC); 723 P_FLAG(WRONLY); 724 #undef P_FLAG 725 726 if (flags) 727 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 728 729 return printed; 730 } 731 732 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 733 734 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 735 struct syscall_arg *arg) 736 { 737 int printed = 0, flags = arg->val; 738 739 if (flags == 0) 740 return scnprintf(bf, size, "NONE"); 741 #define P_FLAG(n) \ 742 if (flags & EFD_##n) { \ 743 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 744 flags &= ~EFD_##n; \ 745 } 746 747 P_FLAG(SEMAPHORE); 748 P_FLAG(CLOEXEC); 749 P_FLAG(NONBLOCK); 750 #undef P_FLAG 751 752 if (flags) 753 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 754 755 return printed; 756 } 757 758 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 759 760 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 761 struct syscall_arg *arg) 762 { 763 int printed = 0, flags = arg->val; 764 765 #define P_FLAG(n) \ 766 if (flags & O_##n) { \ 767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 768 flags &= ~O_##n; \ 769 } 770 771 P_FLAG(CLOEXEC); 772 P_FLAG(NONBLOCK); 773 #undef P_FLAG 774 775 if (flags) 776 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 777 778 return printed; 779 } 780 781 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 782 783 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 784 { 785 int sig = arg->val; 786 787 switch (sig) { 788 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 789 P_SIGNUM(HUP); 790 P_SIGNUM(INT); 791 P_SIGNUM(QUIT); 792 P_SIGNUM(ILL); 793 P_SIGNUM(TRAP); 794 P_SIGNUM(ABRT); 795 P_SIGNUM(BUS); 796 P_SIGNUM(FPE); 797 P_SIGNUM(KILL); 798 P_SIGNUM(USR1); 799 P_SIGNUM(SEGV); 800 P_SIGNUM(USR2); 801 P_SIGNUM(PIPE); 802 P_SIGNUM(ALRM); 803 P_SIGNUM(TERM); 804 P_SIGNUM(STKFLT); 805 P_SIGNUM(CHLD); 806 P_SIGNUM(CONT); 807 P_SIGNUM(STOP); 808 P_SIGNUM(TSTP); 809 P_SIGNUM(TTIN); 810 P_SIGNUM(TTOU); 811 P_SIGNUM(URG); 812 P_SIGNUM(XCPU); 813 P_SIGNUM(XFSZ); 814 P_SIGNUM(VTALRM); 815 P_SIGNUM(PROF); 816 P_SIGNUM(WINCH); 817 P_SIGNUM(IO); 818 P_SIGNUM(PWR); 819 P_SIGNUM(SYS); 820 default: break; 821 } 822 823 return scnprintf(bf, size, "%#x", sig); 824 } 825 826 #define SCA_SIGNUM syscall_arg__scnprintf_signum 827 828 #define TCGETS 0x5401 829 830 static const char *tioctls[] = { 831 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 832 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 833 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 834 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 835 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 836 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 837 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 838 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 839 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 840 
"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 841 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 842 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 843 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 844 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 845 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 846 }; 847 848 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 849 850 #define STRARRAY(arg, name, array) \ 851 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 852 .arg_parm = { [arg] = &strarray__##array, } 853 854 static struct syscall_fmt { 855 const char *name; 856 const char *alias; 857 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 858 void *arg_parm[6]; 859 bool errmsg; 860 bool timeout; 861 bool hexret; 862 } syscall_fmts[] = { 863 { .name = "access", .errmsg = true, 864 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 865 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 866 { .name = "brk", .hexret = true, 867 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 868 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 869 { .name = "close", .errmsg = true, 870 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 871 { .name = "connect", .errmsg = true, }, 872 { .name = "dup", .errmsg = true, 873 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 874 { .name = "dup2", .errmsg = true, 875 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 876 { .name = "dup3", .errmsg = true, 877 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 878 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 879 { .name = "eventfd2", .errmsg = true, 880 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 881 { .name = "faccessat", .errmsg = true, 882 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 883 { .name = "fadvise64", .errmsg = true, 884 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 885 { .name = 
"fallocate", .errmsg = true, 886 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 887 { .name = "fchdir", .errmsg = true, 888 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 889 { .name = "fchmod", .errmsg = true, 890 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 891 { .name = "fchmodat", .errmsg = true, 892 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 893 { .name = "fchown", .errmsg = true, 894 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 895 { .name = "fchownat", .errmsg = true, 896 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 897 { .name = "fcntl", .errmsg = true, 898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 899 [1] = SCA_STRARRAY, /* cmd */ }, 900 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 901 { .name = "fdatasync", .errmsg = true, 902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 903 { .name = "flock", .errmsg = true, 904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 905 [1] = SCA_FLOCK, /* cmd */ }, }, 906 { .name = "fsetxattr", .errmsg = true, 907 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 908 { .name = "fstat", .errmsg = true, .alias = "newfstat", 909 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 910 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 911 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 912 { .name = "fstatfs", .errmsg = true, 913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 914 { .name = "fsync", .errmsg = true, 915 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 916 { .name = "ftruncate", .errmsg = true, 917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 918 { .name = "futex", .errmsg = true, 919 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 920 { .name = "futimesat", .errmsg = true, 921 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 922 { .name = "getdents", .errmsg = true, 923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 924 { .name = "getdents64", .errmsg = true, 925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 926 { .name = "getitimer", .errmsg = true, STRARRAY(0, 
which, itimers), }, 927 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 928 { .name = "ioctl", .errmsg = true, 929 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 930 [1] = SCA_STRHEXARRAY, /* cmd */ 931 [2] = SCA_HEX, /* arg */ }, 932 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 933 { .name = "kill", .errmsg = true, 934 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 935 { .name = "linkat", .errmsg = true, 936 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 937 { .name = "lseek", .errmsg = true, 938 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 939 [2] = SCA_STRARRAY, /* whence */ }, 940 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 941 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 942 { .name = "madvise", .errmsg = true, 943 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 944 [2] = SCA_MADV_BHV, /* behavior */ }, }, 945 { .name = "mkdirat", .errmsg = true, 946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 947 { .name = "mknodat", .errmsg = true, 948 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 949 { .name = "mlock", .errmsg = true, 950 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 951 { .name = "mlockall", .errmsg = true, 952 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 953 { .name = "mmap", .hexret = true, 954 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 955 [2] = SCA_MMAP_PROT, /* prot */ 956 [3] = SCA_MMAP_FLAGS, /* flags */ }, }, 957 { .name = "mprotect", .errmsg = true, 958 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 959 [2] = SCA_MMAP_PROT, /* prot */ }, }, 960 { .name = "mremap", .hexret = true, 961 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 962 [4] = SCA_HEX, /* new_addr */ }, }, 963 { .name = "munlock", .errmsg = true, 964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 965 { .name = "munmap", .errmsg = true, 966 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 967 { .name = "name_to_handle_at", .errmsg = true, 968 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 
}, }, 969 { .name = "newfstatat", .errmsg = true, 970 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 971 { .name = "open", .errmsg = true, 972 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 973 { .name = "open_by_handle_at", .errmsg = true, 974 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 975 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 976 { .name = "openat", .errmsg = true, 977 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 978 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 979 { .name = "pipe2", .errmsg = true, 980 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 981 { .name = "poll", .errmsg = true, .timeout = true, }, 982 { .name = "ppoll", .errmsg = true, .timeout = true, }, 983 { .name = "pread", .errmsg = true, .alias = "pread64", 984 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 985 { .name = "preadv", .errmsg = true, .alias = "pread", 986 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 987 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 988 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 989 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 990 { .name = "pwritev", .errmsg = true, 991 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 992 { .name = "read", .errmsg = true, 993 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 994 { .name = "readlinkat", .errmsg = true, 995 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 996 { .name = "readv", .errmsg = true, 997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 998 { .name = "recvfrom", .errmsg = true, 999 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1000 { .name = "recvmmsg", .errmsg = true, 1001 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1002 { .name = "recvmsg", .errmsg = true, 1003 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1004 { .name = "renameat", .errmsg = true, 1005 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1006 { .name = "rt_sigaction", .errmsg = true, 1007 .arg_scnprintf = { [0] = SCA_SIGNUM, 
/* sig */ }, }, 1008 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1009 { .name = "rt_sigqueueinfo", .errmsg = true, 1010 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1011 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1012 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1013 { .name = "select", .errmsg = true, .timeout = true, }, 1014 { .name = "sendmmsg", .errmsg = true, 1015 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1016 { .name = "sendmsg", .errmsg = true, 1017 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1018 { .name = "sendto", .errmsg = true, 1019 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1020 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1021 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1022 { .name = "shutdown", .errmsg = true, 1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1024 { .name = "socket", .errmsg = true, 1025 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1026 [1] = SCA_SK_TYPE, /* type */ }, 1027 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1028 { .name = "socketpair", .errmsg = true, 1029 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1030 [1] = SCA_SK_TYPE, /* type */ }, 1031 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1032 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1033 { .name = "symlinkat", .errmsg = true, 1034 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1035 { .name = "tgkill", .errmsg = true, 1036 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1037 { .name = "tkill", .errmsg = true, 1038 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1039 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1040 { .name = "unlinkat", .errmsg = true, 1041 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1042 { .name = "utimensat", .errmsg = true, 1043 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1044 { 
.name = "write", .errmsg = true, 1045 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1046 { .name = "writev", .errmsg = true, 1047 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1048 }; 1049 1050 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1051 { 1052 const struct syscall_fmt *fmt = fmtp; 1053 return strcmp(name, fmt->name); 1054 } 1055 1056 static struct syscall_fmt *syscall_fmt__find(const char *name) 1057 { 1058 const int nmemb = ARRAY_SIZE(syscall_fmts); 1059 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1060 } 1061 1062 struct syscall { 1063 struct event_format *tp_format; 1064 const char *name; 1065 bool filtered; 1066 struct syscall_fmt *fmt; 1067 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1068 void **arg_parm; 1069 }; 1070 1071 static size_t fprintf_duration(unsigned long t, FILE *fp) 1072 { 1073 double duration = (double)t / NSEC_PER_MSEC; 1074 size_t printed = fprintf(fp, "("); 1075 1076 if (duration >= 1.0) 1077 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1078 else if (duration >= 0.01) 1079 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1080 else 1081 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1082 return printed + fprintf(fp, "): "); 1083 } 1084 1085 struct thread_trace { 1086 u64 entry_time; 1087 u64 exit_time; 1088 bool entry_pending; 1089 unsigned long nr_events; 1090 char *entry_str; 1091 double runtime_ms; 1092 struct { 1093 int max; 1094 char **table; 1095 } paths; 1096 1097 struct intlist *syscall_stats; 1098 }; 1099 1100 static struct thread_trace *thread_trace__new(void) 1101 { 1102 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1103 1104 if (ttrace) 1105 ttrace->paths.max = -1; 1106 1107 ttrace->syscall_stats = intlist__new(NULL); 1108 1109 return ttrace; 1110 } 1111 1112 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 1113 { 
/* Body of thread__trace(); its signature is on the lines just above. */
	struct thread_trace *ttrace;

	if (thread == NULL)
		goto fail;

	/* Lazily attach the tracing state the first time a thread is seen. */
	if (thread->priv == NULL)
		thread->priv = thread_trace__new();

	if (thread->priv == NULL)
		goto fail;

	ttrace = thread->priv;
	/* Every successful lookup counts as one event for this thread. */
	++ttrace->nr_events;

	return ttrace;
fail:
	color_fprintf(fp, PERF_COLOR_RED,
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}

/* Global state of one 'perf trace' invocation. */
struct trace {
	struct perf_tool	tool;
	struct {
		int		machine;
		int		open_id;	/* syscall id of "open" */
	} audit;
	struct {
		int		max;		/* highest id with a slot in table, -1 if none */
		struct syscall  *table;		/* indexed by syscall id */
	} syscalls;
	struct perf_record_opts opts;
	struct machine		*host;
	u64			base_time;	/* timestamps are printed relative to this */
	bool			full_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;
	bool			not_ev_qualifier;
	bool			live;		/* set while trace__run() is running */
	const char		*last_vfs_getname;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			show_comm;
	bool			show_tool_stats;
	double			duration_filter;	/* in milliseconds */
	double			runtime_ms;
	struct {
		u64		vfs_getname, proc_getname;
	} stats;
};

/*
 * Remember @pathname as the path behind @fd for @thread, growing the
 * per-thread fd table when @fd is beyond its current end.  New slots are
 * zeroed.  Returns 0 on success, -1 when memory cannot be allocated.
 */
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
	struct thread_trace *ttrace = thread->priv;

	if (fd > ttrace->paths.max) {
		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));

		if (npath == NULL)
			return -1;

		if (ttrace->paths.max != -1) {
			/* Clear only the slots added after the old end. */
			memset(npath + ttrace->paths.max + 1, 0,
			       (fd - ttrace->paths.max) * sizeof(char *));
		} else {
			/* First allocation: clear the whole table. */
			memset(npath, 0, (fd + 1) * sizeof(char *));
		}

		ttrace->paths.table = npath;
		ttrace->paths.max   = fd;
	}

	ttrace->paths.table[fd] = strdup(pathname);

	return ttrace->paths.table[fd] != NULL ?
0 : -1; 1193 } 1194 1195 static int thread__read_fd_path(struct thread *thread, int fd) 1196 { 1197 char linkname[PATH_MAX], pathname[PATH_MAX]; 1198 struct stat st; 1199 int ret; 1200 1201 if (thread->pid_ == thread->tid) { 1202 scnprintf(linkname, sizeof(linkname), 1203 "/proc/%d/fd/%d", thread->pid_, fd); 1204 } else { 1205 scnprintf(linkname, sizeof(linkname), 1206 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1207 } 1208 1209 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1210 return -1; 1211 1212 ret = readlink(linkname, pathname, sizeof(pathname)); 1213 1214 if (ret < 0 || ret > st.st_size) 1215 return -1; 1216 1217 pathname[ret] = '\0'; 1218 return trace__set_fd_pathname(thread, fd, pathname); 1219 } 1220 1221 static const char *thread__fd_path(struct thread *thread, int fd, 1222 struct trace *trace) 1223 { 1224 struct thread_trace *ttrace = thread->priv; 1225 1226 if (ttrace == NULL) 1227 return NULL; 1228 1229 if (fd < 0) 1230 return NULL; 1231 1232 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) 1233 if (!trace->live) 1234 return NULL; 1235 ++trace->stats.proc_getname; 1236 if (thread__read_fd_path(thread, fd)) { 1237 return NULL; 1238 } 1239 1240 return ttrace->paths.table[fd]; 1241 } 1242 1243 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1244 struct syscall_arg *arg) 1245 { 1246 int fd = arg->val; 1247 size_t printed = scnprintf(bf, size, "%d", fd); 1248 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1249 1250 if (path) 1251 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1252 1253 return printed; 1254 } 1255 1256 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1257 struct syscall_arg *arg) 1258 { 1259 int fd = arg->val; 1260 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1261 struct thread_trace *ttrace = arg->thread->priv; 1262 1263 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) { 1264 
free(ttrace->paths.table[fd]); 1265 ttrace->paths.table[fd] = NULL; 1266 } 1267 1268 return printed; 1269 } 1270 1271 static bool trace__filter_duration(struct trace *trace, double t) 1272 { 1273 return t < (trace->duration_filter * NSEC_PER_MSEC); 1274 } 1275 1276 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1277 { 1278 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1279 1280 return fprintf(fp, "%10.3f ", ts); 1281 } 1282 1283 static bool done = false; 1284 static bool interrupted = false; 1285 1286 static void sig_handler(int sig) 1287 { 1288 done = true; 1289 interrupted = sig == SIGINT; 1290 } 1291 1292 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1293 u64 duration, u64 tstamp, FILE *fp) 1294 { 1295 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1296 printed += fprintf_duration(duration, fp); 1297 1298 if (trace->multiple_threads) { 1299 if (trace->show_comm) 1300 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1301 printed += fprintf(fp, "%d ", thread->tid); 1302 } 1303 1304 return printed; 1305 } 1306 1307 static int trace__process_event(struct trace *trace, struct machine *machine, 1308 union perf_event *event, struct perf_sample *sample) 1309 { 1310 int ret = 0; 1311 1312 switch (event->header.type) { 1313 case PERF_RECORD_LOST: 1314 color_fprintf(trace->output, PERF_COLOR_RED, 1315 "LOST %" PRIu64 " events!\n", event->lost.lost); 1316 ret = machine__process_lost_event(machine, event, sample); 1317 default: 1318 ret = machine__process_event(machine, event, sample); 1319 break; 1320 } 1321 1322 return ret; 1323 } 1324 1325 static int trace__tool_process(struct perf_tool *tool, 1326 union perf_event *event, 1327 struct perf_sample *sample, 1328 struct machine *machine) 1329 { 1330 struct trace *trace = container_of(tool, struct trace, tool); 1331 return trace__process_event(trace, machine, event, sample); 1332 } 1333 1334 static int 
trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1335 { 1336 int err = symbol__init(); 1337 1338 if (err) 1339 return err; 1340 1341 trace->host = machine__new_host(); 1342 if (trace->host == NULL) 1343 return -ENOMEM; 1344 1345 if (perf_target__has_task(&trace->opts.target)) { 1346 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads, 1347 trace__tool_process, 1348 trace->host); 1349 } else { 1350 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process, 1351 trace->host); 1352 } 1353 1354 if (err) 1355 symbol__exit(); 1356 1357 return err; 1358 } 1359 1360 static int syscall__set_arg_fmts(struct syscall *sc) 1361 { 1362 struct format_field *field; 1363 int idx = 0; 1364 1365 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); 1366 if (sc->arg_scnprintf == NULL) 1367 return -1; 1368 1369 if (sc->fmt) 1370 sc->arg_parm = sc->fmt->arg_parm; 1371 1372 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 1373 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1374 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1375 else if (field->flags & FIELD_IS_POINTER) 1376 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1377 ++idx; 1378 } 1379 1380 return 0; 1381 } 1382 1383 static int trace__read_syscall_info(struct trace *trace, int id) 1384 { 1385 char tp_name[128]; 1386 struct syscall *sc; 1387 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1388 1389 if (name == NULL) 1390 return -1; 1391 1392 if (id > trace->syscalls.max) { 1393 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1394 1395 if (nsyscalls == NULL) 1396 return -1; 1397 1398 if (trace->syscalls.max != -1) { 1399 memset(nsyscalls + trace->syscalls.max + 1, 0, 1400 (id - trace->syscalls.max) * sizeof(*sc)); 1401 } else { 1402 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1403 } 1404 1405 trace->syscalls.table = nsyscalls; 1406 trace->syscalls.max = id; 
/* Rest of trace__read_syscall_info(), after the syscall table was grown. */
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	if (trace->ev_qualifier) {
		bool in = strlist__find(trace->ev_qualifier, name) != NULL;

		/*
		 * Filter the syscall out when its presence in the qualifier
		 * list equals the negation flag.
		 */
		if (!(in ^ trace->not_ev_qualifier)) {
			sc->filtered = true;
			/*
			 * No need to read the tracepoint information since
			 * this syscall will be filtered out.
			 */
			return 0;
		}
	}

	sc->fmt = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = event_format__new("syscalls", tp_name);

	/* Retry with the alias from the format entry, if there is one. */
	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = event_format__new("syscalls", tp_name);
	}

	if (sc->tp_format == NULL)
		return -1;

	return syscall__set_arg_fmts(sc);
}

/*
 * Format the arguments of syscall @sc into @bf as "name: value, ...".
 *
 * With a tracepoint format, the fields after the first one give the argument
 * names and the per-argument printers are used when set.  Without one, six
 * raw "argN: value" pairs are printed.  Returns the characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned long *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;

	if (sc->tp_format != NULL) {
		struct format_field *field;
		/* bit tracks arg.idx: argument N corresponds to bit (1 << N). */
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->tp_format->format.fields->next; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* Skip arguments whose bit is set in arg.mask. */
			if (arg.mask & bit)
				continue;
			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (args[arg.idx] == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			/* ", " separator only once something was printed. */
			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = args[arg.idx];
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", args[arg.idx]);
			}
		}
	} else {
		int i = 0;

		while (i < 6) {
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, args[i]);
			++i;
		}
	}

	return printed;
}

/* Signature shared by the per-tracepoint sample handlers in this file. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);

/*
 * Return the struct syscall for @id, reading its information on first use.
 * Returns NULL for negative ids and when the information cannot be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* Not in the table yet: try to read it now. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: the read may have succeeded without filling the slot. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}

/*
 * Account one completed syscall @id in the per-thread statistics.
 *
 * The struct stats for @id is allocated on first use.  The duration is
 * sample->time minus the recorded entry time, or 0 when no entry time is
 * known or the sample is not later than it.  Allocation failures are
 * silently ignored.
 */
static void thread__update_stats(struct thread_trace *ttrace,
				 int id, struct perf_sample *sample)
{
	struct int_node *inode;
	struct stats *stats;
	u64 duration = 0;

	inode = intlist__findnew(ttrace->syscall_stats, id);
	if (inode == NULL)
		return;

	stats = inode->priv;
	if (stats == NULL) {
		stats = malloc(sizeof(struct stats));
		if (stats == NULL)
			return;
		init_stats(stats);
		inode->priv = stats;
	}

	if (ttrace->entry_time && sample->time > ttrace->entry_time)
		duration = sample->time - ttrace->entry_time;

	update_stats(stats, duration);
}

/*
 * raw_syscalls:sys_enter handler: format "name(args" into the thread's
 * entry_str so that the sys_exit handler can print the complete line.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	if (sc->filtered)
		return 0;

	thread =
machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		return -1;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
	ttrace = thread->priv;

	/* The 1024 byte formatting buffer is allocated once per thread. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(1024);
		if (!ttrace->entry_str)
			return -1;
	}

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
					   args, trace, thread);

	/*
	 * exit and exit_group are printed right away instead of being left
	 * pending -- presumably because no sys_exit event follows them.
	 */
	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
		if (!trace->duration_filter) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
		}
	} else
		ttrace->entry_pending = true;

	return 0;
}

/*
 * raw_syscalls:sys_exit handler: update the statistics, record the pathname
 * of a freshly opened fd, and print the line for the finished syscall.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   struct perf_sample *sample)
{
	int ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	if (sc->filtered)
		return 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		return -1;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/*
	 * A successful open: the pathname seen by the last vfs_getname probe
	 * becomes the path of the returned fd.
	 */
	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
		trace->last_vfs_getname = NULL;
		++trace->stats.vfs_getname;
	}

	ttrace = thread->priv;

/* trace__sys_exit(), continued: timing, filtering and output of the line. */
	ttrace->exit_time = sample->time;

	/*
	 * With a known entry time the duration decides whether the line is
	 * shown; without one the line is dropped whenever a duration filter
	 * is set.
	 */
	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* No pending entry string: print a "continued" marker instead. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
/* Also reached through the goto in the final else branch below. */
signed_print:
		fprintf(trace->output, ") = %d", ret);
	} else if (ret < 0 && sc->fmt->errmsg) {
		char bf[256];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#x", ret);
	else
		goto signed_print;

	fputc('\n', trace->output);
out:
	ttrace->entry_pending = false;

	return 0;
}

/*
 * probe:vfs_getname handler: remember the "pathname" field of the sample so
 * that trace__sys_exit() can associate it with the fd returned by open.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      struct perf_sample *sample)
{
	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
	return 0;
}

/*
 * sched:sched_stat_runtime handler: add the sample's "runtime" field,
 * converted to milliseconds, to the per-thread and global totals.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
trace->runtime_ms += runtime_ms; 1718 return 0; 1719 1720 out_dump: 1721 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1722 evsel->name, 1723 perf_evsel__strval(evsel, sample, "comm"), 1724 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1725 runtime, 1726 perf_evsel__intval(evsel, sample, "vruntime")); 1727 return 0; 1728 } 1729 1730 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 1731 { 1732 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 1733 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 1734 return false; 1735 1736 if (trace->pid_list || trace->tid_list) 1737 return true; 1738 1739 return false; 1740 } 1741 1742 static int trace__process_sample(struct perf_tool *tool, 1743 union perf_event *event __maybe_unused, 1744 struct perf_sample *sample, 1745 struct perf_evsel *evsel, 1746 struct machine *machine __maybe_unused) 1747 { 1748 struct trace *trace = container_of(tool, struct trace, tool); 1749 int err = 0; 1750 1751 tracepoint_handler handler = evsel->handler; 1752 1753 if (skip_sample(trace, sample)) 1754 return 0; 1755 1756 if (!trace->full_time && trace->base_time == 0) 1757 trace->base_time = sample->time; 1758 1759 if (handler) 1760 handler(trace, evsel, sample); 1761 1762 return err; 1763 } 1764 1765 static bool 1766 perf_session__has_tp(struct perf_session *session, const char *name) 1767 { 1768 struct perf_evsel *evsel; 1769 1770 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name); 1771 1772 return evsel != NULL; 1773 } 1774 1775 static int parse_target_str(struct trace *trace) 1776 { 1777 if (trace->opts.target.pid) { 1778 trace->pid_list = intlist__new(trace->opts.target.pid); 1779 if (trace->pid_list == NULL) { 1780 pr_err("Error parsing process id string\n"); 1781 return -EINVAL; 1782 } 1783 } 1784 1785 if (trace->opts.target.tid) { 1786 trace->tid_list = intlist__new(trace->opts.target.tid); 1787 if (trace->tid_list == 
NULL) { 1788 pr_err("Error parsing thread id string\n"); 1789 return -EINVAL; 1790 } 1791 } 1792 1793 return 0; 1794 } 1795 1796 static int trace__record(int argc, const char **argv) 1797 { 1798 unsigned int rec_argc, i, j; 1799 const char **rec_argv; 1800 const char * const record_args[] = { 1801 "record", 1802 "-R", 1803 "-m", "1024", 1804 "-c", "1", 1805 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit", 1806 }; 1807 1808 rec_argc = ARRAY_SIZE(record_args) + argc; 1809 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1810 1811 if (rec_argv == NULL) 1812 return -ENOMEM; 1813 1814 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1815 rec_argv[i] = record_args[i]; 1816 1817 for (j = 0; j < (unsigned int)argc; j++, i++) 1818 rec_argv[i] = argv[j]; 1819 1820 return cmd_record(i, rec_argv, NULL); 1821 } 1822 1823 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 1824 1825 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 1826 { 1827 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname", 1828 evlist->nr_entries); 1829 if (evsel == NULL) 1830 return; 1831 1832 if (perf_evsel__field(evsel, "pathname") == NULL) { 1833 perf_evsel__delete(evsel); 1834 return; 1835 } 1836 1837 evsel->handler = trace__vfs_getname; 1838 perf_evlist__add(evlist, evsel); 1839 } 1840 1841 static int trace__run(struct trace *trace, int argc, const char **argv) 1842 { 1843 struct perf_evlist *evlist = perf_evlist__new(); 1844 struct perf_evsel *evsel; 1845 int err = -1, i; 1846 unsigned long before; 1847 const bool forks = argc > 0; 1848 1849 trace->live = true; 1850 1851 if (evlist == NULL) { 1852 fprintf(trace->output, "Not enough memory to run!\n"); 1853 goto out; 1854 } 1855 1856 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) 1857 goto out_error_tp; 1858 1859 perf_evlist__add_vfs_getname(evlist); 1860 1861 if (trace->sched && 1862 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 1863 
trace__sched_stat_runtime))
		goto out_error_tp;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_maps;
	}

	perf_evlist__config(evlist, &trace->opts);

	/* sig_handler() sets 'done', and 'interrupted' for SIGINT. */
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, false);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_maps;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = perf_evlist__mmap(evlist, UINT_MAX, false);
	if (err < 0) {
		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
		goto out_close_evlist;
	}

	perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* The tid is only printed when more than one thread may show up. */
	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
again:
	/* Snapshot used after the loop to tell whether any event was read. */
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			const u32 type = event->header.type;
			tracepoint_handler handler;
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			/* The first parsed event defines time zero. */
			if (!trace->full_time && trace->base_time == 0)
				trace->base_time = sample.time;

			if (type != PERF_RECORD_SAMPLE) {
				trace__process_event(trace,
trace->host, event, &sample); 1932 continue; 1933 } 1934 1935 evsel = perf_evlist__id2evsel(evlist, sample.id); 1936 if (evsel == NULL) { 1937 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 1938 goto next_event; 1939 } 1940 1941 if (sample.raw_data == NULL) { 1942 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 1943 perf_evsel__name(evsel), sample.tid, 1944 sample.cpu, sample.raw_size); 1945 goto next_event; 1946 } 1947 1948 handler = evsel->handler; 1949 handler(trace, evsel, &sample); 1950 next_event: 1951 perf_evlist__mmap_consume(evlist, i); 1952 1953 if (interrupted) 1954 goto out_disable; 1955 } 1956 } 1957 1958 if (trace->nr_events == before) { 1959 int timeout = done ? 100 : -1; 1960 1961 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) 1962 goto again; 1963 } else { 1964 goto again; 1965 } 1966 1967 out_disable: 1968 perf_evlist__disable(evlist); 1969 1970 if (!err) { 1971 if (trace->summary) 1972 trace__fprintf_thread_summary(trace, trace->output); 1973 1974 if (trace->show_tool_stats) { 1975 fprintf(trace->output, "Stats:\n " 1976 " vfs_getname : %" PRIu64 "\n" 1977 " proc_getname: %" PRIu64 "\n", 1978 trace->stats.vfs_getname, 1979 trace->stats.proc_getname); 1980 } 1981 } 1982 1983 perf_evlist__munmap(evlist); 1984 out_close_evlist: 1985 perf_evlist__close(evlist); 1986 out_delete_maps: 1987 perf_evlist__delete_maps(evlist); 1988 out_delete_evlist: 1989 perf_evlist__delete(evlist); 1990 out: 1991 trace->live = false; 1992 return err; 1993 { 1994 char errbuf[BUFSIZ]; 1995 1996 out_error_tp: 1997 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); 1998 goto out_error; 1999 2000 out_error_open: 2001 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2002 2003 out_error: 2004 fprintf(trace->output, "%s\n", errbuf); 2005 goto out_delete_evlist; 2006 } 2007 } 2008 2009 static int trace__replay(struct trace *trace) 2010 { 2011 const struct 
perf_evsel_str_handler handlers[] = { 2012 { "raw_syscalls:sys_enter", trace__sys_enter, }, 2013 { "raw_syscalls:sys_exit", trace__sys_exit, }, 2014 { "probe:vfs_getname", trace__vfs_getname, }, 2015 }; 2016 struct perf_data_file file = { 2017 .path = input_name, 2018 .mode = PERF_DATA_MODE_READ, 2019 }; 2020 struct perf_session *session; 2021 int err = -1; 2022 2023 trace->tool.sample = trace__process_sample; 2024 trace->tool.mmap = perf_event__process_mmap; 2025 trace->tool.mmap2 = perf_event__process_mmap2; 2026 trace->tool.comm = perf_event__process_comm; 2027 trace->tool.exit = perf_event__process_exit; 2028 trace->tool.fork = perf_event__process_fork; 2029 trace->tool.attr = perf_event__process_attr; 2030 trace->tool.tracing_data = perf_event__process_tracing_data; 2031 trace->tool.build_id = perf_event__process_build_id; 2032 2033 trace->tool.ordered_samples = true; 2034 trace->tool.ordering_requires_timestamps = true; 2035 2036 /* add tid to output */ 2037 trace->multiple_threads = true; 2038 2039 if (symbol__init() < 0) 2040 return -1; 2041 2042 session = perf_session__new(&file, false, &trace->tool); 2043 if (session == NULL) 2044 return -ENOMEM; 2045 2046 trace->host = &session->machines.host; 2047 2048 err = perf_session__set_tracepoints_handlers(session, handlers); 2049 if (err) 2050 goto out; 2051 2052 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) { 2053 pr_err("Data file does not have raw_syscalls:sys_enter events\n"); 2054 goto out; 2055 } 2056 2057 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) { 2058 pr_err("Data file does not have raw_syscalls:sys_exit events\n"); 2059 goto out; 2060 } 2061 2062 err = parse_target_str(trace); 2063 if (err != 0) 2064 goto out; 2065 2066 setup_pager(); 2067 2068 err = perf_session__process_events(session, &trace->tool); 2069 if (err) 2070 pr_err("Failed to process events, error %d", err); 2071 2072 else if (trace->summary) 2073 trace__fprintf_thread_summary(trace, trace->output); 2074 
out:
	/* Common exit of trace__replay(): the session is always released. */
	perf_session__delete(session);

	return err;
}

/*
 * Print the banner and column headings of the per-thread summary.
 * Returns the number of characters printed.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed  = fprintf(fp, "\n _____________________________________________________________________________\n");
	printed += fprintf(fp, " __) Summary of events (__\n\n");
	printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
	printed += fprintf(fp, " syscall count min max avg stddev\n");
	printed += fprintf(fp, " msec msec msec %%\n");
	printed += fprintf(fp, " _____________________________________________________________________________\n\n");

	return printed;
}

/*
 * Print one line of statistics per syscall recorded for a thread: count,
 * min, max and average in milliseconds, and the standard deviation as a
 * percentage of the average.  Returns the number of characters printed.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	struct stats *stats;
	size_t printed = 0;
	struct syscall *sc;
	struct int_node *inode = intlist__first(ttrace->syscall_stats);

	if (inode == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	/* each int_node is a syscall */
	while (inode) {
		stats = inode->priv;
		if (stats) {
			/* Stored values are divided by NSEC_PER_MSEC for display. */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* Guard against dividing by a zero average. */
			pct = avg ?
100.0 * stddev_stats(stats)/avg : 0.0; 2119 avg /= NSEC_PER_MSEC; 2120 2121 sc = &trace->syscalls.table[inode->i]; 2122 printed += fprintf(fp, "%24s %14s : ", "", sc->name); 2123 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f", 2124 n, min, max); 2125 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct); 2126 } 2127 2128 inode = intlist__next(inode); 2129 } 2130 2131 printed += fprintf(fp, "\n\n"); 2132 2133 return printed; 2134 } 2135 2136 /* struct used to pass data to per-thread function */ 2137 struct summary_data { 2138 FILE *fp; 2139 struct trace *trace; 2140 size_t printed; 2141 }; 2142 2143 static int trace__fprintf_one_thread(struct thread *thread, void *priv) 2144 { 2145 struct summary_data *data = priv; 2146 FILE *fp = data->fp; 2147 size_t printed = data->printed; 2148 struct trace *trace = data->trace; 2149 struct thread_trace *ttrace = thread->priv; 2150 const char *color; 2151 double ratio; 2152 2153 if (ttrace == NULL) 2154 return 0; 2155 2156 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2157 2158 color = PERF_COLOR_NORMAL; 2159 if (ratio > 50.0) 2160 color = PERF_COLOR_RED; 2161 else if (ratio > 25.0) 2162 color = PERF_COLOR_GREEN; 2163 else if (ratio > 5.0) 2164 color = PERF_COLOR_YELLOW; 2165 2166 printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread)); 2167 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); 2168 printed += color_fprintf(fp, color, "%5.1f%%", ratio); 2169 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); 2170 printed += thread__dump_stats(ttrace, trace, fp); 2171 2172 data->printed += printed; 2173 2174 return 0; 2175 } 2176 2177 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2178 { 2179 struct summary_data data = { 2180 .fp = fp, 2181 .trace = trace 2182 }; 2183 data.printed = trace__fprintf_threads_header(fp); 2184 2185 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); 2186 2187 return data.printed; 2188 } 2189 
2190 static int trace__set_duration(const struct option *opt, const char *str, 2191 int unset __maybe_unused) 2192 { 2193 struct trace *trace = opt->value; 2194 2195 trace->duration_filter = atof(str); 2196 return 0; 2197 } 2198 2199 static int trace__open_output(struct trace *trace, const char *filename) 2200 { 2201 struct stat st; 2202 2203 if (!stat(filename, &st) && st.st_size) { 2204 char oldname[PATH_MAX]; 2205 2206 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2207 unlink(oldname); 2208 rename(filename, oldname); 2209 } 2210 2211 trace->output = fopen(filename, "w"); 2212 2213 return trace->output == NULL ? -errno : 0; 2214 } 2215 2216 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2217 { 2218 const char * const trace_usage[] = { 2219 "perf trace [<options>] [<command>]", 2220 "perf trace [<options>] -- <command> [<options>]", 2221 "perf trace record [<options>] [<command>]", 2222 "perf trace record [<options>] -- <command> [<options>]", 2223 NULL 2224 }; 2225 struct trace trace = { 2226 .audit = { 2227 .machine = audit_detect_machine(), 2228 .open_id = audit_name_to_syscall("open", trace.audit.machine), 2229 }, 2230 .syscalls = { 2231 . 
max = -1, 2232 }, 2233 .opts = { 2234 .target = { 2235 .uid = UINT_MAX, 2236 .uses_mmap = true, 2237 }, 2238 .user_freq = UINT_MAX, 2239 .user_interval = ULLONG_MAX, 2240 .no_delay = true, 2241 .mmap_pages = 1024, 2242 }, 2243 .output = stdout, 2244 .show_comm = true, 2245 }; 2246 const char *output_name = NULL; 2247 const char *ev_qualifier_str = NULL; 2248 const struct option trace_options[] = { 2249 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2250 "show the thread COMM next to its id"), 2251 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2252 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 2253 "list of events to trace"), 2254 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2255 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2256 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2257 "trace events on existing process id"), 2258 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2259 "trace events on existing thread id"), 2260 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2261 "system-wide collection from all CPUs"), 2262 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2263 "list of cpus to monitor"), 2264 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2265 "child tasks do not inherit counters"), 2266 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2267 "number of mmap data pages", 2268 perf_evlist__parse_mmap_pages), 2269 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2270 "user to profile"), 2271 OPT_CALLBACK(0, "duration", &trace, "float", 2272 "show only events with duration > N.M ms", 2273 trace__set_duration), 2274 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2275 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2276 OPT_BOOLEAN('T', "time", &trace.full_time, 2277 "Show full timestamp, not time relative to first start"), 2278 OPT_BOOLEAN(0, "summary", &trace.summary, 2279 "Show syscall 
summary with statistics"), 2280 OPT_END() 2281 }; 2282 int err; 2283 char bf[BUFSIZ]; 2284 2285 if ((argc > 1) && (strcmp(argv[1], "record") == 0)) 2286 return trace__record(argc-2, &argv[2]); 2287 2288 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 2289 2290 if (output_name != NULL) { 2291 err = trace__open_output(&trace, output_name); 2292 if (err < 0) { 2293 perror("failed to create output file"); 2294 goto out; 2295 } 2296 } 2297 2298 if (ev_qualifier_str != NULL) { 2299 const char *s = ev_qualifier_str; 2300 2301 trace.not_ev_qualifier = *s == '!'; 2302 if (trace.not_ev_qualifier) 2303 ++s; 2304 trace.ev_qualifier = strlist__new(true, s); 2305 if (trace.ev_qualifier == NULL) { 2306 fputs("Not enough memory to parse event qualifier", 2307 trace.output); 2308 err = -ENOMEM; 2309 goto out_close; 2310 } 2311 } 2312 2313 err = perf_target__validate(&trace.opts.target); 2314 if (err) { 2315 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2316 fprintf(trace.output, "%s", bf); 2317 goto out_close; 2318 } 2319 2320 err = perf_target__parse_uid(&trace.opts.target); 2321 if (err) { 2322 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2323 fprintf(trace.output, "%s", bf); 2324 goto out_close; 2325 } 2326 2327 if (!argc && perf_target__none(&trace.opts.target)) 2328 trace.opts.target.system_wide = true; 2329 2330 if (input_name) 2331 err = trace__replay(&trace); 2332 else 2333 err = trace__run(&trace, argc, argv); 2334 2335 out_close: 2336 if (output_name != NULL) 2337 fclose(trace.output); 2338 out: 2339 return err; 2340 } 2341