1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/exec_cmd.h" 7 #include "util/machine.h" 8 #include "util/session.h" 9 #include "util/thread.h" 10 #include "util/parse-options.h" 11 #include "util/strlist.h" 12 #include "util/intlist.h" 13 #include "util/thread_map.h" 14 #include "util/stat.h" 15 #include "trace-event.h" 16 #include "util/parse-events.h" 17 18 #include <libaudit.h> 19 #include <stdlib.h> 20 #include <sys/mman.h> 21 #include <linux/futex.h> 22 23 /* For older distros: */ 24 #ifndef MAP_STACK 25 # define MAP_STACK 0x20000 26 #endif 27 28 #ifndef MADV_HWPOISON 29 # define MADV_HWPOISON 100 30 #endif 31 32 #ifndef MADV_MERGEABLE 33 # define MADV_MERGEABLE 12 34 #endif 35 36 #ifndef MADV_UNMERGEABLE 37 # define MADV_UNMERGEABLE 13 38 #endif 39 40 #ifndef EFD_SEMAPHORE 41 # define EFD_SEMAPHORE 1 42 #endif 43 44 #ifndef EFD_NONBLOCK 45 # define EFD_NONBLOCK 00004000 46 #endif 47 48 #ifndef EFD_CLOEXEC 49 # define EFD_CLOEXEC 02000000 50 #endif 51 52 #ifndef O_CLOEXEC 53 # define O_CLOEXEC 02000000 54 #endif 55 56 #ifndef SOCK_DCCP 57 # define SOCK_DCCP 6 58 #endif 59 60 #ifndef SOCK_CLOEXEC 61 # define SOCK_CLOEXEC 02000000 62 #endif 63 64 #ifndef SOCK_NONBLOCK 65 # define SOCK_NONBLOCK 00004000 66 #endif 67 68 #ifndef MSG_CMSG_CLOEXEC 69 # define MSG_CMSG_CLOEXEC 0x40000000 70 #endif 71 72 #ifndef PERF_FLAG_FD_NO_GROUP 73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0) 74 #endif 75 76 #ifndef PERF_FLAG_FD_OUTPUT 77 # define PERF_FLAG_FD_OUTPUT (1UL << 1) 78 #endif 79 80 #ifndef PERF_FLAG_PID_CGROUP 81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ 82 #endif 83 84 #ifndef PERF_FLAG_FD_CLOEXEC 85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ 86 #endif 87 88 89 struct tp_field { 90 int offset; 91 union { 92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 94 }; 95 }; 96 97 #define TP_UINT_FIELD(bits) \ 98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 99 { \ 100 u##bits value; \ 101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 102 return value; \ 103 } 104 105 TP_UINT_FIELD(8); 106 TP_UINT_FIELD(16); 107 TP_UINT_FIELD(32); 108 TP_UINT_FIELD(64); 109 110 #define TP_UINT_FIELD__SWAPPED(bits) \ 111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 112 { \ 113 u##bits value; \ 114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 115 return bswap_##bits(value);\ 116 } 117 118 TP_UINT_FIELD__SWAPPED(16); 119 TP_UINT_FIELD__SWAPPED(32); 120 TP_UINT_FIELD__SWAPPED(64); 121 122 static int tp_field__init_uint(struct tp_field *field, 123 struct format_field *format_field, 124 bool needs_swap) 125 { 126 field->offset = format_field->offset; 127 128 switch (format_field->size) { 129 case 1: 130 field->integer = tp_field__u8; 131 break; 132 case 2: 133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 134 break; 135 case 4: 136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 137 break; 138 case 8: 139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 140 break; 141 default: 142 return -1; 143 } 144 145 return 0; 146 } 147 148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 149 { 150 return sample->raw_data + field->offset; 151 } 152 153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 154 { 155 field->offset = format_field->offset; 156 field->pointer = tp_field__ptr; 157 return 0; 158 } 159 160 struct syscall_tp { 161 struct tp_field id; 162 union { 163 struct tp_field args, ret; 164 }; 165 }; 166 167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 168 struct tp_field *field, 169 const char *name) 170 { 171 struct format_field *format_field = perf_evsel__field(evsel, name); 172 173 if (format_field == NULL) 174 return -1; 175 176 return tp_field__init_uint(field, format_field, evsel->needs_swap); 177 } 178 179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 180 ({ struct syscall_tp *sc = evsel->priv;\ 181 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 182 183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 184 struct tp_field *field, 185 const char *name) 186 { 187 struct format_field *format_field = perf_evsel__field(evsel, name); 188 189 if (format_field == NULL) 190 return -1; 191 192 return tp_field__init_ptr(field, format_field); 193 } 194 195 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 196 ({ struct syscall_tp *sc = evsel->priv;\ 197 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 198 199 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 200 { 201 zfree(&evsel->priv); 202 perf_evsel__delete(evsel); 203 } 204 205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 206 { 207 evsel->priv = malloc(sizeof(struct syscall_tp)); 208 if (evsel->priv != NULL) { 209 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 210 goto out_delete; 211 212 evsel->handler = handler; 213 return 0; 214 } 215 216 return -ENOMEM; 217 218 out_delete: 219 zfree(&evsel->priv); 220 return -ENOENT; 221 } 222 223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 224 { 225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 226 227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 228 if (evsel == NULL) 229 evsel = perf_evsel__newtp("syscalls", direction); 230 231 if (evsel) { 232 if (perf_evsel__init_syscall_tp(evsel, handler)) 233 goto out_delete; 234 } 235 236 return evsel; 237 238 out_delete: 239 perf_evsel__delete_priv(evsel); 240 return NULL; 241 } 242 243 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 244 ({ struct syscall_tp *fields = evsel->priv; \ 245 fields->name.integer(&fields->name, sample); }) 246 247 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 248 ({ struct syscall_tp *fields = evsel->priv; \ 249 fields->name.pointer(&fields->name, sample); }) 250 251 struct syscall_arg { 252 unsigned long val; 253 struct thread *thread; 254 struct trace *trace; 255 void *parm; 256 u8 idx; 257 u8 mask; 258 }; 259 260 struct strarray { 261 int offset; 262 int nr_entries; 263 const char **entries; 264 }; 265 266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 267 .nr_entries = ARRAY_SIZE(array), \ 268 .entries = array, \ 269 } 270 271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 272 .offset = off, \ 273 .nr_entries = ARRAY_SIZE(array), \ 274 .entries = array, \ 275 } 276 277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 278 const char *intfmt, 279 struct syscall_arg *arg) 280 { 281 struct strarray *sa = arg->parm; 282 int idx = arg->val - sa->offset; 283 284 if (idx < 0 || idx >= sa->nr_entries) 285 return scnprintf(bf, size, intfmt, arg->val); 286 287 return scnprintf(bf, size, "%s", sa->entries[idx]); 288 } 289 290 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 291 struct syscall_arg *arg) 292 { 293 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 294 } 295 296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 297 298 #if defined(__i386__) || defined(__x86_64__) 299 /* 300 * FIXME: Make this available to all arches as soon as the ioctl beautifier 301 * gets rewritten to support all arches. 302 */ 303 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 304 struct syscall_arg *arg) 305 { 306 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 307 } 308 309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 310 #endif /* defined(__i386__) || defined(__x86_64__) */ 311 312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 313 struct syscall_arg *arg); 314 315 #define SCA_FD syscall_arg__scnprintf_fd 316 317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 318 struct syscall_arg *arg) 319 { 320 int fd = arg->val; 321 322 if (fd == AT_FDCWD) 323 return scnprintf(bf, size, "CWD"); 324 325 return syscall_arg__scnprintf_fd(bf, size, arg); 326 } 327 328 #define SCA_FDAT syscall_arg__scnprintf_fd_at 329 330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 331 struct syscall_arg *arg); 332 333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 334 335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 336 struct syscall_arg *arg) 337 { 338 return scnprintf(bf, size, "%#lx", arg->val); 339 } 340 341 #define SCA_HEX syscall_arg__scnprintf_hex 342 343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size, 344 struct syscall_arg *arg) 345 { 346 return scnprintf(bf, size, "%d", arg->val); 347 } 348 349 #define SCA_INT syscall_arg__scnprintf_int 350 351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 352 struct syscall_arg *arg) 353 { 354 int printed = 0, prot = arg->val; 355 356 if (prot == PROT_NONE) 357 return scnprintf(bf, size, "NONE"); 358 #define P_MMAP_PROT(n) \ 359 if (prot & PROT_##n) { \ 360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 361 prot &= ~PROT_##n; \ 362 } 363 364 P_MMAP_PROT(EXEC); 365 P_MMAP_PROT(READ); 366 P_MMAP_PROT(WRITE); 367 #ifdef PROT_SEM 368 P_MMAP_PROT(SEM); 369 #endif 370 P_MMAP_PROT(GROWSDOWN); 371 P_MMAP_PROT(GROWSUP); 372 #undef P_MMAP_PROT 373 374 if (prot) 375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 376 377 return printed; 378 } 379 380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 381 382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 383 struct syscall_arg *arg) 384 { 385 int printed = 0, flags = arg->val; 386 387 #define P_MMAP_FLAG(n) \ 388 if (flags & MAP_##n) { \ 389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 390 flags &= ~MAP_##n; \ 391 } 392 393 P_MMAP_FLAG(SHARED); 394 P_MMAP_FLAG(PRIVATE); 395 #ifdef MAP_32BIT 396 P_MMAP_FLAG(32BIT); 397 #endif 398 P_MMAP_FLAG(ANONYMOUS); 399 P_MMAP_FLAG(DENYWRITE); 400 P_MMAP_FLAG(EXECUTABLE); 401 P_MMAP_FLAG(FILE); 402 P_MMAP_FLAG(FIXED); 403 P_MMAP_FLAG(GROWSDOWN); 404 #ifdef MAP_HUGETLB 405 P_MMAP_FLAG(HUGETLB); 406 #endif 407 P_MMAP_FLAG(LOCKED); 408 P_MMAP_FLAG(NONBLOCK); 409 P_MMAP_FLAG(NORESERVE); 410 P_MMAP_FLAG(POPULATE); 411 P_MMAP_FLAG(STACK); 412 #ifdef MAP_UNINITIALIZED 413 P_MMAP_FLAG(UNINITIALIZED); 414 #endif 415 #undef P_MMAP_FLAG 416 417 if (flags) 418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 419 420 return printed; 421 } 422 423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 424 425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, 426 struct syscall_arg *arg) 427 { 428 int printed = 0, flags = arg->val; 429 430 #define P_MREMAP_FLAG(n) \ 431 if (flags & MREMAP_##n) { \ 432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 433 flags &= ~MREMAP_##n; \ 434 } 435 436 P_MREMAP_FLAG(MAYMOVE); 437 #ifdef MREMAP_FIXED 438 P_MREMAP_FLAG(FIXED); 439 #endif 440 #undef P_MREMAP_FLAG 441 442 if (flags) 443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 444 445 return printed; 446 } 447 448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags 449 450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 451 struct syscall_arg *arg) 452 { 453 int behavior = arg->val; 454 455 switch (behavior) { 456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 457 P_MADV_BHV(NORMAL); 458 P_MADV_BHV(RANDOM); 459 P_MADV_BHV(SEQUENTIAL); 460 P_MADV_BHV(WILLNEED); 461 P_MADV_BHV(DONTNEED); 462 P_MADV_BHV(REMOVE); 463 P_MADV_BHV(DONTFORK); 464 P_MADV_BHV(DOFORK); 465 P_MADV_BHV(HWPOISON); 466 #ifdef MADV_SOFT_OFFLINE 467 P_MADV_BHV(SOFT_OFFLINE); 468 #endif 469 P_MADV_BHV(MERGEABLE); 470 P_MADV_BHV(UNMERGEABLE); 471 #ifdef MADV_HUGEPAGE 472 P_MADV_BHV(HUGEPAGE); 473 #endif 474 #ifdef MADV_NOHUGEPAGE 475 P_MADV_BHV(NOHUGEPAGE); 476 #endif 477 #ifdef MADV_DONTDUMP 478 P_MADV_BHV(DONTDUMP); 479 #endif 480 #ifdef MADV_DODUMP 481 P_MADV_BHV(DODUMP); 482 #endif 483 #undef P_MADV_PHV 484 default: break; 485 } 486 487 return scnprintf(bf, size, "%#x", behavior); 488 } 489 490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 491 492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 493 struct syscall_arg *arg) 494 { 495 int printed = 0, op = arg->val; 496 497 if (op == 0) 498 return scnprintf(bf, size, "NONE"); 499 #define P_CMD(cmd) \ 500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 502 op &= ~LOCK_##cmd; \ 503 } 504 505 P_CMD(SH); 506 P_CMD(EX); 507 P_CMD(NB); 508 P_CMD(UN); 509 P_CMD(MAND); 510 P_CMD(RW); 511 P_CMD(READ); 512 P_CMD(WRITE); 513 #undef P_OP 514 515 if (op) 516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); 517 518 return printed; 519 } 520 521 #define SCA_FLOCK syscall_arg__scnprintf_flock 522 523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 524 { 525 enum syscall_futex_args { 526 SCF_UADDR = (1 << 0), 527 SCF_OP = (1 << 1), 528 SCF_VAL = (1 << 2), 529 SCF_TIMEOUT = (1 << 3), 530 SCF_UADDR2 = (1 << 4), 531 SCF_VAL3 = (1 << 5), 532 }; 533 int op = arg->val; 534 int cmd = op & FUTEX_CMD_MASK; 535 size_t printed = 0; 536 537 switch (cmd) { 538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 545 P_FUTEX_OP(WAKE_OP); break; 546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 551 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 552 default: printed = scnprintf(bf, size, "%#x", cmd); break; 553 } 554 555 if (op & FUTEX_PRIVATE_FLAG) 556 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 557 558 if (op & FUTEX_CLOCK_REALTIME) 559 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 560 561 return printed; 562 } 563 564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 565 566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 568 569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 570 static DEFINE_STRARRAY(itimers); 571 572 static const char *whences[] = { "SET", "CUR", "END", 573 #ifdef SEEK_DATA 574 "DATA", 575 #endif 576 #ifdef SEEK_HOLE 577 "HOLE", 578 #endif 579 }; 580 static DEFINE_STRARRAY(whences); 581 582 static const char *fcntl_cmds[] = { 583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 586 "F_GETOWNER_UIDS", 587 }; 588 static DEFINE_STRARRAY(fcntl_cmds); 589 590 static const char *rlimit_resources[] = { 591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 593 "RTTIME", 594 }; 595 static DEFINE_STRARRAY(rlimit_resources); 596 597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 598 static DEFINE_STRARRAY(sighow); 599 600 static const char *clockid[] = { 601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 603 }; 604 static DEFINE_STRARRAY(clockid); 605 606 static const char *socket_families[] = { 607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 612 "ALG", "NFC", "VSOCK", 613 }; 614 static DEFINE_STRARRAY(socket_families); 615 616 #ifndef SOCK_TYPE_MASK 617 #define SOCK_TYPE_MASK 0xf 618 #endif 619 620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 621 struct syscall_arg *arg) 622 { 623 size_t printed; 624 int type = arg->val, 625 flags = type & ~SOCK_TYPE_MASK; 626 627 type &= SOCK_TYPE_MASK; 628 /* 629 * Can't use a strarray, MIPS may override for ABI reasons. 630 */ 631 switch (type) { 632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 633 P_SK_TYPE(STREAM); 634 P_SK_TYPE(DGRAM); 635 P_SK_TYPE(RAW); 636 P_SK_TYPE(RDM); 637 P_SK_TYPE(SEQPACKET); 638 P_SK_TYPE(DCCP); 639 P_SK_TYPE(PACKET); 640 #undef P_SK_TYPE 641 default: 642 printed = scnprintf(bf, size, "%#x", type); 643 } 644 645 #define P_SK_FLAG(n) \ 646 if (flags & SOCK_##n) { \ 647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 648 flags &= ~SOCK_##n; \ 649 } 650 651 P_SK_FLAG(CLOEXEC); 652 P_SK_FLAG(NONBLOCK); 653 #undef P_SK_FLAG 654 655 if (flags) 656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 657 658 return printed; 659 } 660 661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 662 663 #ifndef MSG_PROBE 664 #define MSG_PROBE 0x10 665 #endif 666 #ifndef MSG_WAITFORONE 667 #define MSG_WAITFORONE 0x10000 668 #endif 669 #ifndef MSG_SENDPAGE_NOTLAST 670 #define MSG_SENDPAGE_NOTLAST 0x20000 671 #endif 672 #ifndef MSG_FASTOPEN 673 #define MSG_FASTOPEN 0x20000000 674 #endif 675 676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 677 struct syscall_arg *arg) 678 { 679 int printed = 0, flags = arg->val; 680 681 if (flags == 0) 682 return scnprintf(bf, size, "NONE"); 683 #define P_MSG_FLAG(n) \ 684 if (flags & MSG_##n) { \ 685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 686 flags &= ~MSG_##n; \ 687 } 688 689 P_MSG_FLAG(OOB); 690 P_MSG_FLAG(PEEK); 691 P_MSG_FLAG(DONTROUTE); 692 P_MSG_FLAG(TRYHARD); 693 P_MSG_FLAG(CTRUNC); 694 P_MSG_FLAG(PROBE); 695 P_MSG_FLAG(TRUNC); 696 P_MSG_FLAG(DONTWAIT); 697 P_MSG_FLAG(EOR); 698 P_MSG_FLAG(WAITALL); 699 P_MSG_FLAG(FIN); 700 P_MSG_FLAG(SYN); 701 P_MSG_FLAG(CONFIRM); 702 P_MSG_FLAG(RST); 703 P_MSG_FLAG(ERRQUEUE); 704 P_MSG_FLAG(NOSIGNAL); 705 P_MSG_FLAG(MORE); 706 P_MSG_FLAG(WAITFORONE); 707 P_MSG_FLAG(SENDPAGE_NOTLAST); 708 P_MSG_FLAG(FASTOPEN); 709 P_MSG_FLAG(CMSG_CLOEXEC); 710 #undef P_MSG_FLAG 711 712 if (flags) 713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 714 715 return printed; 716 } 717 718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 719 720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 721 struct syscall_arg *arg) 722 { 723 size_t printed = 0; 724 int mode = arg->val; 725 726 if (mode == F_OK) /* 0 */ 727 return scnprintf(bf, size, "F"); 728 #define P_MODE(n) \ 729 if (mode & n##_OK) { \ 730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 731 mode &= ~n##_OK; \ 732 } 733 734 P_MODE(R); 735 P_MODE(W); 736 P_MODE(X); 737 #undef P_MODE 738 739 if (mode) 740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 741 742 return printed; 743 } 744 745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 746 747 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 748 struct syscall_arg *arg) 749 { 750 int printed = 0, flags = arg->val; 751 752 if (!(flags & O_CREAT)) 753 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 754 755 if (flags == 0) 756 return scnprintf(bf, size, "RDONLY"); 757 #define P_FLAG(n) \ 758 if (flags & O_##n) { \ 759 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 760 flags &= ~O_##n; \ 761 } 762 763 P_FLAG(APPEND); 764 P_FLAG(ASYNC); 765 P_FLAG(CLOEXEC); 766 P_FLAG(CREAT); 767 P_FLAG(DIRECT); 768 P_FLAG(DIRECTORY); 769 P_FLAG(EXCL); 770 P_FLAG(LARGEFILE); 771 P_FLAG(NOATIME); 772 P_FLAG(NOCTTY); 773 #ifdef O_NONBLOCK 774 P_FLAG(NONBLOCK); 775 #elif O_NDELAY 776 P_FLAG(NDELAY); 777 #endif 778 #ifdef O_PATH 779 P_FLAG(PATH); 780 #endif 781 P_FLAG(RDWR); 782 #ifdef O_DSYNC 783 if ((flags & O_SYNC) == O_SYNC) 784 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 785 else { 786 P_FLAG(DSYNC); 787 } 788 #else 789 P_FLAG(SYNC); 790 #endif 791 P_FLAG(TRUNC); 792 P_FLAG(WRONLY); 793 #undef P_FLAG 794 795 if (flags) 796 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 797 798 return printed; 799 } 800 801 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 802 803 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, 804 struct syscall_arg *arg) 805 { 806 int printed = 0, flags = arg->val; 807 808 if (flags == 0) 809 return 0; 810 811 #define P_FLAG(n) \ 812 if (flags & PERF_FLAG_##n) { \ 813 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 814 flags &= ~PERF_FLAG_##n; \ 815 } 816 817 P_FLAG(FD_NO_GROUP); 818 P_FLAG(FD_OUTPUT); 819 P_FLAG(PID_CGROUP); 820 P_FLAG(FD_CLOEXEC); 821 #undef P_FLAG 822 823 if (flags) 824 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 825 826 return printed; 827 } 828 829 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags 830 831 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 832 struct syscall_arg *arg) 833 { 834 int printed = 0, flags = arg->val; 835 836 if (flags == 0) 837 return scnprintf(bf, size, "NONE"); 838 #define P_FLAG(n) \ 839 if (flags & EFD_##n) { \ 840 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 841 flags &= ~EFD_##n; \ 842 } 843 844 P_FLAG(SEMAPHORE); 845 P_FLAG(CLOEXEC); 846 P_FLAG(NONBLOCK); 847 #undef P_FLAG 848 849 if (flags) 850 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 851 852 return printed; 853 } 854 855 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 856 857 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 858 struct syscall_arg *arg) 859 { 860 int printed = 0, flags = arg->val; 861 862 #define P_FLAG(n) \ 863 if (flags & O_##n) { \ 864 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 865 flags &= ~O_##n; \ 866 } 867 868 P_FLAG(CLOEXEC); 869 P_FLAG(NONBLOCK); 870 #undef P_FLAG 871 872 if (flags) 873 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 874 875 return printed; 876 } 877 878 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 879 880 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 881 { 882 int sig = arg->val; 883 884 switch (sig) { 885 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 886 P_SIGNUM(HUP); 887 P_SIGNUM(INT); 888 P_SIGNUM(QUIT); 889 P_SIGNUM(ILL); 890 P_SIGNUM(TRAP); 891 P_SIGNUM(ABRT); 892 P_SIGNUM(BUS); 893 P_SIGNUM(FPE); 894 P_SIGNUM(KILL); 895 P_SIGNUM(USR1); 896 P_SIGNUM(SEGV); 897 P_SIGNUM(USR2); 898 P_SIGNUM(PIPE); 899 P_SIGNUM(ALRM); 900 P_SIGNUM(TERM); 901 P_SIGNUM(CHLD); 902 P_SIGNUM(CONT); 903 P_SIGNUM(STOP); 904 P_SIGNUM(TSTP); 905 P_SIGNUM(TTIN); 906 P_SIGNUM(TTOU); 907 P_SIGNUM(URG); 908 P_SIGNUM(XCPU); 909 P_SIGNUM(XFSZ); 910 P_SIGNUM(VTALRM); 911 P_SIGNUM(PROF); 912 P_SIGNUM(WINCH); 913 P_SIGNUM(IO); 914 P_SIGNUM(PWR); 915 P_SIGNUM(SYS); 916 #ifdef SIGEMT 917 P_SIGNUM(EMT); 918 #endif 919 #ifdef SIGSTKFLT 920 P_SIGNUM(STKFLT); 921 #endif 922 #ifdef SIGSWI 923 P_SIGNUM(SWI); 924 #endif 925 default: break; 926 } 927 928 return scnprintf(bf, size, "%#x", sig); 929 } 930 931 #define SCA_SIGNUM syscall_arg__scnprintf_signum 932 933 #if defined(__i386__) || defined(__x86_64__) 934 /* 935 * FIXME: Make this available to all arches. 936 */ 937 #define TCGETS 0x5401 938 939 static const char *tioctls[] = { 940 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 941 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 942 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 943 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 944 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 945 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 946 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 947 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 948 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 949 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 950 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 951 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 952 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 953 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 954 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 955 }; 956 957 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 958 #endif /* defined(__i386__) || defined(__x86_64__) */ 959 960 #define STRARRAY(arg, name, array) \ 961 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 962 .arg_parm = { [arg] = &strarray__##array, } 963 964 static struct syscall_fmt { 965 const char *name; 966 const char *alias; 967 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 968 void *arg_parm[6]; 969 bool errmsg; 970 bool timeout; 971 bool hexret; 972 } syscall_fmts[] = { 973 { .name = "access", .errmsg = true, 974 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 975 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 976 { .name = "brk", .hexret = true, 977 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 978 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 979 { .name = "close", .errmsg = true, 980 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 981 { .name = "connect", .errmsg = true, }, 982 { .name = "dup", .errmsg = true, 983 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 984 { .name = "dup2", .errmsg = true, 985 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 986 { .name = "dup3", .errmsg = true, 987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 988 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 989 { .name = "eventfd2", .errmsg = true, 990 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 991 { .name = "faccessat", .errmsg = true, 992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 993 { .name = "fadvise64", .errmsg = true, 994 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 995 { .name = "fallocate", .errmsg = true, 996 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 997 { .name = "fchdir", .errmsg = true, 998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 999 { .name = "fchmod", .errmsg = true, 1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1001 { .name = "fchmodat", .errmsg = true, 1002 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1003 { .name = "fchown", .errmsg = true, 1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1005 { .name = "fchownat", .errmsg = true, 1006 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1007 { .name = "fcntl", .errmsg = true, 1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1009 [1] = SCA_STRARRAY, /* cmd */ }, 1010 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 1011 { .name = "fdatasync", .errmsg = true, 1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1013 { .name = "flock", .errmsg = true, 1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1015 [1] = SCA_FLOCK, /* cmd */ }, }, 1016 { .name = "fsetxattr", .errmsg = true, 1017 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1018 { .name = "fstat", .errmsg = true, .alias = "newfstat", 1019 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1020 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 1021 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1022 { .name = "fstatfs", .errmsg = true, 1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1024 { .name = "fsync", .errmsg = true, 1025 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1026 { .name = "ftruncate", .errmsg = true, 1027 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1028 { .name = "futex", .errmsg = true, 1029 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 1030 { .name = "futimesat", .errmsg = true, 1031 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1032 { .name = "getdents", .errmsg = true, 1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1034 { .name = "getdents64", .errmsg = true, 1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1036 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1037 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1038 { .name = "ioctl", .errmsg = true, 1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1040 #if defined(__i386__) || defined(__x86_64__) 1041 /* 1042 * FIXME: Make this available to all arches. 1043 */ 1044 [1] = SCA_STRHEXARRAY, /* cmd */ 1045 [2] = SCA_HEX, /* arg */ }, 1046 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 1047 #else 1048 [2] = SCA_HEX, /* arg */ }, }, 1049 #endif 1050 { .name = "kill", .errmsg = true, 1051 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1052 { .name = "linkat", .errmsg = true, 1053 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1054 { .name = "lseek", .errmsg = true, 1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1056 [2] = SCA_STRARRAY, /* whence */ }, 1057 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 1058 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 1059 { .name = "madvise", .errmsg = true, 1060 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1061 [2] = SCA_MADV_BHV, /* behavior */ }, }, 1062 { .name = "mkdirat", .errmsg = true, 1063 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1064 { .name = "mknodat", .errmsg = true, 1065 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1066 { .name = "mlock", .errmsg = true, 1067 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1068 { .name = "mlockall", .errmsg = true, 1069 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1070 { .name = "mmap", .hexret = true, 1071 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1072 [2] = SCA_MMAP_PROT, /* prot */ 1073 [3] = SCA_MMAP_FLAGS, /* flags */ 1074 [4] = SCA_FD, /* fd */ }, }, 1075 { .name = "mprotect", .errmsg = true, 1076 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1077 [2] = SCA_MMAP_PROT, /* prot */ }, }, 1078 { .name = "mremap", .hexret = true, 1079 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1080 [3] = SCA_MREMAP_FLAGS, /* flags */ 1081 [4] = SCA_HEX, /* new_addr */ }, }, 1082 { .name = "munlock", .errmsg = true, 1083 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1084 { .name = "munmap", .errmsg = true, 1085 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1086 { .name = "name_to_handle_at", .errmsg = true, 1087 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1088 { .name = "newfstatat", .errmsg = true, 1089 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1090 { .name = "open", .errmsg = true, 1091 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 1092 { .name = "open_by_handle_at", .errmsg = true, 1093 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1094 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1095 { .name = "openat", .errmsg = true, 1096 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1097 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1098 { .name = "perf_event_open", .errmsg = true, 1099 .arg_scnprintf = { [1] = SCA_INT, /* pid */ 1100 [2] = SCA_INT, /* cpu */ 1101 [3] = SCA_FD, /* group_fd */ 1102 [4] = SCA_PERF_FLAGS, /* flags */ }, }, 1103 { .name = "pipe2", .errmsg = true, 1104 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 1105 { .name = "poll", .errmsg = true, .timeout = true, }, 1106 { .name = "ppoll", .errmsg = true, .timeout = true, }, 1107 { .name = "pread", .errmsg = true, .alias = "pread64", 1108 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1109 { .name = "preadv", .errmsg = true, .alias = "pread", 1110 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1111 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 1112 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 1113 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1114 { .name = "pwritev", .errmsg = true, 1115 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1116 { .name = "read", .errmsg = true, 1117 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1118 { .name = "readlinkat", .errmsg = true, 1119 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1120 { .name = "readv", .errmsg = true, 1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1122 { .name = "recvfrom", .errmsg = true, 1123 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1124 { .name = "recvmmsg", .errmsg = true, 1125 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1126 { .name = "recvmsg", .errmsg = true, 1127 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1128 { .name = "renameat", .errmsg = true, 1129 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1130 { .name = "rt_sigaction", .errmsg = true, 1131 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1132 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1133 { .name = "rt_sigqueueinfo", .errmsg = true, 1134 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1135 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1136 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1137 { .name = "select", .errmsg = true, .timeout = true, }, 1138 { .name = "sendmmsg", .errmsg = true, 1139 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1140 { .name = "sendmsg", .errmsg = true, 1141 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1142 { .name = "sendto", .errmsg = true, 1143 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1144 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1145 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1146 { .name = "shutdown", .errmsg = true, 1147 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1148 { .name = "socket", .errmsg = true, 1149 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1150 [1] = SCA_SK_TYPE, /* type */ }, 1151 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1152 { .name = "socketpair", .errmsg = true, 1153 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1154 [1] = SCA_SK_TYPE, /* type */ }, 1155 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1156 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1157 { .name = "symlinkat", .errmsg = true, 1158 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1159 { .name = "tgkill", .errmsg = true, 1160 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1161 { .name = "tkill", .errmsg = true, 1162 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1163 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1164 { .name = "unlinkat", .errmsg = true, 1165 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1166 { .name = "utimensat", .errmsg = true, 1167 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1168 { .name = "write", .errmsg = true, 1169 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1170 { .name = "writev", .errmsg = true, 1171 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1172 }; 1173 1174 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1175 { 1176 const struct syscall_fmt *fmt = fmtp; 1177 return strcmp(name, fmt->name); 1178 } 1179 1180 static struct syscall_fmt *syscall_fmt__find(const char *name) 1181 { 1182 const int nmemb = ARRAY_SIZE(syscall_fmts); 1183 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1184 } 1185 1186 struct syscall { 1187 struct event_format *tp_format; 1188 int nr_args; 1189 struct format_field *args; 1190 const char *name; 1191 bool is_exit; 1192 struct syscall_fmt *fmt; 1193 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1194 void **arg_parm; 1195 }; 1196 1197 static size_t fprintf_duration(unsigned long t, FILE *fp) 1198 { 1199 double duration = (double)t / NSEC_PER_MSEC; 1200 size_t printed = fprintf(fp, "("); 1201 1202 if (duration >= 1.0) 1203 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1204 else if (duration >= 0.01) 1205 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1206 else 1207 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1208 return printed + fprintf(fp, "): "); 1209 } 1210 1211 struct thread_trace { 1212 u64 entry_time; 1213 u64 exit_time; 1214 bool entry_pending; 1215 unsigned long nr_events; 1216 unsigned long pfmaj, pfmin; 1217 char *entry_str; 1218 double runtime_ms; 1219 struct { 1220 int max; 1221 char **table; 1222 } paths; 1223 1224 struct intlist *syscall_stats; 1225 }; 1226 1227 static struct thread_trace *thread_trace__new(void) 1228 { 1229 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1230 1231 if (ttrace) 1232 ttrace->paths.max = -1; 1233 1234 ttrace->syscall_stats = intlist__new(NULL); 1235 1236 return ttrace; 1237 } 1238 1239 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 1240 { 1241 struct thread_trace *ttrace; 1242 1243 if (thread == NULL) 1244 goto fail; 1245 1246 if (thread__priv(thread) == NULL) 1247 thread__set_priv(thread, thread_trace__new()); 1248 1249 if (thread__priv(thread) == NULL) 1250 goto fail; 1251 1252 ttrace = thread__priv(thread); 1253 ++ttrace->nr_events; 1254 1255 return ttrace; 1256 fail: 1257 color_fprintf(fp, PERF_COLOR_RED, 1258 "WARNING: not enough memory, dropping samples!\n"); 1259 return NULL; 1260 } 1261 1262 #define TRACE_PFMAJ (1 << 0) 1263 #define TRACE_PFMIN (1 << 1) 1264 1265 struct trace { 1266 struct perf_tool tool; 1267 struct { 1268 int machine; 1269 int open_id; 1270 } audit; 1271 struct { 1272 int max; 1273 struct syscall *table; 1274 struct { 1275 struct perf_evsel *sys_enter, 1276 *sys_exit; 1277 } events; 1278 } syscalls; 1279 struct record_opts opts; 1280 struct perf_evlist *evlist; 1281 struct machine *host; 1282 struct thread *current; 1283 u64 base_time; 1284 FILE *output; 1285 unsigned long nr_events; 1286 struct strlist *ev_qualifier; 1287 struct { 1288 size_t nr; 1289 int *entries; 1290 } ev_qualifier_ids; 1291 const char *last_vfs_getname; 1292 struct intlist *tid_list; 1293 struct intlist *pid_list; 1294 struct { 1295 size_t nr; 1296 pid_t *entries; 1297 } filter_pids; 1298 double duration_filter; 1299 double runtime_ms; 1300 struct { 1301 u64 vfs_getname, 1302 proc_getname; 1303 } stats; 1304 bool not_ev_qualifier; 1305 bool live; 1306 bool full_time; 1307 bool sched; 1308 bool multiple_threads; 1309 bool summary; 1310 bool summary_only; 1311 bool show_comm; 1312 bool show_tool_stats; 1313 bool trace_syscalls; 1314 bool force; 1315 int trace_pgfaults; 1316 }; 1317 1318 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 1319 { 1320 struct thread_trace *ttrace = thread__priv(thread); 1321 1322 if (fd > ttrace->paths.max) { 1323 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 1324 1325 if (npath == NULL) 1326 return -1; 1327 1328 if (ttrace->paths.max != -1) { 1329 memset(npath + ttrace->paths.max + 1, 0, 1330 (fd - ttrace->paths.max) * sizeof(char *)); 1331 } else { 1332 memset(npath, 0, (fd + 1) * sizeof(char *)); 1333 } 1334 1335 ttrace->paths.table = npath; 1336 ttrace->paths.max = fd; 1337 } 1338 1339 ttrace->paths.table[fd] = strdup(pathname); 1340 1341 return ttrace->paths.table[fd] != NULL ? 0 : -1; 1342 } 1343 1344 static int thread__read_fd_path(struct thread *thread, int fd) 1345 { 1346 char linkname[PATH_MAX], pathname[PATH_MAX]; 1347 struct stat st; 1348 int ret; 1349 1350 if (thread->pid_ == thread->tid) { 1351 scnprintf(linkname, sizeof(linkname), 1352 "/proc/%d/fd/%d", thread->pid_, fd); 1353 } else { 1354 scnprintf(linkname, sizeof(linkname), 1355 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1356 } 1357 1358 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1359 return -1; 1360 1361 ret = readlink(linkname, pathname, sizeof(pathname)); 1362 1363 if (ret < 0 || ret > st.st_size) 1364 return -1; 1365 1366 pathname[ret] = '\0'; 1367 return trace__set_fd_pathname(thread, fd, pathname); 1368 } 1369 1370 static const char *thread__fd_path(struct thread *thread, int fd, 1371 struct trace *trace) 1372 { 1373 struct thread_trace *ttrace = thread__priv(thread); 1374 1375 if (ttrace == NULL) 1376 return NULL; 1377 1378 if (fd < 0) 1379 return NULL; 1380 1381 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 1382 if (!trace->live) 1383 return NULL; 1384 ++trace->stats.proc_getname; 1385 if (thread__read_fd_path(thread, fd)) 1386 return NULL; 1387 } 1388 1389 return ttrace->paths.table[fd]; 1390 } 1391 1392 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1393 struct syscall_arg *arg) 1394 { 1395 int fd = arg->val; 1396 size_t printed = scnprintf(bf, size, "%d", fd); 1397 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1398 1399 if (path) 1400 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1401 1402 return printed; 1403 } 1404 1405 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1406 struct syscall_arg *arg) 1407 { 1408 int fd = arg->val; 1409 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1410 struct thread_trace *ttrace = thread__priv(arg->thread); 1411 1412 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1413 zfree(&ttrace->paths.table[fd]); 1414 1415 return printed; 1416 } 1417 1418 static bool trace__filter_duration(struct trace *trace, double t) 1419 { 1420 return t < (trace->duration_filter * NSEC_PER_MSEC); 1421 } 1422 1423 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1424 { 1425 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1426 1427 return fprintf(fp, "%10.3f ", ts); 1428 } 1429 1430 static bool done = false; 1431 static bool interrupted = false; 1432 1433 static void sig_handler(int sig) 1434 { 1435 done = true; 1436 interrupted = sig == SIGINT; 1437 } 1438 1439 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1440 u64 duration, u64 tstamp, FILE *fp) 1441 { 1442 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1443 printed += fprintf_duration(duration, fp); 1444 1445 if (trace->multiple_threads) { 1446 if (trace->show_comm) 1447 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1448 printed += fprintf(fp, "%d ", thread->tid); 1449 } 1450 1451 return printed; 1452 } 1453 1454 static int trace__process_event(struct trace *trace, struct machine *machine, 1455 union perf_event *event, struct perf_sample *sample) 1456 { 1457 int ret = 0; 1458 1459 switch (event->header.type) { 1460 case PERF_RECORD_LOST: 1461 color_fprintf(trace->output, PERF_COLOR_RED, 1462 "LOST %" PRIu64 " events!\n", event->lost.lost); 1463 ret = machine__process_lost_event(machine, event, sample); 1464 default: 1465 ret = machine__process_event(machine, event, sample); 1466 break; 1467 } 1468 1469 return ret; 1470 } 1471 1472 static int trace__tool_process(struct perf_tool *tool, 1473 union perf_event *event, 1474 struct perf_sample *sample, 1475 struct machine *machine) 1476 { 1477 struct trace *trace = container_of(tool, struct trace, tool); 1478 return trace__process_event(trace, machine, event, sample); 1479 } 1480 1481 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1482 { 1483 int err = symbol__init(NULL); 1484 1485 if (err) 1486 return err; 1487 1488 trace->host = machine__new_host(); 1489 if (trace->host == NULL) 1490 return -ENOMEM; 1491 1492 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) 1493 return -errno; 1494 1495 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1496 evlist->threads, trace__tool_process, false, 1497 trace->opts.proc_map_timeout); 1498 if (err) 1499 symbol__exit(); 1500 1501 return err; 1502 } 1503 1504 static int syscall__set_arg_fmts(struct syscall *sc) 1505 { 1506 struct format_field *field; 1507 int idx = 0; 1508 1509 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); 1510 if (sc->arg_scnprintf == NULL) 1511 return -1; 1512 1513 if (sc->fmt) 1514 sc->arg_parm = sc->fmt->arg_parm; 1515 1516 for (field = sc->args; field; field = field->next) { 1517 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1518 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1519 else if (field->flags & FIELD_IS_POINTER) 1520 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1521 ++idx; 1522 } 1523 1524 return 0; 1525 } 1526 1527 static int trace__read_syscall_info(struct trace *trace, int id) 1528 { 1529 char tp_name[128]; 1530 struct syscall *sc; 1531 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1532 1533 if (name == NULL) 1534 return -1; 1535 1536 if (id > trace->syscalls.max) { 1537 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1538 1539 if (nsyscalls == NULL) 1540 return -1; 1541 1542 if (trace->syscalls.max != -1) { 1543 memset(nsyscalls + trace->syscalls.max + 1, 0, 1544 (id - trace->syscalls.max) * sizeof(*sc)); 1545 } else { 1546 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1547 } 1548 1549 trace->syscalls.table = nsyscalls; 1550 trace->syscalls.max = id; 1551 } 1552 1553 sc = trace->syscalls.table + id; 1554 sc->name = name; 1555 1556 sc->fmt = syscall_fmt__find(sc->name); 1557 1558 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1559 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1560 1561 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 1562 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1563 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1564 } 1565 1566 if (sc->tp_format == NULL) 1567 return -1; 1568 1569 sc->args = sc->tp_format->format.fields; 1570 sc->nr_args = sc->tp_format->format.nr_fields; 1571 /* drop nr field - not relevant here; does not exist on older kernels */ 1572 if (sc->args && strcmp(sc->args->name, "nr") == 0) { 1573 sc->args = sc->args->next; 1574 --sc->nr_args; 1575 } 1576 1577 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1578 1579 return syscall__set_arg_fmts(sc); 1580 } 1581 1582 static int trace__validate_ev_qualifier(struct trace *trace) 1583 { 1584 int err = 0, i; 1585 struct str_node *pos; 1586 1587 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1588 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1589 sizeof(trace->ev_qualifier_ids.entries[0])); 1590 1591 if (trace->ev_qualifier_ids.entries == NULL) { 1592 fputs("Error:\tNot enough memory for allocating events qualifier ids\n", 1593 trace->output); 1594 err = -EINVAL; 1595 goto out; 1596 } 1597 1598 i = 0; 1599 1600 strlist__for_each(pos, trace->ev_qualifier) { 1601 const char *sc = pos->s; 1602 int id = audit_name_to_syscall(sc, trace->audit.machine); 1603 1604 if (id < 0) { 1605 if (err == 0) { 1606 fputs("Error:\tInvalid syscall ", trace->output); 1607 err = -EINVAL; 1608 } else { 1609 fputs(", ", trace->output); 1610 } 1611 1612 fputs(sc, trace->output); 1613 } 1614 1615 trace->ev_qualifier_ids.entries[i++] = id; 1616 } 1617 1618 if (err < 0) { 1619 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1620 "\nHint:\tand: 'man syscalls'\n", trace->output); 1621 zfree(&trace->ev_qualifier_ids.entries); 1622 trace->ev_qualifier_ids.nr = 0; 1623 } 1624 out: 1625 return err; 1626 } 1627 1628 /* 1629 * args is to be interpreted as a series of longs but we need to handle 1630 * 8-byte unaligned accesses. args points to raw_data within the event 1631 * and raw_data is guaranteed to be 8-byte unaligned because it is 1632 * preceded by raw_size which is a u32. So we need to copy args to a temp 1633 * variable to read it. Most notably this avoids extended load instructions 1634 * on unaligned addresses 1635 */ 1636 1637 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1638 unsigned char *args, struct trace *trace, 1639 struct thread *thread) 1640 { 1641 size_t printed = 0; 1642 unsigned char *p; 1643 unsigned long val; 1644 1645 if (sc->args != NULL) { 1646 struct format_field *field; 1647 u8 bit = 1; 1648 struct syscall_arg arg = { 1649 .idx = 0, 1650 .mask = 0, 1651 .trace = trace, 1652 .thread = thread, 1653 }; 1654 1655 for (field = sc->args; field; 1656 field = field->next, ++arg.idx, bit <<= 1) { 1657 if (arg.mask & bit) 1658 continue; 1659 1660 /* special care for unaligned accesses */ 1661 p = args + sizeof(unsigned long) * arg.idx; 1662 memcpy(&val, p, sizeof(val)); 1663 1664 /* 1665 * Suppress this argument if its value is zero and 1666 * and we don't have a string associated in an 1667 * strarray for it. 1668 */ 1669 if (val == 0 && 1670 !(sc->arg_scnprintf && 1671 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1672 sc->arg_parm[arg.idx])) 1673 continue; 1674 1675 printed += scnprintf(bf + printed, size - printed, 1676 "%s%s: ", printed ? ", " : "", field->name); 1677 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1678 arg.val = val; 1679 if (sc->arg_parm) 1680 arg.parm = sc->arg_parm[arg.idx]; 1681 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1682 size - printed, &arg); 1683 } else { 1684 printed += scnprintf(bf + printed, size - printed, 1685 "%ld", val); 1686 } 1687 } 1688 } else { 1689 int i = 0; 1690 1691 while (i < 6) { 1692 /* special care for unaligned accesses */ 1693 p = args + sizeof(unsigned long) * i; 1694 memcpy(&val, p, sizeof(val)); 1695 printed += scnprintf(bf + printed, size - printed, 1696 "%sarg%d: %ld", 1697 printed ? ", " : "", i, val); 1698 ++i; 1699 } 1700 } 1701 1702 return printed; 1703 } 1704 1705 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1706 union perf_event *event, 1707 struct perf_sample *sample); 1708 1709 static struct syscall *trace__syscall_info(struct trace *trace, 1710 struct perf_evsel *evsel, int id) 1711 { 1712 1713 if (id < 0) { 1714 1715 /* 1716 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1717 * before that, leaving at a higher verbosity level till that is 1718 * explained. Reproduced with plain ftrace with: 1719 * 1720 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1721 * grep "NR -1 " /t/trace_pipe 1722 * 1723 * After generating some load on the machine. 1724 */ 1725 if (verbose > 1) { 1726 static u64 n; 1727 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1728 id, perf_evsel__name(evsel), ++n); 1729 } 1730 return NULL; 1731 } 1732 1733 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1734 trace__read_syscall_info(trace, id)) 1735 goto out_cant_read; 1736 1737 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1738 goto out_cant_read; 1739 1740 return &trace->syscalls.table[id]; 1741 1742 out_cant_read: 1743 if (verbose) { 1744 fprintf(trace->output, "Problems reading syscall %d", id); 1745 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1746 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1747 fputs(" information\n", trace->output); 1748 } 1749 return NULL; 1750 } 1751 1752 static void thread__update_stats(struct thread_trace *ttrace, 1753 int id, struct perf_sample *sample) 1754 { 1755 struct int_node *inode; 1756 struct stats *stats; 1757 u64 duration = 0; 1758 1759 inode = intlist__findnew(ttrace->syscall_stats, id); 1760 if (inode == NULL) 1761 return; 1762 1763 stats = inode->priv; 1764 if (stats == NULL) { 1765 stats = malloc(sizeof(struct stats)); 1766 if (stats == NULL) 1767 return; 1768 init_stats(stats); 1769 inode->priv = stats; 1770 } 1771 1772 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1773 duration = sample->time - ttrace->entry_time; 1774 1775 update_stats(stats, duration); 1776 } 1777 1778 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) 1779 { 1780 struct thread_trace *ttrace; 1781 u64 duration; 1782 size_t printed; 1783 1784 if (trace->current == NULL) 1785 return 0; 1786 1787 ttrace = thread__priv(trace->current); 1788 1789 if (!ttrace->entry_pending) 1790 return 0; 1791 1792 duration = sample->time - ttrace->entry_time; 1793 1794 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); 1795 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1796 ttrace->entry_pending = false; 1797 1798 return printed; 1799 } 1800 1801 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1802 union perf_event *event __maybe_unused, 1803 struct perf_sample *sample) 1804 { 1805 char *msg; 1806 void *args; 1807 size_t printed = 0; 1808 struct thread *thread; 1809 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1810 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1811 struct thread_trace *ttrace; 1812 1813 if (sc == NULL) 1814 return -1; 1815 1816 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1817 ttrace = thread__trace(thread, trace->output); 1818 if (ttrace == NULL) 1819 goto out_put; 1820 1821 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1822 1823 if (ttrace->entry_str == NULL) { 1824 ttrace->entry_str = malloc(1024); 1825 if (!ttrace->entry_str) 1826 goto out_put; 1827 } 1828 1829 if (!trace->summary_only) 1830 trace__printf_interrupted_entry(trace, sample); 1831 1832 ttrace->entry_time = sample->time; 1833 msg = ttrace->entry_str; 1834 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1835 1836 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, 1837 args, trace, thread); 1838 1839 if (sc->is_exit) { 1840 if (!trace->duration_filter && !trace->summary_only) { 1841 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1842 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1843 } 1844 } else 1845 ttrace->entry_pending = true; 1846 1847 if (trace->current != thread) { 1848 thread__put(trace->current); 1849 trace->current = thread__get(thread); 1850 } 1851 err = 0; 1852 out_put: 1853 thread__put(thread); 1854 return err; 1855 } 1856 1857 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1858 union perf_event *event __maybe_unused, 1859 struct perf_sample *sample) 1860 { 1861 long ret; 1862 u64 duration = 0; 1863 struct thread *thread; 1864 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1865 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1866 struct thread_trace *ttrace; 1867 1868 if (sc == NULL) 1869 return -1; 1870 1871 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1872 ttrace = thread__trace(thread, trace->output); 1873 if (ttrace == NULL) 1874 goto out_put; 1875 1876 if (trace->summary) 1877 thread__update_stats(ttrace, id, sample); 1878 1879 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1880 1881 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { 1882 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); 1883 trace->last_vfs_getname = NULL; 1884 ++trace->stats.vfs_getname; 1885 } 1886 1887 ttrace->exit_time = sample->time; 1888 1889 if (ttrace->entry_time) { 1890 duration = sample->time - ttrace->entry_time; 1891 if (trace__filter_duration(trace, duration)) 1892 goto out; 1893 } else if (trace->duration_filter) 1894 goto out; 1895 1896 if (trace->summary_only) 1897 goto out; 1898 1899 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1900 1901 if (ttrace->entry_pending) { 1902 fprintf(trace->output, "%-70s", ttrace->entry_str); 1903 } else { 1904 fprintf(trace->output, " ... ["); 1905 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1906 fprintf(trace->output, "]: %s()", sc->name); 1907 } 1908 1909 if (sc->fmt == NULL) { 1910 signed_print: 1911 fprintf(trace->output, ") = %ld", ret); 1912 } else if (ret < 0 && sc->fmt->errmsg) { 1913 char bf[STRERR_BUFSIZE]; 1914 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1915 *e = audit_errno_to_name(-ret); 1916 1917 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1918 } else if (ret == 0 && sc->fmt->timeout) 1919 fprintf(trace->output, ") = 0 Timeout"); 1920 else if (sc->fmt->hexret) 1921 fprintf(trace->output, ") = %#lx", ret); 1922 else 1923 goto signed_print; 1924 1925 fputc('\n', trace->output); 1926 out: 1927 ttrace->entry_pending = false; 1928 err = 0; 1929 out_put: 1930 thread__put(thread); 1931 return err; 1932 } 1933 1934 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1935 union perf_event *event __maybe_unused, 1936 struct perf_sample *sample) 1937 { 1938 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); 1939 return 0; 1940 } 1941 1942 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1943 union perf_event *event __maybe_unused, 1944 struct perf_sample *sample) 1945 { 1946 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1947 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1948 struct thread *thread = machine__findnew_thread(trace->host, 1949 sample->pid, 1950 sample->tid); 1951 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1952 1953 if (ttrace == NULL) 1954 goto out_dump; 1955 1956 ttrace->runtime_ms += runtime_ms; 1957 trace->runtime_ms += runtime_ms; 1958 thread__put(thread); 1959 return 0; 1960 1961 out_dump: 1962 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1963 evsel->name, 1964 perf_evsel__strval(evsel, sample, "comm"), 1965 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1966 runtime, 1967 perf_evsel__intval(evsel, sample, "vruntime")); 1968 thread__put(thread); 1969 return 0; 1970 } 1971 1972 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1973 union perf_event *event __maybe_unused, 1974 struct perf_sample *sample) 1975 { 1976 trace__printf_interrupted_entry(trace, sample); 1977 trace__fprintf_tstamp(trace, sample->time, trace->output); 1978 1979 if (trace->trace_syscalls) 1980 fprintf(trace->output, "( ): "); 1981 1982 fprintf(trace->output, "%s:", evsel->name); 1983 1984 if (evsel->tp_format) { 1985 event_format__fprintf(evsel->tp_format, sample->cpu, 1986 sample->raw_data, sample->raw_size, 1987 trace->output); 1988 } 1989 1990 fprintf(trace->output, ")\n"); 1991 return 0; 1992 } 1993 1994 static void print_location(FILE *f, struct perf_sample *sample, 1995 struct addr_location *al, 1996 bool print_dso, bool print_sym) 1997 { 1998 1999 if ((verbose || print_dso) && al->map) 2000 fprintf(f, "%s@", al->map->dso->long_name); 2001 2002 if ((verbose || print_sym) && al->sym) 2003 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 2004 al->addr - al->sym->start); 2005 else if (al->map) 2006 fprintf(f, "0x%" PRIx64, al->addr); 2007 else 2008 fprintf(f, "0x%" PRIx64, sample->addr); 2009 } 2010 2011 static int trace__pgfault(struct trace *trace, 2012 struct perf_evsel *evsel, 2013 union perf_event *event, 2014 struct perf_sample *sample) 2015 { 2016 struct thread *thread; 2017 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 2018 struct addr_location al; 2019 char map_type = 'd'; 2020 struct thread_trace *ttrace; 2021 int err = -1; 2022 2023 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2024 ttrace = thread__trace(thread, trace->output); 2025 if (ttrace == NULL) 2026 goto out_put; 2027 2028 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 2029 ttrace->pfmaj++; 2030 else 2031 ttrace->pfmin++; 2032 2033 if (trace->summary_only) 2034 goto out; 2035 2036 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 2037 sample->ip, &al); 2038 2039 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); 2040 2041 fprintf(trace->output, "%sfault [", 2042 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 2043 "maj" : "min"); 2044 2045 print_location(trace->output, sample, &al, false, true); 2046 2047 fprintf(trace->output, "] => "); 2048 2049 thread__find_addr_location(thread, cpumode, MAP__VARIABLE, 2050 sample->addr, &al); 2051 2052 if (!al.map) { 2053 thread__find_addr_location(thread, cpumode, 2054 MAP__FUNCTION, sample->addr, &al); 2055 2056 if (al.map) 2057 map_type = 'x'; 2058 else 2059 map_type = '?'; 2060 } 2061 2062 print_location(trace->output, sample, &al, true, false); 2063 2064 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2065 out: 2066 err = 0; 2067 out_put: 2068 thread__put(thread); 2069 return err; 2070 } 2071 2072 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 2073 { 2074 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 2075 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 2076 return false; 2077 2078 if (trace->pid_list || trace->tid_list) 2079 return true; 2080 2081 return false; 2082 } 2083 2084 static int trace__process_sample(struct perf_tool *tool, 2085 union perf_event *event, 2086 struct perf_sample *sample, 2087 struct perf_evsel *evsel, 2088 struct machine *machine __maybe_unused) 2089 { 2090 struct trace *trace = container_of(tool, struct trace, tool); 2091 int err = 0; 2092 2093 tracepoint_handler handler = evsel->handler; 2094 2095 if (skip_sample(trace, sample)) 2096 return 0; 2097 2098 if (!trace->full_time && trace->base_time == 0) 2099 trace->base_time = sample->time; 2100 2101 if (handler) { 2102 ++trace->nr_events; 2103 handler(trace, evsel, event, sample); 2104 } 2105 2106 return err; 2107 } 2108 2109 static int parse_target_str(struct trace *trace) 2110 { 2111 if (trace->opts.target.pid) { 2112 trace->pid_list = intlist__new(trace->opts.target.pid); 2113 if (trace->pid_list == NULL) { 2114 pr_err("Error parsing process id string\n"); 2115 return -EINVAL; 2116 } 2117 } 2118 2119 if (trace->opts.target.tid) { 2120 trace->tid_list = intlist__new(trace->opts.target.tid); 2121 if (trace->tid_list == NULL) { 2122 pr_err("Error parsing thread id string\n"); 2123 return -EINVAL; 2124 } 2125 } 2126 2127 return 0; 2128 } 2129 2130 static int trace__record(struct trace *trace, int argc, const char **argv) 2131 { 2132 unsigned int rec_argc, i, j; 2133 const char **rec_argv; 2134 const char * const record_args[] = { 2135 "record", 2136 "-R", 2137 "-m", "1024", 2138 "-c", "1", 2139 }; 2140 2141 const char * const sc_args[] = { "-e", }; 2142 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2143 const char * const majpf_args[] = { "-e", "major-faults" }; 2144 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2145 const char * const minpf_args[] = { "-e", "minor-faults" }; 2146 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2147 2148 /* +1 is for the event string below */ 2149 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2150 majpf_args_nr + minpf_args_nr + argc; 2151 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2152 2153 if (rec_argv == NULL) 2154 return -ENOMEM; 2155 2156 j = 0; 2157 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2158 rec_argv[j++] = record_args[i]; 2159 2160 if (trace->trace_syscalls) { 2161 for (i = 0; i < sc_args_nr; i++) 2162 rec_argv[j++] = sc_args[i]; 2163 2164 /* event string may be different for older kernels - e.g., RHEL6 */ 2165 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2166 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2167 else if (is_valid_tracepoint("syscalls:sys_enter")) 2168 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2169 else { 2170 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2171 return -1; 2172 } 2173 } 2174 2175 if (trace->trace_pgfaults & TRACE_PFMAJ) 2176 for (i = 0; i < majpf_args_nr; i++) 2177 rec_argv[j++] = majpf_args[i]; 2178 2179 if (trace->trace_pgfaults & TRACE_PFMIN) 2180 for (i = 0; i < minpf_args_nr; i++) 2181 rec_argv[j++] = minpf_args[i]; 2182 2183 for (i = 0; i < (unsigned int)argc; i++) 2184 rec_argv[j++] = argv[i]; 2185 2186 return cmd_record(j, rec_argv, NULL); 2187 } 2188 2189 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2190 2191 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2192 { 2193 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2194 if (evsel == NULL) 2195 return; 2196 2197 if (perf_evsel__field(evsel, "pathname") == NULL) { 2198 perf_evsel__delete(evsel); 2199 return; 2200 } 2201 2202 evsel->handler = trace__vfs_getname; 2203 perf_evlist__add(evlist, evsel); 2204 } 2205 2206 static int perf_evlist__add_pgfault(struct perf_evlist *evlist, 2207 u64 config) 2208 { 2209 struct perf_evsel *evsel; 2210 struct perf_event_attr attr = { 2211 .type = PERF_TYPE_SOFTWARE, 2212 .mmap_data = 1, 2213 }; 2214 2215 attr.config = config; 2216 attr.sample_period = 1; 2217 2218 event_attr_init(&attr); 2219 2220 evsel = perf_evsel__new(&attr); 2221 if (!evsel) 2222 return -ENOMEM; 2223 2224 evsel->handler = trace__pgfault; 2225 perf_evlist__add(evlist, evsel); 2226 2227 return 0; 2228 } 2229 2230 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2231 { 2232 const u32 type = event->header.type; 2233 struct perf_evsel *evsel; 2234 2235 if (!trace->full_time && trace->base_time == 0) 2236 trace->base_time = sample->time; 2237 2238 if (type != PERF_RECORD_SAMPLE) { 2239 trace__process_event(trace, trace->host, event, sample); 2240 return; 2241 } 2242 2243 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2244 if (evsel == NULL) { 2245 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2246 return; 2247 } 2248 2249 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2250 sample->raw_data == NULL) { 2251 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2252 perf_evsel__name(evsel), sample->tid, 2253 sample->cpu, sample->raw_size); 2254 } else { 2255 tracepoint_handler handler = evsel->handler; 2256 handler(trace, evsel, event, sample); 2257 } 2258 } 2259 2260 static int trace__add_syscall_newtp(struct trace *trace) 2261 { 2262 int ret = -1; 2263 struct perf_evlist *evlist = trace->evlist; 2264 struct perf_evsel *sys_enter, *sys_exit; 2265 2266 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); 2267 if (sys_enter == NULL) 2268 goto out; 2269 2270 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 2271 goto out_delete_sys_enter; 2272 2273 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); 2274 if (sys_exit == NULL) 2275 goto out_delete_sys_enter; 2276 2277 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 2278 goto out_delete_sys_exit; 2279 2280 perf_evlist__add(evlist, sys_enter); 2281 perf_evlist__add(evlist, sys_exit); 2282 2283 trace->syscalls.events.sys_enter = sys_enter; 2284 trace->syscalls.events.sys_exit = sys_exit; 2285 2286 ret = 0; 2287 out: 2288 return ret; 2289 2290 out_delete_sys_exit: 2291 perf_evsel__delete_priv(sys_exit); 2292 out_delete_sys_enter: 2293 perf_evsel__delete_priv(sys_enter); 2294 goto out; 2295 } 2296 2297 static int trace__set_ev_qualifier_filter(struct trace *trace) 2298 { 2299 int err = -1; 2300 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, 2301 trace->ev_qualifier_ids.nr, 2302 trace->ev_qualifier_ids.entries); 2303 2304 if (filter == NULL) 2305 goto out_enomem; 2306 2307 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter)) 2308 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter); 2309 2310 free(filter); 2311 out: 2312 return err; 2313 out_enomem: 2314 errno = ENOMEM; 2315 goto out; 2316 } 2317 2318 static int trace__run(struct trace *trace, int argc, const char **argv) 2319 { 2320 struct perf_evlist *evlist = trace->evlist; 2321 struct perf_evsel *evsel; 2322 int err = -1, i; 2323 unsigned long before; 2324 const bool forks = argc > 0; 2325 bool draining = false; 2326 2327 trace->live = true; 2328 2329 if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) 2330 goto out_error_raw_syscalls; 2331 2332 if (trace->trace_syscalls) 2333 perf_evlist__add_vfs_getname(evlist); 2334 2335 if ((trace->trace_pgfaults & TRACE_PFMAJ) && 2336 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { 2337 goto out_error_mem; 2338 } 2339 2340 if ((trace->trace_pgfaults & TRACE_PFMIN) && 2341 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) 2342 goto out_error_mem; 2343 2344 if (trace->sched && 2345 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2346 trace__sched_stat_runtime)) 2347 goto out_error_sched_stat_runtime; 2348 2349 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2350 if (err < 0) { 2351 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2352 goto out_delete_evlist; 2353 } 2354 2355 err = trace__symbols_init(trace, evlist); 2356 if (err < 0) { 2357 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2358 goto out_delete_evlist; 2359 } 2360 2361 perf_evlist__config(evlist, &trace->opts); 2362 2363 signal(SIGCHLD, sig_handler); 2364 signal(SIGINT, sig_handler); 2365 2366 if (forks) { 2367 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2368 argv, false, NULL); 2369 if (err < 0) { 2370 fprintf(trace->output, "Couldn't run the workload!\n"); 2371 goto out_delete_evlist; 2372 } 2373 } 2374 2375 err = perf_evlist__open(evlist); 2376 if (err < 0) 2377 goto out_error_open; 2378 2379 /* 2380 * Better not use !target__has_task() here because we need to cover the 2381 * case where no threads were specified in the command line, but a 2382 * workload was, and in that case we will fill in the thread_map when 2383 * we fork the workload in perf_evlist__prepare_workload. 2384 */ 2385 if (trace->filter_pids.nr > 0) 2386 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2387 else if (thread_map__pid(evlist->threads, 0) == -1) 2388 err = perf_evlist__set_filter_pid(evlist, getpid()); 2389 2390 if (err < 0) 2391 goto out_error_mem; 2392 2393 if (trace->ev_qualifier_ids.nr > 0) { 2394 err = trace__set_ev_qualifier_filter(trace); 2395 if (err < 0) 2396 goto out_errno; 2397 } 2398 2399 pr_debug("%s\n", trace->syscalls.events.sys_exit->filter); 2400 2401 err = perf_evlist__apply_filters(evlist, &evsel); 2402 if (err < 0) 2403 goto out_error_apply_filters; 2404 2405 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); 2406 if (err < 0) 2407 goto out_error_mmap; 2408 2409 if (!target__none(&trace->opts.target)) 2410 perf_evlist__enable(evlist); 2411 2412 if (forks) 2413 perf_evlist__start_workload(evlist); 2414 2415 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || 2416 evlist->threads->nr > 1 || 2417 perf_evlist__first(evlist)->attr.inherit; 2418 again: 2419 before = trace->nr_events; 2420 2421 for (i = 0; i < evlist->nr_mmaps; i++) { 2422 union perf_event *event; 2423 2424 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 2425 struct perf_sample sample; 2426 2427 ++trace->nr_events; 2428 2429 err = perf_evlist__parse_sample(evlist, event, &sample); 2430 if (err) { 2431 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2432 goto next_event; 2433 } 2434 2435 trace__handle_event(trace, event, &sample); 2436 next_event: 2437 perf_evlist__mmap_consume(evlist, i); 2438 2439 if (interrupted) 2440 goto out_disable; 2441 2442 if (done && !draining) { 2443 perf_evlist__disable(evlist); 2444 draining = true; 2445 } 2446 } 2447 } 2448 2449 if (trace->nr_events == before) { 2450 int timeout = done ? 100 : -1; 2451 2452 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2453 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2454 draining = true; 2455 2456 goto again; 2457 } 2458 } else { 2459 goto again; 2460 } 2461 2462 out_disable: 2463 thread__zput(trace->current); 2464 2465 perf_evlist__disable(evlist); 2466 2467 if (!err) { 2468 if (trace->summary) 2469 trace__fprintf_thread_summary(trace, trace->output); 2470 2471 if (trace->show_tool_stats) { 2472 fprintf(trace->output, "Stats:\n " 2473 " vfs_getname : %" PRIu64 "\n" 2474 " proc_getname: %" PRIu64 "\n", 2475 trace->stats.vfs_getname, 2476 trace->stats.proc_getname); 2477 } 2478 } 2479 2480 out_delete_evlist: 2481 perf_evlist__delete(evlist); 2482 trace->evlist = NULL; 2483 trace->live = false; 2484 return err; 2485 { 2486 char errbuf[BUFSIZ]; 2487 2488 out_error_sched_stat_runtime: 2489 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2490 goto out_error; 2491 2492 out_error_raw_syscalls: 2493 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2494 goto out_error; 2495 2496 out_error_mmap: 2497 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2498 goto out_error; 2499 2500 out_error_open: 2501 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2502 2503 out_error: 2504 fprintf(trace->output, "%s\n", errbuf); 2505 goto out_delete_evlist; 2506 2507 out_error_apply_filters: 2508 fprintf(trace->output, 2509 "Failed to set filter \"%s\" on event %s with %d (%s)\n", 2510 evsel->filter, perf_evsel__name(evsel), errno, 2511 strerror_r(errno, errbuf, sizeof(errbuf))); 2512 goto out_delete_evlist; 2513 } 2514 out_error_mem: 2515 fprintf(trace->output, "Not enough memory to run!\n"); 2516 goto out_delete_evlist; 2517 2518 out_errno: 2519 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); 2520 goto out_delete_evlist; 2521 } 2522 2523 static int trace__replay(struct trace *trace) 2524 { 2525 const struct perf_evsel_str_handler handlers[] = { 2526 { "probe:vfs_getname", trace__vfs_getname, }, 2527 }; 2528 struct perf_data_file file = { 2529 .path = input_name, 2530 .mode = PERF_DATA_MODE_READ, 2531 .force = trace->force, 2532 }; 2533 struct perf_session *session; 2534 struct perf_evsel *evsel; 2535 int err = -1; 2536 2537 trace->tool.sample = trace__process_sample; 2538 trace->tool.mmap = perf_event__process_mmap; 2539 trace->tool.mmap2 = perf_event__process_mmap2; 2540 trace->tool.comm = perf_event__process_comm; 2541 trace->tool.exit = perf_event__process_exit; 2542 trace->tool.fork = perf_event__process_fork; 2543 trace->tool.attr = perf_event__process_attr; 2544 trace->tool.tracing_data = perf_event__process_tracing_data; 2545 trace->tool.build_id = perf_event__process_build_id; 2546 2547 trace->tool.ordered_events = true; 2548 trace->tool.ordering_requires_timestamps = true; 2549 2550 /* add tid to output */ 2551 trace->multiple_threads = true; 2552 2553 session = perf_session__new(&file, false, &trace->tool); 2554 if (session == NULL) 2555 return -1; 2556 2557 if (symbol__init(&session->header.env) < 0) 2558 goto out; 2559 2560 trace->host = &session->machines.host; 2561 2562 err = perf_session__set_tracepoints_handlers(session, handlers); 2563 if (err) 2564 goto out; 2565 2566 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2567 "raw_syscalls:sys_enter"); 2568 /* older kernels have syscalls tp versus raw_syscalls */ 2569 if (evsel == NULL) 2570 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2571 "syscalls:sys_enter"); 2572 2573 if (evsel && 2574 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2575 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2576 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2577 goto out; 2578 } 2579 2580 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2581 "raw_syscalls:sys_exit"); 2582 if (evsel == NULL) 2583 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2584 "syscalls:sys_exit"); 2585 if (evsel && 2586 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2587 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2588 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2589 goto out; 2590 } 2591 2592 evlist__for_each(session->evlist, evsel) { 2593 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2594 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2595 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2596 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2597 evsel->handler = trace__pgfault; 2598 } 2599 2600 err = parse_target_str(trace); 2601 if (err != 0) 2602 goto out; 2603 2604 setup_pager(); 2605 2606 err = perf_session__process_events(session); 2607 if (err) 2608 pr_err("Failed to process events, error %d", err); 2609 2610 else if (trace->summary) 2611 trace__fprintf_thread_summary(trace, trace->output); 2612 2613 out: 2614 perf_session__delete(session); 2615 2616 return err; 2617 } 2618 2619 static size_t trace__fprintf_threads_header(FILE *fp) 2620 { 2621 size_t printed; 2622 2623 printed = fprintf(fp, "\n Summary of events:\n\n"); 2624 2625 return printed; 2626 } 2627 2628 static size_t thread__dump_stats(struct thread_trace *ttrace, 2629 struct trace *trace, FILE *fp) 2630 { 2631 struct stats *stats; 2632 size_t printed = 0; 2633 struct syscall *sc; 2634 struct int_node *inode = intlist__first(ttrace->syscall_stats); 2635 2636 if (inode == NULL) 2637 return 0; 2638 2639 printed += fprintf(fp, "\n"); 2640 2641 printed += fprintf(fp, " syscall calls min avg max stddev\n"); 2642 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); 2643 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); 2644 2645 /* each int_node is a syscall */ 2646 while (inode) { 2647 stats = inode->priv; 2648 if (stats) { 2649 double min = (double)(stats->min) / NSEC_PER_MSEC; 2650 double max = (double)(stats->max) / NSEC_PER_MSEC; 2651 double avg = avg_stats(stats); 2652 double pct; 2653 u64 n = (u64) stats->n; 2654 2655 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2656 avg /= NSEC_PER_MSEC; 2657 2658 sc = &trace->syscalls.table[inode->i]; 2659 printed += fprintf(fp, " %-15s", sc->name); 2660 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", 2661 n, min, avg); 2662 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2663 } 2664 2665 inode = intlist__next(inode); 2666 } 2667 2668 printed += fprintf(fp, "\n\n"); 2669 2670 return printed; 2671 } 2672 2673 /* struct used to pass data to per-thread function */ 2674 struct summary_data { 2675 FILE *fp; 2676 struct trace *trace; 2677 size_t printed; 2678 }; 2679 2680 static int trace__fprintf_one_thread(struct thread *thread, void *priv) 2681 { 2682 struct summary_data *data = priv; 2683 FILE *fp = data->fp; 2684 size_t printed = data->printed; 2685 struct trace *trace = data->trace; 2686 struct thread_trace *ttrace = thread__priv(thread); 2687 double ratio; 2688 2689 if (ttrace == NULL) 2690 return 0; 2691 2692 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2693 2694 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2695 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2696 printed += fprintf(fp, "%.1f%%", ratio); 2697 if (ttrace->pfmaj) 2698 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2699 if (ttrace->pfmin) 2700 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2701 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2702 printed += thread__dump_stats(ttrace, trace, fp); 2703 2704 data->printed += printed; 2705 2706 return 0; 2707 } 2708 2709 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2710 { 2711 struct summary_data data = { 2712 .fp = fp, 2713 .trace = trace 2714 }; 2715 data.printed = trace__fprintf_threads_header(fp); 2716 2717 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); 2718 2719 return data.printed; 2720 } 2721 2722 static int trace__set_duration(const struct option *opt, const char *str, 2723 int unset __maybe_unused) 2724 { 2725 struct trace *trace = opt->value; 2726 2727 trace->duration_filter = atof(str); 2728 return 0; 2729 } 2730 2731 static int trace__set_filter_pids(const struct option *opt, const char *str, 2732 int unset __maybe_unused) 2733 { 2734 int ret = -1; 2735 size_t i; 2736 struct trace *trace = opt->value; 2737 /* 2738 * FIXME: introduce a intarray class, plain parse csv and create a 2739 * { int nr, int entries[] } struct... 2740 */ 2741 struct intlist *list = intlist__new(str); 2742 2743 if (list == NULL) 2744 return -1; 2745 2746 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2747 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2748 2749 if (trace->filter_pids.entries == NULL) 2750 goto out; 2751 2752 trace->filter_pids.entries[0] = getpid(); 2753 2754 for (i = 1; i < trace->filter_pids.nr; ++i) 2755 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2756 2757 intlist__delete(list); 2758 ret = 0; 2759 out: 2760 return ret; 2761 } 2762 2763 static int trace__open_output(struct trace *trace, const char *filename) 2764 { 2765 struct stat st; 2766 2767 if (!stat(filename, &st) && st.st_size) { 2768 char oldname[PATH_MAX]; 2769 2770 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2771 unlink(oldname); 2772 rename(filename, oldname); 2773 } 2774 2775 trace->output = fopen(filename, "w"); 2776 2777 return trace->output == NULL ? -errno : 0; 2778 } 2779 2780 static int parse_pagefaults(const struct option *opt, const char *str, 2781 int unset __maybe_unused) 2782 { 2783 int *trace_pgfaults = opt->value; 2784 2785 if (strcmp(str, "all") == 0) 2786 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2787 else if (strcmp(str, "maj") == 0) 2788 *trace_pgfaults |= TRACE_PFMAJ; 2789 else if (strcmp(str, "min") == 0) 2790 *trace_pgfaults |= TRACE_PFMIN; 2791 else 2792 return -1; 2793 2794 return 0; 2795 } 2796 2797 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2798 { 2799 struct perf_evsel *evsel; 2800 2801 evlist__for_each(evlist, evsel) 2802 evsel->handler = handler; 2803 } 2804 2805 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2806 { 2807 const char *trace_usage[] = { 2808 "perf trace [<options>] [<command>]", 2809 "perf trace [<options>] -- <command> [<options>]", 2810 "perf trace record [<options>] [<command>]", 2811 "perf trace record [<options>] -- <command> [<options>]", 2812 NULL 2813 }; 2814 struct trace trace = { 2815 .audit = { 2816 .machine = audit_detect_machine(), 2817 .open_id = audit_name_to_syscall("open", trace.audit.machine), 2818 }, 2819 .syscalls = { 2820 . max = -1, 2821 }, 2822 .opts = { 2823 .target = { 2824 .uid = UINT_MAX, 2825 .uses_mmap = true, 2826 }, 2827 .user_freq = UINT_MAX, 2828 .user_interval = ULLONG_MAX, 2829 .no_buffering = true, 2830 .mmap_pages = UINT_MAX, 2831 .proc_map_timeout = 500, 2832 }, 2833 .output = stdout, 2834 .show_comm = true, 2835 .trace_syscalls = true, 2836 }; 2837 const char *output_name = NULL; 2838 const char *ev_qualifier_str = NULL; 2839 const struct option trace_options[] = { 2840 OPT_CALLBACK(0, "event", &trace.evlist, "event", 2841 "event selector. use 'perf list' to list available events", 2842 parse_events_option), 2843 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2844 "show the thread COMM next to its id"), 2845 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2846 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), 2847 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2848 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2849 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2850 "trace events on existing process id"), 2851 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2852 "trace events on existing thread id"), 2853 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 2854 "pids to filter (by the kernel)", trace__set_filter_pids), 2855 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2856 "system-wide collection from all CPUs"), 2857 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2858 "list of cpus to monitor"), 2859 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2860 "child tasks do not inherit counters"), 2861 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2862 "number of mmap data pages", 2863 perf_evlist__parse_mmap_pages), 2864 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2865 "user to profile"), 2866 OPT_CALLBACK(0, "duration", &trace, "float", 2867 "show only events with duration > N.M ms", 2868 trace__set_duration), 2869 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2870 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2871 OPT_BOOLEAN('T', "time", &trace.full_time, 2872 "Show full timestamp, not time relative to first start"), 2873 OPT_BOOLEAN('s', "summary", &trace.summary_only, 2874 "Show only syscall summary with statistics"), 2875 OPT_BOOLEAN('S', "with-summary", &trace.summary, 2876 "Show all syscalls and summary with statistics"), 2877 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 2878 "Trace pagefaults", parse_pagefaults, "maj"), 2879 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 2880 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 2881 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 2882 "per thread proc mmap processing timeout in ms"), 2883 OPT_END() 2884 }; 2885 const char * const trace_subcommands[] = { "record", NULL }; 2886 int err; 2887 char bf[BUFSIZ]; 2888 2889 signal(SIGSEGV, sighandler_dump_stack); 2890 signal(SIGFPE, sighandler_dump_stack); 2891 2892 trace.evlist = perf_evlist__new(); 2893 2894 if (trace.evlist == NULL) { 2895 pr_err("Not enough memory to run!\n"); 2896 err = -ENOMEM; 2897 goto out; 2898 } 2899 2900 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 2901 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2902 2903 if (trace.trace_pgfaults) { 2904 trace.opts.sample_address = true; 2905 trace.opts.sample_time = true; 2906 } 2907 2908 if (trace.evlist->nr_entries > 0) 2909 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 2910 2911 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 2912 return trace__record(&trace, argc-1, &argv[1]); 2913 2914 /* summary_only implies summary option, but don't overwrite summary if set */ 2915 if (trace.summary_only) 2916 trace.summary = trace.summary_only; 2917 2918 if (!trace.trace_syscalls && !trace.trace_pgfaults && 2919 trace.evlist->nr_entries == 0 /* Was --events used? */) { 2920 pr_err("Please specify something to trace.\n"); 2921 return -1; 2922 } 2923 2924 if (output_name != NULL) { 2925 err = trace__open_output(&trace, output_name); 2926 if (err < 0) { 2927 perror("failed to create output file"); 2928 goto out; 2929 } 2930 } 2931 2932 if (ev_qualifier_str != NULL) { 2933 const char *s = ev_qualifier_str; 2934 struct strlist_config slist_config = { 2935 .dirname = system_path(STRACE_GROUPS_DIR), 2936 }; 2937 2938 trace.not_ev_qualifier = *s == '!'; 2939 if (trace.not_ev_qualifier) 2940 ++s; 2941 trace.ev_qualifier = strlist__new(s, &slist_config); 2942 if (trace.ev_qualifier == NULL) { 2943 fputs("Not enough memory to parse event qualifier", 2944 trace.output); 2945 err = -ENOMEM; 2946 goto out_close; 2947 } 2948 2949 err = trace__validate_ev_qualifier(&trace); 2950 if (err) 2951 goto out_close; 2952 } 2953 2954 err = target__validate(&trace.opts.target); 2955 if (err) { 2956 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2957 fprintf(trace.output, "%s", bf); 2958 goto out_close; 2959 } 2960 2961 err = target__parse_uid(&trace.opts.target); 2962 if (err) { 2963 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2964 fprintf(trace.output, "%s", bf); 2965 goto out_close; 2966 } 2967 2968 if (!argc && target__none(&trace.opts.target)) 2969 trace.opts.target.system_wide = true; 2970 2971 if (input_name) 2972 err = trace__replay(&trace); 2973 else 2974 err = trace__run(&trace, argc, argv); 2975 2976 out_close: 2977 if (output_name != NULL) 2978 fclose(trace.output); 2979 out: 2980 return err; 2981 } 2982