1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 #include "util/stat.h" 14 #include "trace-event.h" 15 #include "util/parse-events.h" 16 17 #include <libaudit.h> 18 #include <stdlib.h> 19 #include <sys/mman.h> 20 #include <linux/futex.h> 21 22 /* For older distros: */ 23 #ifndef MAP_STACK 24 # define MAP_STACK 0x20000 25 #endif 26 27 #ifndef MADV_HWPOISON 28 # define MADV_HWPOISON 100 29 #endif 30 31 #ifndef MADV_MERGEABLE 32 # define MADV_MERGEABLE 12 33 #endif 34 35 #ifndef MADV_UNMERGEABLE 36 # define MADV_UNMERGEABLE 13 37 #endif 38 39 #ifndef EFD_SEMAPHORE 40 # define EFD_SEMAPHORE 1 41 #endif 42 43 #ifndef EFD_NONBLOCK 44 # define EFD_NONBLOCK 00004000 45 #endif 46 47 #ifndef EFD_CLOEXEC 48 # define EFD_CLOEXEC 02000000 49 #endif 50 51 #ifndef O_CLOEXEC 52 # define O_CLOEXEC 02000000 53 #endif 54 55 #ifndef SOCK_DCCP 56 # define SOCK_DCCP 6 57 #endif 58 59 #ifndef SOCK_CLOEXEC 60 # define SOCK_CLOEXEC 02000000 61 #endif 62 63 #ifndef SOCK_NONBLOCK 64 # define SOCK_NONBLOCK 00004000 65 #endif 66 67 #ifndef MSG_CMSG_CLOEXEC 68 # define MSG_CMSG_CLOEXEC 0x40000000 69 #endif 70 71 struct tp_field { 72 int offset; 73 union { 74 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 75 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 76 }; 77 }; 78 79 #define TP_UINT_FIELD(bits) \ 80 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 81 { \ 82 u##bits value; \ 83 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 84 return value; \ 85 } 86 87 TP_UINT_FIELD(8); 88 TP_UINT_FIELD(16); 89 TP_UINT_FIELD(32); 90 TP_UINT_FIELD(64); 91 92 #define TP_UINT_FIELD__SWAPPED(bits) \ 93 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 94 { \ 95 u##bits value; \ 96 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 97 return bswap_##bits(value);\ 98 } 99 100 TP_UINT_FIELD__SWAPPED(16); 101 TP_UINT_FIELD__SWAPPED(32); 102 TP_UINT_FIELD__SWAPPED(64); 103 104 static int tp_field__init_uint(struct tp_field *field, 105 struct format_field *format_field, 106 bool needs_swap) 107 { 108 field->offset = format_field->offset; 109 110 switch (format_field->size) { 111 case 1: 112 field->integer = tp_field__u8; 113 break; 114 case 2: 115 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 116 break; 117 case 4: 118 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 119 break; 120 case 8: 121 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 122 break; 123 default: 124 return -1; 125 } 126 127 return 0; 128 } 129 130 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 131 { 132 return sample->raw_data + field->offset; 133 } 134 135 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 136 { 137 field->offset = format_field->offset; 138 field->pointer = tp_field__ptr; 139 return 0; 140 } 141 142 struct syscall_tp { 143 struct tp_field id; 144 union { 145 struct tp_field args, ret; 146 }; 147 }; 148 149 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 150 struct tp_field *field, 151 const char *name) 152 { 153 struct format_field *format_field = perf_evsel__field(evsel, name); 154 155 if (format_field == NULL) 156 return -1; 157 158 return tp_field__init_uint(field, format_field, evsel->needs_swap); 159 } 160 161 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 162 ({ struct syscall_tp *sc = evsel->priv;\ 163 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 164 165 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 166 struct tp_field *field, 167 const char *name) 168 { 169 struct format_field *format_field = perf_evsel__field(evsel, name); 170 171 if (format_field == NULL) 172 return -1; 173 174 return tp_field__init_ptr(field, format_field); 175 } 176 177 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 178 ({ struct syscall_tp *sc = evsel->priv;\ 179 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 180 181 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 182 { 183 zfree(&evsel->priv); 184 perf_evsel__delete(evsel); 185 } 186 187 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 188 { 189 evsel->priv = malloc(sizeof(struct syscall_tp)); 190 if (evsel->priv != NULL) { 191 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 192 goto out_delete; 193 194 evsel->handler = handler; 195 return 0; 196 } 197 198 return -ENOMEM; 199 200 out_delete: 201 zfree(&evsel->priv); 202 return -ENOENT; 203 } 204 205 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 206 { 207 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 208 209 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 210 if (evsel == NULL) 211 evsel = perf_evsel__newtp("syscalls", direction); 212 213 if (evsel) { 214 if (perf_evsel__init_syscall_tp(evsel, handler)) 215 goto out_delete; 216 } 217 218 return evsel; 219 220 out_delete: 221 perf_evsel__delete_priv(evsel); 222 return NULL; 223 } 224 225 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 226 ({ struct syscall_tp *fields = evsel->priv; \ 227 fields->name.integer(&fields->name, sample); }) 228 229 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 230 ({ struct syscall_tp *fields = evsel->priv; \ 231 fields->name.pointer(&fields->name, sample); }) 232 233 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, 234 void *sys_enter_handler, 235 void *sys_exit_handler) 236 { 237 int ret = -1; 238 struct perf_evsel *sys_enter, *sys_exit; 239 240 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); 241 if (sys_enter == NULL) 242 goto out; 243 244 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 245 goto out_delete_sys_enter; 246 247 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); 248 if (sys_exit == NULL) 249 goto out_delete_sys_enter; 250 251 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 252 goto out_delete_sys_exit; 253 254 perf_evlist__add(evlist, sys_enter); 255 perf_evlist__add(evlist, sys_exit); 256 257 ret = 0; 258 out: 259 return ret; 260 261 out_delete_sys_exit: 262 perf_evsel__delete_priv(sys_exit); 263 out_delete_sys_enter: 264 perf_evsel__delete_priv(sys_enter); 265 goto out; 266 } 267 268 269 struct syscall_arg { 270 unsigned long val; 271 struct thread *thread; 272 struct trace *trace; 273 void *parm; 274 u8 idx; 275 u8 mask; 276 }; 277 278 struct strarray { 279 int offset; 280 int nr_entries; 281 const char **entries; 282 }; 283 284 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 285 .nr_entries = ARRAY_SIZE(array), \ 286 .entries = array, \ 287 } 288 289 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 290 .offset = off, \ 291 .nr_entries = ARRAY_SIZE(array), \ 292 .entries = array, \ 293 } 294 295 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 296 const char *intfmt, 297 struct syscall_arg *arg) 298 { 299 struct strarray *sa = arg->parm; 300 int idx = arg->val - sa->offset; 301 302 if (idx < 0 || idx >= sa->nr_entries) 303 return scnprintf(bf, size, intfmt, arg->val); 304 305 return scnprintf(bf, size, "%s", sa->entries[idx]); 306 } 307 308 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 309 struct syscall_arg *arg) 310 { 311 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 312 } 313 314 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 315 316 #if defined(__i386__) || defined(__x86_64__) 317 /* 318 * FIXME: Make this available to all arches as soon as the ioctl beautifier 319 * gets rewritten to support all arches. 320 */ 321 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 322 struct syscall_arg *arg) 323 { 324 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 325 } 326 327 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 328 #endif /* defined(__i386__) || defined(__x86_64__) */ 329 330 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 331 struct syscall_arg *arg); 332 333 #define SCA_FD syscall_arg__scnprintf_fd 334 335 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 336 struct syscall_arg *arg) 337 { 338 int fd = arg->val; 339 340 if (fd == AT_FDCWD) 341 return scnprintf(bf, size, "CWD"); 342 343 return syscall_arg__scnprintf_fd(bf, size, arg); 344 } 345 346 #define SCA_FDAT syscall_arg__scnprintf_fd_at 347 348 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 349 struct syscall_arg *arg); 350 351 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 352 353 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 354 struct syscall_arg *arg) 355 { 356 return scnprintf(bf, size, "%#lx", arg->val); 357 } 358 359 #define SCA_HEX syscall_arg__scnprintf_hex 360 361 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 362 struct syscall_arg *arg) 363 { 364 int printed = 0, prot = arg->val; 365 366 if (prot == PROT_NONE) 367 return scnprintf(bf, size, "NONE"); 368 #define P_MMAP_PROT(n) \ 369 if (prot & PROT_##n) { \ 370 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 371 prot &= ~PROT_##n; \ 372 } 373 374 P_MMAP_PROT(EXEC); 375 P_MMAP_PROT(READ); 376 P_MMAP_PROT(WRITE); 377 #ifdef PROT_SEM 378 P_MMAP_PROT(SEM); 379 #endif 380 P_MMAP_PROT(GROWSDOWN); 381 P_MMAP_PROT(GROWSUP); 382 #undef P_MMAP_PROT 383 384 if (prot) 385 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 386 387 return printed; 388 } 389 390 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 391 392 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 393 struct syscall_arg *arg) 394 { 395 int printed = 0, flags = arg->val; 396 397 #define P_MMAP_FLAG(n) \ 398 if (flags & MAP_##n) { \ 399 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 400 flags &= ~MAP_##n; \ 401 } 402 403 P_MMAP_FLAG(SHARED); 404 P_MMAP_FLAG(PRIVATE); 405 #ifdef MAP_32BIT 406 P_MMAP_FLAG(32BIT); 407 #endif 408 P_MMAP_FLAG(ANONYMOUS); 409 P_MMAP_FLAG(DENYWRITE); 410 P_MMAP_FLAG(EXECUTABLE); 411 P_MMAP_FLAG(FILE); 412 P_MMAP_FLAG(FIXED); 413 P_MMAP_FLAG(GROWSDOWN); 414 #ifdef MAP_HUGETLB 415 P_MMAP_FLAG(HUGETLB); 416 #endif 417 P_MMAP_FLAG(LOCKED); 418 P_MMAP_FLAG(NONBLOCK); 419 P_MMAP_FLAG(NORESERVE); 420 P_MMAP_FLAG(POPULATE); 421 P_MMAP_FLAG(STACK); 422 #ifdef MAP_UNINITIALIZED 423 P_MMAP_FLAG(UNINITIALIZED); 424 #endif 425 #undef P_MMAP_FLAG 426 427 if (flags) 428 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 429 430 return printed; 431 } 432 433 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 434 435 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, 436 struct syscall_arg *arg) 437 { 438 int printed = 0, flags = arg->val; 439 440 #define P_MREMAP_FLAG(n) \ 441 if (flags & MREMAP_##n) { \ 442 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 443 flags &= ~MREMAP_##n; \ 444 } 445 446 P_MREMAP_FLAG(MAYMOVE); 447 #ifdef MREMAP_FIXED 448 P_MREMAP_FLAG(FIXED); 449 #endif 450 #undef P_MREMAP_FLAG 451 452 if (flags) 453 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 454 455 return printed; 456 } 457 458 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags 459 460 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 461 struct syscall_arg *arg) 462 { 463 int behavior = arg->val; 464 465 switch (behavior) { 466 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 467 P_MADV_BHV(NORMAL); 468 P_MADV_BHV(RANDOM); 469 P_MADV_BHV(SEQUENTIAL); 470 P_MADV_BHV(WILLNEED); 471 P_MADV_BHV(DONTNEED); 472 P_MADV_BHV(REMOVE); 473 P_MADV_BHV(DONTFORK); 474 P_MADV_BHV(DOFORK); 475 P_MADV_BHV(HWPOISON); 476 #ifdef MADV_SOFT_OFFLINE 477 P_MADV_BHV(SOFT_OFFLINE); 478 #endif 479 P_MADV_BHV(MERGEABLE); 480 P_MADV_BHV(UNMERGEABLE); 481 #ifdef MADV_HUGEPAGE 482 P_MADV_BHV(HUGEPAGE); 483 #endif 484 #ifdef MADV_NOHUGEPAGE 485 P_MADV_BHV(NOHUGEPAGE); 486 #endif 487 #ifdef MADV_DONTDUMP 488 P_MADV_BHV(DONTDUMP); 489 #endif 490 #ifdef MADV_DODUMP 491 P_MADV_BHV(DODUMP); 492 #endif 493 #undef P_MADV_PHV 494 default: break; 495 } 496 497 return scnprintf(bf, size, "%#x", behavior); 498 } 499 500 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 501 502 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 503 struct syscall_arg *arg) 504 { 505 int printed = 0, op = arg->val; 506 507 if (op == 0) 508 return scnprintf(bf, size, "NONE"); 509 #define P_CMD(cmd) \ 510 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 511 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 512 op &= ~LOCK_##cmd; \ 513 } 514 515 P_CMD(SH); 516 P_CMD(EX); 517 P_CMD(NB); 518 P_CMD(UN); 519 P_CMD(MAND); 520 P_CMD(RW); 521 P_CMD(READ); 522 P_CMD(WRITE); 523 #undef P_OP 524 525 if (op) 526 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); 527 528 return printed; 529 } 530 531 #define SCA_FLOCK syscall_arg__scnprintf_flock 532 533 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 534 { 535 enum syscall_futex_args { 536 SCF_UADDR = (1 << 0), 537 SCF_OP = (1 << 1), 538 SCF_VAL = (1 << 2), 539 SCF_TIMEOUT = (1 << 3), 540 SCF_UADDR2 = (1 << 4), 541 SCF_VAL3 = (1 << 5), 542 }; 543 int op = arg->val; 544 int cmd = op & FUTEX_CMD_MASK; 545 size_t printed = 0; 546 547 switch (cmd) { 548 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 549 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 550 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 551 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 552 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 553 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 554 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 555 P_FUTEX_OP(WAKE_OP); break; 556 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 557 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 558 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 559 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 560 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 561 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 562 default: printed = scnprintf(bf, size, "%#x", cmd); break; 563 } 564 565 if (op & FUTEX_PRIVATE_FLAG) 566 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 567 568 if (op & FUTEX_CLOCK_REALTIME) 569 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 570 571 return printed; 572 } 573 574 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 575 576 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 577 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 578 579 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 580 static DEFINE_STRARRAY(itimers); 581 582 static const char *whences[] = { "SET", "CUR", "END", 583 #ifdef SEEK_DATA 584 "DATA", 585 #endif 586 #ifdef SEEK_HOLE 587 "HOLE", 588 #endif 589 }; 590 static DEFINE_STRARRAY(whences); 591 592 static const char *fcntl_cmds[] = { 593 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 594 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 595 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 596 "F_GETOWNER_UIDS", 597 }; 598 static DEFINE_STRARRAY(fcntl_cmds); 599 600 static const char *rlimit_resources[] = { 601 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 602 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 603 "RTTIME", 604 }; 605 static DEFINE_STRARRAY(rlimit_resources); 606 607 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 608 static DEFINE_STRARRAY(sighow); 609 610 static const char *clockid[] = { 611 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 612 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 613 }; 614 static DEFINE_STRARRAY(clockid); 615 616 static const char *socket_families[] = { 617 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 618 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 619 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 620 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 621 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 622 "ALG", "NFC", "VSOCK", 623 }; 624 static DEFINE_STRARRAY(socket_families); 625 626 #ifndef SOCK_TYPE_MASK 627 #define SOCK_TYPE_MASK 0xf 628 #endif 629 630 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 631 struct syscall_arg *arg) 632 { 633 size_t printed; 634 int type = arg->val, 635 flags = type & ~SOCK_TYPE_MASK; 636 637 type &= SOCK_TYPE_MASK; 638 /* 639 * Can't use a strarray, MIPS may override for ABI reasons. 640 */ 641 switch (type) { 642 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 643 P_SK_TYPE(STREAM); 644 P_SK_TYPE(DGRAM); 645 P_SK_TYPE(RAW); 646 P_SK_TYPE(RDM); 647 P_SK_TYPE(SEQPACKET); 648 P_SK_TYPE(DCCP); 649 P_SK_TYPE(PACKET); 650 #undef P_SK_TYPE 651 default: 652 printed = scnprintf(bf, size, "%#x", type); 653 } 654 655 #define P_SK_FLAG(n) \ 656 if (flags & SOCK_##n) { \ 657 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 658 flags &= ~SOCK_##n; \ 659 } 660 661 P_SK_FLAG(CLOEXEC); 662 P_SK_FLAG(NONBLOCK); 663 #undef P_SK_FLAG 664 665 if (flags) 666 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 667 668 return printed; 669 } 670 671 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 672 673 #ifndef MSG_PROBE 674 #define MSG_PROBE 0x10 675 #endif 676 #ifndef MSG_WAITFORONE 677 #define MSG_WAITFORONE 0x10000 678 #endif 679 #ifndef MSG_SENDPAGE_NOTLAST 680 #define MSG_SENDPAGE_NOTLAST 0x20000 681 #endif 682 #ifndef MSG_FASTOPEN 683 #define MSG_FASTOPEN 0x20000000 684 #endif 685 686 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 687 struct syscall_arg *arg) 688 { 689 int printed = 0, flags = arg->val; 690 691 if (flags == 0) 692 return scnprintf(bf, size, "NONE"); 693 #define P_MSG_FLAG(n) \ 694 if (flags & MSG_##n) { \ 695 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 696 flags &= ~MSG_##n; \ 697 } 698 699 P_MSG_FLAG(OOB); 700 P_MSG_FLAG(PEEK); 701 P_MSG_FLAG(DONTROUTE); 702 P_MSG_FLAG(TRYHARD); 703 P_MSG_FLAG(CTRUNC); 704 P_MSG_FLAG(PROBE); 705 P_MSG_FLAG(TRUNC); 706 P_MSG_FLAG(DONTWAIT); 707 P_MSG_FLAG(EOR); 708 P_MSG_FLAG(WAITALL); 709 P_MSG_FLAG(FIN); 710 P_MSG_FLAG(SYN); 711 P_MSG_FLAG(CONFIRM); 712 P_MSG_FLAG(RST); 713 P_MSG_FLAG(ERRQUEUE); 714 P_MSG_FLAG(NOSIGNAL); 715 P_MSG_FLAG(MORE); 716 P_MSG_FLAG(WAITFORONE); 717 P_MSG_FLAG(SENDPAGE_NOTLAST); 718 P_MSG_FLAG(FASTOPEN); 719 P_MSG_FLAG(CMSG_CLOEXEC); 720 #undef P_MSG_FLAG 721 722 if (flags) 723 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 724 725 return printed; 726 } 727 728 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 729 730 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 731 struct syscall_arg *arg) 732 { 733 size_t printed = 0; 734 int mode = arg->val; 735 736 if (mode == F_OK) /* 0 */ 737 return scnprintf(bf, size, "F"); 738 #define P_MODE(n) \ 739 if (mode & n##_OK) { \ 740 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 741 mode &= ~n##_OK; \ 742 } 743 744 P_MODE(R); 745 P_MODE(W); 746 P_MODE(X); 747 #undef P_MODE 748 749 if (mode) 750 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 751 752 return printed; 753 } 754 755 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 756 757 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 758 struct syscall_arg *arg) 759 { 760 int printed = 0, flags = arg->val; 761 762 if (!(flags & O_CREAT)) 763 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 764 765 if (flags == 0) 766 return scnprintf(bf, size, "RDONLY"); 767 #define P_FLAG(n) \ 768 if (flags & O_##n) { \ 769 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 770 flags &= ~O_##n; \ 771 } 772 773 P_FLAG(APPEND); 774 P_FLAG(ASYNC); 775 P_FLAG(CLOEXEC); 776 P_FLAG(CREAT); 777 P_FLAG(DIRECT); 778 P_FLAG(DIRECTORY); 779 P_FLAG(EXCL); 780 P_FLAG(LARGEFILE); 781 P_FLAG(NOATIME); 782 P_FLAG(NOCTTY); 783 #ifdef O_NONBLOCK 784 P_FLAG(NONBLOCK); 785 #elif O_NDELAY 786 P_FLAG(NDELAY); 787 #endif 788 #ifdef O_PATH 789 P_FLAG(PATH); 790 #endif 791 P_FLAG(RDWR); 792 #ifdef O_DSYNC 793 if ((flags & O_SYNC) == O_SYNC) 794 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 795 else { 796 P_FLAG(DSYNC); 797 } 798 #else 799 P_FLAG(SYNC); 800 #endif 801 P_FLAG(TRUNC); 802 P_FLAG(WRONLY); 803 #undef P_FLAG 804 805 if (flags) 806 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 807 808 return printed; 809 } 810 811 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 812 813 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 814 struct syscall_arg *arg) 815 { 816 int printed = 0, flags = arg->val; 817 818 if (flags == 0) 819 return scnprintf(bf, size, "NONE"); 820 #define P_FLAG(n) \ 821 if (flags & EFD_##n) { \ 822 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 823 flags &= ~EFD_##n; \ 824 } 825 826 P_FLAG(SEMAPHORE); 827 P_FLAG(CLOEXEC); 828 P_FLAG(NONBLOCK); 829 #undef P_FLAG 830 831 if (flags) 832 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 833 834 return printed; 835 } 836 837 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 838 839 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 840 struct syscall_arg *arg) 841 { 842 int printed = 0, flags = arg->val; 843 844 #define P_FLAG(n) \ 845 if (flags & O_##n) { \ 846 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 847 flags &= ~O_##n; \ 848 } 849 850 P_FLAG(CLOEXEC); 851 P_FLAG(NONBLOCK); 852 #undef P_FLAG 853 854 if (flags) 855 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 856 857 return printed; 858 } 859 860 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 861 862 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 863 { 864 int sig = arg->val; 865 866 switch (sig) { 867 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 868 P_SIGNUM(HUP); 869 P_SIGNUM(INT); 870 P_SIGNUM(QUIT); 871 P_SIGNUM(ILL); 872 P_SIGNUM(TRAP); 873 P_SIGNUM(ABRT); 874 P_SIGNUM(BUS); 875 P_SIGNUM(FPE); 876 P_SIGNUM(KILL); 877 P_SIGNUM(USR1); 878 P_SIGNUM(SEGV); 879 P_SIGNUM(USR2); 880 P_SIGNUM(PIPE); 881 P_SIGNUM(ALRM); 882 P_SIGNUM(TERM); 883 P_SIGNUM(CHLD); 884 P_SIGNUM(CONT); 885 P_SIGNUM(STOP); 886 P_SIGNUM(TSTP); 887 P_SIGNUM(TTIN); 888 P_SIGNUM(TTOU); 889 P_SIGNUM(URG); 890 P_SIGNUM(XCPU); 891 P_SIGNUM(XFSZ); 892 P_SIGNUM(VTALRM); 893 P_SIGNUM(PROF); 894 P_SIGNUM(WINCH); 895 P_SIGNUM(IO); 896 P_SIGNUM(PWR); 897 P_SIGNUM(SYS); 898 #ifdef SIGEMT 899 P_SIGNUM(EMT); 900 #endif 901 #ifdef SIGSTKFLT 902 P_SIGNUM(STKFLT); 903 #endif 904 #ifdef SIGSWI 905 P_SIGNUM(SWI); 906 #endif 907 default: break; 908 } 909 910 return scnprintf(bf, size, "%#x", sig); 911 } 912 913 #define SCA_SIGNUM syscall_arg__scnprintf_signum 914 915 #if defined(__i386__) || defined(__x86_64__) 916 /* 917 * FIXME: Make this available to all arches. 918 */ 919 #define TCGETS 0x5401 920 921 static const char *tioctls[] = { 922 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 923 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 924 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 925 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 926 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 927 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 928 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 929 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 930 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 931 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 932 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 933 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 934 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 935 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 936 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 937 }; 938 939 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 940 #endif /* defined(__i386__) || defined(__x86_64__) */ 941 942 #define STRARRAY(arg, name, array) \ 943 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 944 .arg_parm = { [arg] = &strarray__##array, } 945 946 static struct syscall_fmt { 947 const char *name; 948 const char *alias; 949 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 950 void *arg_parm[6]; 951 bool errmsg; 952 bool timeout; 953 bool hexret; 954 } syscall_fmts[] = { 955 { .name = "access", .errmsg = true, 956 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 957 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 958 { .name = "brk", .hexret = true, 959 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 960 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 961 { .name = "close", .errmsg = true, 962 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 963 { .name = "connect", .errmsg = true, }, 964 { .name = "dup", .errmsg = true, 965 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 966 { .name = "dup2", .errmsg = true, 967 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 968 { .name = "dup3", .errmsg = true, 969 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 970 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 971 { .name = "eventfd2", .errmsg = true, 972 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 973 { .name = "faccessat", .errmsg = true, 974 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 975 { .name = "fadvise64", .errmsg = true, 976 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 977 { .name = "fallocate", .errmsg = true, 978 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 979 { .name = "fchdir", .errmsg = true, 980 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 981 { .name = "fchmod", .errmsg = true, 982 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 983 { .name = "fchmodat", .errmsg = true, 984 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 985 { .name = "fchown", .errmsg = true, 986 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 987 { .name = "fchownat", .errmsg = true, 988 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 989 { .name = "fcntl", .errmsg = true, 990 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 991 [1] = SCA_STRARRAY, /* cmd */ }, 992 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 993 { .name = "fdatasync", .errmsg = true, 994 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 995 { .name = "flock", .errmsg = true, 996 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 997 [1] = SCA_FLOCK, /* cmd */ }, }, 998 { .name = "fsetxattr", .errmsg = true, 999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1000 { .name = "fstat", .errmsg = true, .alias = "newfstat", 1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1002 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 1003 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1004 { .name = "fstatfs", .errmsg = true, 1005 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1006 { .name = "fsync", .errmsg = true, 1007 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1008 { .name = "ftruncate", .errmsg = true, 1009 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1010 { .name = "futex", .errmsg = true, 1011 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 1012 { .name = "futimesat", .errmsg = true, 1013 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1014 { .name = "getdents", .errmsg = true, 1015 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1016 { .name = "getdents64", .errmsg = true, 1017 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1018 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1019 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1020 { .name = "ioctl", .errmsg = true, 1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1022 #if defined(__i386__) || defined(__x86_64__) 1023 /* 1024 * FIXME: Make this available to all arches. 1025 */ 1026 [1] = SCA_STRHEXARRAY, /* cmd */ 1027 [2] = SCA_HEX, /* arg */ }, 1028 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 1029 #else 1030 [2] = SCA_HEX, /* arg */ }, }, 1031 #endif 1032 { .name = "kill", .errmsg = true, 1033 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1034 { .name = "linkat", .errmsg = true, 1035 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1036 { .name = "lseek", .errmsg = true, 1037 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1038 [2] = SCA_STRARRAY, /* whence */ }, 1039 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 1040 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 1041 { .name = "madvise", .errmsg = true, 1042 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1043 [2] = SCA_MADV_BHV, /* behavior */ }, }, 1044 { .name = "mkdirat", .errmsg = true, 1045 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1046 { .name = "mknodat", .errmsg = true, 1047 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1048 { .name = "mlock", .errmsg = true, 1049 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1050 { .name = "mlockall", .errmsg = true, 1051 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1052 { .name = "mmap", .hexret = true, 1053 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1054 [2] = SCA_MMAP_PROT, /* prot */ 1055 [3] = SCA_MMAP_FLAGS, /* flags */ 1056 [4] = SCA_FD, /* fd */ }, }, 1057 { .name = "mprotect", .errmsg = true, 1058 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1059 [2] = SCA_MMAP_PROT, /* prot */ }, }, 1060 { .name = "mremap", .hexret = true, 1061 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1062 [3] = SCA_MREMAP_FLAGS, /* flags */ 1063 [4] = SCA_HEX, /* new_addr */ }, }, 1064 { .name = "munlock", .errmsg = true, 1065 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1066 { .name = "munmap", .errmsg = true, 1067 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1068 { .name = "name_to_handle_at", .errmsg = true, 1069 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1070 { .name = "newfstatat", .errmsg = true, 1071 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1072 { .name = "open", .errmsg = true, 1073 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 1074 { .name = "open_by_handle_at", .errmsg = true, 1075 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1076 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1077 { .name = "openat", .errmsg = true, 1078 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1079 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1080 { .name = "pipe2", .errmsg = true, 1081 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 1082 { .name = "poll", .errmsg = true, .timeout = true, }, 1083 { .name = "ppoll", .errmsg = true, .timeout = true, }, 1084 { .name = "pread", .errmsg = true, .alias = "pread64", 1085 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1086 { .name = "preadv", .errmsg = true, .alias = "pread", 1087 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1088 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 1089 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 1090 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1091 { .name = "pwritev", .errmsg = true, 1092 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1093 { .name = "read", .errmsg = true, 1094 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1095 { .name = "readlinkat", .errmsg = true, 1096 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1097 { .name = "readv", .errmsg = true, 1098 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1099 { .name = "recvfrom", .errmsg = true, 1100 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1101 { .name = "recvmmsg", .errmsg = true, 1102 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1103 { .name = "recvmsg", .errmsg = true, 1104 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1105 { .name = "renameat", .errmsg = true, 1106 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1107 { .name = "rt_sigaction", .errmsg = true, 1108 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1109 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1110 { .name = "rt_sigqueueinfo", .errmsg = true, 1111 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1112 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1113 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1114 { .name = "select", .errmsg = true, .timeout = true, }, 1115 { .name = "sendmmsg", .errmsg = true, 1116 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1117 { .name = "sendmsg", .errmsg = true, 1118 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1119 { .name = "sendto", .errmsg = true, 1120 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1121 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1122 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1123 { .name = "shutdown", .errmsg = true, 1124 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1125 { .name = "socket", .errmsg = true, 1126 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1127 [1] = SCA_SK_TYPE, /* type */ }, 1128 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1129 { .name = "socketpair", .errmsg = true, 1130 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1131 [1] = SCA_SK_TYPE, /* type */ }, 1132 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1133 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1134 { .name = "symlinkat", .errmsg = true, 1135 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1136 { .name = "tgkill", .errmsg = true, 1137 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1138 { .name = "tkill", .errmsg = true, 1139 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1140 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1141 { .name = "unlinkat", .errmsg = true, 1142 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1143 { .name = "utimensat", .errmsg = true, 1144 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1145 { .name = "write", .errmsg = true, 1146 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1147 { .name = "writev", .errmsg = true, 1148 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1149 }; 1150 1151 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1152 { 1153 const struct syscall_fmt *fmt = fmtp; 1154 return strcmp(name, fmt->name); 1155 } 1156 1157 static struct syscall_fmt *syscall_fmt__find(const char *name) 1158 { 1159 const int nmemb = ARRAY_SIZE(syscall_fmts); 1160 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1161 } 1162 1163 struct syscall { 1164 struct event_format *tp_format; 1165 int nr_args; 1166 struct format_field *args; 1167 const char *name; 1168 bool filtered; 1169 bool is_exit; 1170 struct syscall_fmt *fmt; 1171 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1172 void **arg_parm; 1173 }; 1174 1175 static size_t fprintf_duration(unsigned long t, FILE *fp) 1176 { 1177 double duration = (double)t / NSEC_PER_MSEC; 1178 size_t printed = fprintf(fp, "("); 1179 1180 if (duration >= 1.0) 1181 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1182 else if (duration >= 0.01) 1183 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1184 else 1185 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1186 return printed + fprintf(fp, "): "); 1187 } 1188 1189 struct thread_trace { 1190 u64 entry_time; 1191 u64 exit_time; 1192 bool entry_pending; 1193 unsigned long nr_events; 1194 unsigned long pfmaj, pfmin; 1195 char *entry_str; 1196 double runtime_ms; 1197 struct { 1198 int max; 1199 char **table; 1200 } paths; 1201 1202 struct intlist *syscall_stats; 1203 }; 1204 1205 static struct thread_trace *thread_trace__new(void) 1206 { 1207 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1208 1209 if (ttrace) 1210 ttrace->paths.max = -1; 1211 1212 ttrace->syscall_stats = intlist__new(NULL); 1213 1214 return ttrace; 1215 } 1216 1217 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 1218 { 1219 struct thread_trace *ttrace; 1220 1221 if (thread == NULL) 1222 goto fail; 1223 1224 if (thread__priv(thread) == NULL) 1225 thread__set_priv(thread, thread_trace__new()); 1226 1227 if (thread__priv(thread) == NULL) 1228 goto fail; 1229 1230 ttrace = thread__priv(thread); 1231 ++ttrace->nr_events; 1232 1233 return ttrace; 1234 fail: 1235 color_fprintf(fp, PERF_COLOR_RED, 1236 "WARNING: not enough memory, dropping samples!\n"); 1237 return NULL; 1238 } 1239 1240 #define TRACE_PFMAJ (1 << 0) 1241 #define TRACE_PFMIN (1 << 1) 1242 1243 struct trace { 1244 struct perf_tool tool; 1245 struct { 1246 int machine; 1247 int open_id; 1248 } audit; 1249 struct { 1250 int max; 1251 struct syscall *table; 1252 } syscalls; 1253 struct record_opts opts; 1254 struct perf_evlist *evlist; 1255 struct machine *host; 1256 struct thread *current; 1257 u64 base_time; 1258 FILE *output; 1259 unsigned long nr_events; 1260 struct strlist *ev_qualifier; 1261 const char *last_vfs_getname; 1262 struct intlist *tid_list; 1263 struct intlist *pid_list; 1264 struct { 1265 size_t nr; 1266 pid_t *entries; 1267 } filter_pids; 1268 double duration_filter; 1269 double runtime_ms; 1270 struct { 1271 u64 vfs_getname, 1272 proc_getname; 1273 } stats; 1274 bool not_ev_qualifier; 1275 bool live; 1276 bool full_time; 1277 bool sched; 1278 bool multiple_threads; 1279 bool summary; 1280 bool summary_only; 1281 bool show_comm; 1282 bool show_tool_stats; 1283 bool trace_syscalls; 1284 bool force; 1285 int trace_pgfaults; 1286 }; 1287 1288 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 1289 { 1290 struct thread_trace *ttrace = thread__priv(thread); 1291 1292 if (fd > ttrace->paths.max) { 1293 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 1294 1295 if (npath == NULL) 1296 return -1; 1297 1298 if (ttrace->paths.max != -1) { 1299 memset(npath + ttrace->paths.max + 1, 0, 1300 (fd - ttrace->paths.max) * sizeof(char *)); 1301 } else { 1302 memset(npath, 0, (fd + 1) * sizeof(char *)); 1303 } 1304 1305 ttrace->paths.table = npath; 1306 ttrace->paths.max = fd; 1307 } 1308 1309 ttrace->paths.table[fd] = strdup(pathname); 1310 1311 return ttrace->paths.table[fd] != NULL ? 0 : -1; 1312 } 1313 1314 static int thread__read_fd_path(struct thread *thread, int fd) 1315 { 1316 char linkname[PATH_MAX], pathname[PATH_MAX]; 1317 struct stat st; 1318 int ret; 1319 1320 if (thread->pid_ == thread->tid) { 1321 scnprintf(linkname, sizeof(linkname), 1322 "/proc/%d/fd/%d", thread->pid_, fd); 1323 } else { 1324 scnprintf(linkname, sizeof(linkname), 1325 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1326 } 1327 1328 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1329 return -1; 1330 1331 ret = readlink(linkname, pathname, sizeof(pathname)); 1332 1333 if (ret < 0 || ret > st.st_size) 1334 return -1; 1335 1336 pathname[ret] = '\0'; 1337 return trace__set_fd_pathname(thread, fd, pathname); 1338 } 1339 1340 static const char *thread__fd_path(struct thread *thread, int fd, 1341 struct trace *trace) 1342 { 1343 struct thread_trace *ttrace = thread__priv(thread); 1344 1345 if (ttrace == NULL) 1346 return NULL; 1347 1348 if (fd < 0) 1349 return NULL; 1350 1351 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 1352 if (!trace->live) 1353 return NULL; 1354 ++trace->stats.proc_getname; 1355 if (thread__read_fd_path(thread, fd)) 1356 return NULL; 1357 } 1358 1359 return ttrace->paths.table[fd]; 1360 } 1361 1362 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1363 struct syscall_arg *arg) 1364 { 1365 int fd = arg->val; 1366 size_t printed = scnprintf(bf, size, "%d", fd); 1367 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1368 1369 if (path) 1370 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1371 1372 return printed; 1373 } 1374 1375 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1376 struct syscall_arg *arg) 1377 { 1378 int fd = arg->val; 1379 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1380 struct thread_trace *ttrace = thread__priv(arg->thread); 1381 1382 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1383 zfree(&ttrace->paths.table[fd]); 1384 1385 return printed; 1386 } 1387 1388 static bool trace__filter_duration(struct trace *trace, double t) 1389 { 1390 return t < (trace->duration_filter * NSEC_PER_MSEC); 1391 } 1392 1393 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1394 { 1395 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1396 1397 return fprintf(fp, "%10.3f ", ts); 1398 } 1399 1400 static bool done = false; 1401 static bool interrupted = false; 1402 1403 static void sig_handler(int sig) 1404 { 1405 done = true; 1406 interrupted = sig == SIGINT; 1407 } 1408 1409 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1410 u64 duration, u64 tstamp, FILE *fp) 1411 { 1412 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1413 printed += fprintf_duration(duration, fp); 1414 1415 if (trace->multiple_threads) { 1416 if (trace->show_comm) 1417 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1418 printed += fprintf(fp, "%d ", thread->tid); 1419 } 1420 1421 return printed; 1422 } 1423 1424 static int trace__process_event(struct trace *trace, struct machine *machine, 1425 union perf_event *event, struct perf_sample *sample) 1426 { 1427 int ret = 0; 1428 1429 switch (event->header.type) { 1430 case PERF_RECORD_LOST: 1431 color_fprintf(trace->output, PERF_COLOR_RED, 1432 "LOST %" PRIu64 " events!\n", event->lost.lost); 1433 ret = machine__process_lost_event(machine, event, sample); 1434 default: 1435 ret = machine__process_event(machine, event, sample); 1436 break; 1437 } 1438 1439 return ret; 1440 } 1441 1442 static int trace__tool_process(struct perf_tool *tool, 1443 union perf_event *event, 1444 struct perf_sample *sample, 1445 struct machine *machine) 1446 { 1447 struct trace *trace = container_of(tool, struct trace, tool); 1448 return trace__process_event(trace, machine, event, sample); 1449 } 1450 1451 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1452 { 1453 int err = symbol__init(NULL); 1454 1455 if (err) 1456 return err; 1457 1458 trace->host = machine__new_host(); 1459 if (trace->host == NULL) 1460 return -ENOMEM; 1461 1462 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1463 evlist->threads, trace__tool_process, false); 1464 if (err) 1465 symbol__exit(); 1466 1467 return err; 1468 } 1469 1470 static int syscall__set_arg_fmts(struct syscall *sc) 1471 { 1472 struct format_field *field; 1473 int idx = 0; 1474 1475 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); 1476 if (sc->arg_scnprintf == NULL) 1477 return -1; 1478 1479 if (sc->fmt) 1480 sc->arg_parm = sc->fmt->arg_parm; 1481 1482 for (field = sc->args; field; field = field->next) { 1483 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1484 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1485 else if (field->flags & FIELD_IS_POINTER) 1486 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1487 ++idx; 1488 } 1489 1490 return 0; 1491 } 1492 1493 static int trace__read_syscall_info(struct trace *trace, int id) 1494 { 1495 char tp_name[128]; 1496 struct syscall *sc; 1497 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1498 1499 if (name == NULL) 1500 return -1; 1501 1502 if (id > trace->syscalls.max) { 1503 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1504 1505 if (nsyscalls == NULL) 1506 return -1; 1507 1508 if (trace->syscalls.max != -1) { 1509 memset(nsyscalls + trace->syscalls.max + 1, 0, 1510 (id - trace->syscalls.max) * sizeof(*sc)); 1511 } else { 1512 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1513 } 1514 1515 trace->syscalls.table = nsyscalls; 1516 trace->syscalls.max = id; 1517 } 1518 1519 sc = trace->syscalls.table + id; 1520 sc->name = name; 1521 1522 if (trace->ev_qualifier) { 1523 bool in = strlist__find(trace->ev_qualifier, name) != NULL; 1524 1525 if (!(in ^ trace->not_ev_qualifier)) { 1526 sc->filtered = true; 1527 /* 1528 * No need to do read tracepoint information since this will be 1529 * filtered out. 1530 */ 1531 return 0; 1532 } 1533 } 1534 1535 sc->fmt = syscall_fmt__find(sc->name); 1536 1537 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1538 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1539 1540 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 1541 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1542 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1543 } 1544 1545 if (sc->tp_format == NULL) 1546 return -1; 1547 1548 sc->args = sc->tp_format->format.fields; 1549 sc->nr_args = sc->tp_format->format.nr_fields; 1550 /* drop nr field - not relevant here; does not exist on older kernels */ 1551 if (sc->args && strcmp(sc->args->name, "nr") == 0) { 1552 sc->args = sc->args->next; 1553 --sc->nr_args; 1554 } 1555 1556 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1557 1558 return syscall__set_arg_fmts(sc); 1559 } 1560 1561 /* 1562 * args is to be interpreted as a series of longs but we need to handle 1563 * 8-byte unaligned accesses. args points to raw_data within the event 1564 * and raw_data is guaranteed to be 8-byte unaligned because it is 1565 * preceded by raw_size which is a u32. So we need to copy args to a temp 1566 * variable to read it. Most notably this avoids extended load instructions 1567 * on unaligned addresses 1568 */ 1569 1570 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1571 unsigned char *args, struct trace *trace, 1572 struct thread *thread) 1573 { 1574 size_t printed = 0; 1575 unsigned char *p; 1576 unsigned long val; 1577 1578 if (sc->args != NULL) { 1579 struct format_field *field; 1580 u8 bit = 1; 1581 struct syscall_arg arg = { 1582 .idx = 0, 1583 .mask = 0, 1584 .trace = trace, 1585 .thread = thread, 1586 }; 1587 1588 for (field = sc->args; field; 1589 field = field->next, ++arg.idx, bit <<= 1) { 1590 if (arg.mask & bit) 1591 continue; 1592 1593 /* special care for unaligned accesses */ 1594 p = args + sizeof(unsigned long) * arg.idx; 1595 memcpy(&val, p, sizeof(val)); 1596 1597 /* 1598 * Suppress this argument if its value is zero and 1599 * and we don't have a string associated in an 1600 * strarray for it. 1601 */ 1602 if (val == 0 && 1603 !(sc->arg_scnprintf && 1604 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1605 sc->arg_parm[arg.idx])) 1606 continue; 1607 1608 printed += scnprintf(bf + printed, size - printed, 1609 "%s%s: ", printed ? ", " : "", field->name); 1610 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1611 arg.val = val; 1612 if (sc->arg_parm) 1613 arg.parm = sc->arg_parm[arg.idx]; 1614 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1615 size - printed, &arg); 1616 } else { 1617 printed += scnprintf(bf + printed, size - printed, 1618 "%ld", val); 1619 } 1620 } 1621 } else { 1622 int i = 0; 1623 1624 while (i < 6) { 1625 /* special care for unaligned accesses */ 1626 p = args + sizeof(unsigned long) * i; 1627 memcpy(&val, p, sizeof(val)); 1628 printed += scnprintf(bf + printed, size - printed, 1629 "%sarg%d: %ld", 1630 printed ? ", " : "", i, val); 1631 ++i; 1632 } 1633 } 1634 1635 return printed; 1636 } 1637 1638 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1639 union perf_event *event, 1640 struct perf_sample *sample); 1641 1642 static struct syscall *trace__syscall_info(struct trace *trace, 1643 struct perf_evsel *evsel, int id) 1644 { 1645 1646 if (id < 0) { 1647 1648 /* 1649 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1650 * before that, leaving at a higher verbosity level till that is 1651 * explained. Reproduced with plain ftrace with: 1652 * 1653 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1654 * grep "NR -1 " /t/trace_pipe 1655 * 1656 * After generating some load on the machine. 1657 */ 1658 if (verbose > 1) { 1659 static u64 n; 1660 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1661 id, perf_evsel__name(evsel), ++n); 1662 } 1663 return NULL; 1664 } 1665 1666 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1667 trace__read_syscall_info(trace, id)) 1668 goto out_cant_read; 1669 1670 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1671 goto out_cant_read; 1672 1673 return &trace->syscalls.table[id]; 1674 1675 out_cant_read: 1676 if (verbose) { 1677 fprintf(trace->output, "Problems reading syscall %d", id); 1678 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1679 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1680 fputs(" information\n", trace->output); 1681 } 1682 return NULL; 1683 } 1684 1685 static void thread__update_stats(struct thread_trace *ttrace, 1686 int id, struct perf_sample *sample) 1687 { 1688 struct int_node *inode; 1689 struct stats *stats; 1690 u64 duration = 0; 1691 1692 inode = intlist__findnew(ttrace->syscall_stats, id); 1693 if (inode == NULL) 1694 return; 1695 1696 stats = inode->priv; 1697 if (stats == NULL) { 1698 stats = malloc(sizeof(struct stats)); 1699 if (stats == NULL) 1700 return; 1701 init_stats(stats); 1702 inode->priv = stats; 1703 } 1704 1705 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1706 duration = sample->time - ttrace->entry_time; 1707 1708 update_stats(stats, duration); 1709 } 1710 1711 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) 1712 { 1713 struct thread_trace *ttrace; 1714 u64 duration; 1715 size_t printed; 1716 1717 if (trace->current == NULL) 1718 return 0; 1719 1720 ttrace = thread__priv(trace->current); 1721 1722 if (!ttrace->entry_pending) 1723 return 0; 1724 1725 duration = sample->time - ttrace->entry_time; 1726 1727 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); 1728 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1729 ttrace->entry_pending = false; 1730 1731 return printed; 1732 } 1733 1734 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1735 union perf_event *event __maybe_unused, 1736 struct perf_sample *sample) 1737 { 1738 char *msg; 1739 void *args; 1740 size_t printed = 0; 1741 struct thread *thread; 1742 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1743 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1744 struct thread_trace *ttrace; 1745 1746 if (sc == NULL) 1747 return -1; 1748 1749 if (sc->filtered) 1750 return 0; 1751 1752 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1753 ttrace = thread__trace(thread, trace->output); 1754 if (ttrace == NULL) 1755 goto out_put; 1756 1757 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1758 1759 if (ttrace->entry_str == NULL) { 1760 ttrace->entry_str = malloc(1024); 1761 if (!ttrace->entry_str) 1762 goto out_put; 1763 } 1764 1765 if (!trace->summary_only) 1766 trace__printf_interrupted_entry(trace, sample); 1767 1768 ttrace->entry_time = sample->time; 1769 msg = ttrace->entry_str; 1770 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1771 1772 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, 1773 args, trace, thread); 1774 1775 if (sc->is_exit) { 1776 if (!trace->duration_filter && !trace->summary_only) { 1777 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1778 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1779 } 1780 } else 1781 ttrace->entry_pending = true; 1782 1783 if (trace->current != thread) { 1784 thread__put(trace->current); 1785 trace->current = thread__get(thread); 1786 } 1787 err = 0; 1788 out_put: 1789 thread__put(thread); 1790 return err; 1791 } 1792 1793 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1794 union perf_event *event __maybe_unused, 1795 struct perf_sample *sample) 1796 { 1797 long ret; 1798 u64 duration = 0; 1799 struct thread *thread; 1800 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1801 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1802 struct thread_trace *ttrace; 1803 1804 if (sc == NULL) 1805 return -1; 1806 1807 if (sc->filtered) 1808 return 0; 1809 1810 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1811 ttrace = thread__trace(thread, trace->output); 1812 if (ttrace == NULL) 1813 goto out_put; 1814 1815 if (trace->summary) 1816 thread__update_stats(ttrace, id, sample); 1817 1818 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1819 1820 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { 1821 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); 1822 trace->last_vfs_getname = NULL; 1823 ++trace->stats.vfs_getname; 1824 } 1825 1826 ttrace->exit_time = sample->time; 1827 1828 if (ttrace->entry_time) { 1829 duration = sample->time - ttrace->entry_time; 1830 if (trace__filter_duration(trace, duration)) 1831 goto out; 1832 } else if (trace->duration_filter) 1833 goto out; 1834 1835 if (trace->summary_only) 1836 goto out; 1837 1838 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1839 1840 if (ttrace->entry_pending) { 1841 fprintf(trace->output, "%-70s", ttrace->entry_str); 1842 } else { 1843 fprintf(trace->output, " ... ["); 1844 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1845 fprintf(trace->output, "]: %s()", sc->name); 1846 } 1847 1848 if (sc->fmt == NULL) { 1849 signed_print: 1850 fprintf(trace->output, ") = %ld", ret); 1851 } else if (ret < 0 && sc->fmt->errmsg) { 1852 char bf[STRERR_BUFSIZE]; 1853 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1854 *e = audit_errno_to_name(-ret); 1855 1856 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1857 } else if (ret == 0 && sc->fmt->timeout) 1858 fprintf(trace->output, ") = 0 Timeout"); 1859 else if (sc->fmt->hexret) 1860 fprintf(trace->output, ") = %#lx", ret); 1861 else 1862 goto signed_print; 1863 1864 fputc('\n', trace->output); 1865 out: 1866 ttrace->entry_pending = false; 1867 err = 0; 1868 out_put: 1869 thread__put(thread); 1870 return err; 1871 } 1872 1873 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1874 union perf_event *event __maybe_unused, 1875 struct perf_sample *sample) 1876 { 1877 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); 1878 return 0; 1879 } 1880 1881 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1882 union perf_event *event __maybe_unused, 1883 struct perf_sample *sample) 1884 { 1885 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1886 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1887 struct thread *thread = machine__findnew_thread(trace->host, 1888 sample->pid, 1889 sample->tid); 1890 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1891 1892 if (ttrace == NULL) 1893 goto out_dump; 1894 1895 ttrace->runtime_ms += runtime_ms; 1896 trace->runtime_ms += runtime_ms; 1897 thread__put(thread); 1898 return 0; 1899 1900 out_dump: 1901 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1902 evsel->name, 1903 perf_evsel__strval(evsel, sample, "comm"), 1904 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1905 runtime, 1906 perf_evsel__intval(evsel, sample, "vruntime")); 1907 thread__put(thread); 1908 return 0; 1909 } 1910 1911 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1912 union perf_event *event __maybe_unused, 1913 struct perf_sample *sample) 1914 { 1915 trace__printf_interrupted_entry(trace, sample); 1916 trace__fprintf_tstamp(trace, sample->time, trace->output); 1917 1918 if (trace->trace_syscalls) 1919 fprintf(trace->output, "( ): "); 1920 1921 fprintf(trace->output, "%s:", evsel->name); 1922 1923 if (evsel->tp_format) { 1924 event_format__fprintf(evsel->tp_format, sample->cpu, 1925 sample->raw_data, sample->raw_size, 1926 trace->output); 1927 } 1928 1929 fprintf(trace->output, ")\n"); 1930 return 0; 1931 } 1932 1933 static void print_location(FILE *f, struct perf_sample *sample, 1934 struct addr_location *al, 1935 bool print_dso, bool print_sym) 1936 { 1937 1938 if ((verbose || print_dso) && al->map) 1939 fprintf(f, "%s@", al->map->dso->long_name); 1940 1941 if ((verbose || print_sym) && al->sym) 1942 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 1943 al->addr - al->sym->start); 1944 else if (al->map) 1945 fprintf(f, "0x%" PRIx64, al->addr); 1946 else 1947 fprintf(f, "0x%" PRIx64, sample->addr); 1948 } 1949 1950 static int trace__pgfault(struct trace *trace, 1951 struct perf_evsel *evsel, 1952 union perf_event *event, 1953 struct perf_sample *sample) 1954 { 1955 struct thread *thread; 1956 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1957 struct addr_location al; 1958 char map_type = 'd'; 1959 struct thread_trace *ttrace; 1960 int err = -1; 1961 1962 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1963 ttrace = thread__trace(thread, trace->output); 1964 if (ttrace == NULL) 1965 goto out_put; 1966 1967 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 1968 ttrace->pfmaj++; 1969 else 1970 ttrace->pfmin++; 1971 1972 if (trace->summary_only) 1973 goto out; 1974 1975 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 1976 sample->ip, &al); 1977 1978 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); 1979 1980 fprintf(trace->output, "%sfault [", 1981 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 1982 "maj" : "min"); 1983 1984 print_location(trace->output, sample, &al, false, true); 1985 1986 fprintf(trace->output, "] => "); 1987 1988 thread__find_addr_location(thread, cpumode, MAP__VARIABLE, 1989 sample->addr, &al); 1990 1991 if (!al.map) { 1992 thread__find_addr_location(thread, cpumode, 1993 MAP__FUNCTION, sample->addr, &al); 1994 1995 if (al.map) 1996 map_type = 'x'; 1997 else 1998 map_type = '?'; 1999 } 2000 2001 print_location(trace->output, sample, &al, true, false); 2002 2003 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2004 out: 2005 err = 0; 2006 out_put: 2007 thread__put(thread); 2008 return err; 2009 } 2010 2011 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 2012 { 2013 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 2014 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 2015 return false; 2016 2017 if (trace->pid_list || trace->tid_list) 2018 return true; 2019 2020 return false; 2021 } 2022 2023 static int trace__process_sample(struct perf_tool *tool, 2024 union perf_event *event, 2025 struct perf_sample *sample, 2026 struct perf_evsel *evsel, 2027 struct machine *machine __maybe_unused) 2028 { 2029 struct trace *trace = container_of(tool, struct trace, tool); 2030 int err = 0; 2031 2032 tracepoint_handler handler = evsel->handler; 2033 2034 if (skip_sample(trace, sample)) 2035 return 0; 2036 2037 if (!trace->full_time && trace->base_time == 0) 2038 trace->base_time = sample->time; 2039 2040 if (handler) { 2041 ++trace->nr_events; 2042 handler(trace, evsel, event, sample); 2043 } 2044 2045 return err; 2046 } 2047 2048 static int parse_target_str(struct trace *trace) 2049 { 2050 if (trace->opts.target.pid) { 2051 trace->pid_list = intlist__new(trace->opts.target.pid); 2052 if (trace->pid_list == NULL) { 2053 pr_err("Error parsing process id string\n"); 2054 return -EINVAL; 2055 } 2056 } 2057 2058 if (trace->opts.target.tid) { 2059 trace->tid_list = intlist__new(trace->opts.target.tid); 2060 if (trace->tid_list == NULL) { 2061 pr_err("Error parsing thread id string\n"); 2062 return -EINVAL; 2063 } 2064 } 2065 2066 return 0; 2067 } 2068 2069 static int trace__record(struct trace *trace, int argc, const char **argv) 2070 { 2071 unsigned int rec_argc, i, j; 2072 const char **rec_argv; 2073 const char * const record_args[] = { 2074 "record", 2075 "-R", 2076 "-m", "1024", 2077 "-c", "1", 2078 }; 2079 2080 const char * const sc_args[] = { "-e", }; 2081 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2082 const char * const majpf_args[] = { "-e", "major-faults" }; 2083 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2084 const char * const minpf_args[] = { "-e", "minor-faults" }; 2085 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2086 2087 /* +1 is for the event string below */ 2088 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2089 majpf_args_nr + minpf_args_nr + argc; 2090 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2091 2092 if (rec_argv == NULL) 2093 return -ENOMEM; 2094 2095 j = 0; 2096 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2097 rec_argv[j++] = record_args[i]; 2098 2099 if (trace->trace_syscalls) { 2100 for (i = 0; i < sc_args_nr; i++) 2101 rec_argv[j++] = sc_args[i]; 2102 2103 /* event string may be different for older kernels - e.g., RHEL6 */ 2104 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2105 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2106 else if (is_valid_tracepoint("syscalls:sys_enter")) 2107 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2108 else { 2109 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2110 return -1; 2111 } 2112 } 2113 2114 if (trace->trace_pgfaults & TRACE_PFMAJ) 2115 for (i = 0; i < majpf_args_nr; i++) 2116 rec_argv[j++] = majpf_args[i]; 2117 2118 if (trace->trace_pgfaults & TRACE_PFMIN) 2119 for (i = 0; i < minpf_args_nr; i++) 2120 rec_argv[j++] = minpf_args[i]; 2121 2122 for (i = 0; i < (unsigned int)argc; i++) 2123 rec_argv[j++] = argv[i]; 2124 2125 return cmd_record(j, rec_argv, NULL); 2126 } 2127 2128 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2129 2130 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2131 { 2132 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2133 if (evsel == NULL) 2134 return; 2135 2136 if (perf_evsel__field(evsel, "pathname") == NULL) { 2137 perf_evsel__delete(evsel); 2138 return; 2139 } 2140 2141 evsel->handler = trace__vfs_getname; 2142 perf_evlist__add(evlist, evsel); 2143 } 2144 2145 static int perf_evlist__add_pgfault(struct perf_evlist *evlist, 2146 u64 config) 2147 { 2148 struct perf_evsel *evsel; 2149 struct perf_event_attr attr = { 2150 .type = PERF_TYPE_SOFTWARE, 2151 .mmap_data = 1, 2152 }; 2153 2154 attr.config = config; 2155 attr.sample_period = 1; 2156 2157 event_attr_init(&attr); 2158 2159 evsel = perf_evsel__new(&attr); 2160 if (!evsel) 2161 return -ENOMEM; 2162 2163 evsel->handler = trace__pgfault; 2164 perf_evlist__add(evlist, evsel); 2165 2166 return 0; 2167 } 2168 2169 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2170 { 2171 const u32 type = event->header.type; 2172 struct perf_evsel *evsel; 2173 2174 if (!trace->full_time && trace->base_time == 0) 2175 trace->base_time = sample->time; 2176 2177 if (type != PERF_RECORD_SAMPLE) { 2178 trace__process_event(trace, trace->host, event, sample); 2179 return; 2180 } 2181 2182 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2183 if (evsel == NULL) { 2184 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2185 return; 2186 } 2187 2188 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2189 sample->raw_data == NULL) { 2190 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2191 perf_evsel__name(evsel), sample->tid, 2192 sample->cpu, sample->raw_size); 2193 } else { 2194 tracepoint_handler handler = evsel->handler; 2195 handler(trace, evsel, event, sample); 2196 } 2197 } 2198 2199 static int trace__run(struct trace *trace, int argc, const char **argv) 2200 { 2201 struct perf_evlist *evlist = trace->evlist; 2202 int err = -1, i; 2203 unsigned long before; 2204 const bool forks = argc > 0; 2205 bool draining = false; 2206 2207 trace->live = true; 2208 2209 if (trace->trace_syscalls && 2210 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, 2211 trace__sys_exit)) 2212 goto out_error_raw_syscalls; 2213 2214 if (trace->trace_syscalls) 2215 perf_evlist__add_vfs_getname(evlist); 2216 2217 if ((trace->trace_pgfaults & TRACE_PFMAJ) && 2218 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { 2219 goto out_error_mem; 2220 } 2221 2222 if ((trace->trace_pgfaults & TRACE_PFMIN) && 2223 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) 2224 goto out_error_mem; 2225 2226 if (trace->sched && 2227 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2228 trace__sched_stat_runtime)) 2229 goto out_error_sched_stat_runtime; 2230 2231 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2232 if (err < 0) { 2233 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2234 goto out_delete_evlist; 2235 } 2236 2237 err = trace__symbols_init(trace, evlist); 2238 if (err < 0) { 2239 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2240 goto out_delete_evlist; 2241 } 2242 2243 perf_evlist__config(evlist, &trace->opts); 2244 2245 signal(SIGCHLD, sig_handler); 2246 signal(SIGINT, sig_handler); 2247 2248 if (forks) { 2249 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2250 argv, false, NULL); 2251 if (err < 0) { 2252 fprintf(trace->output, "Couldn't run the workload!\n"); 2253 goto out_delete_evlist; 2254 } 2255 } 2256 2257 err = perf_evlist__open(evlist); 2258 if (err < 0) 2259 goto out_error_open; 2260 2261 /* 2262 * Better not use !target__has_task() here because we need to cover the 2263 * case where no threads were specified in the command line, but a 2264 * workload was, and in that case we will fill in the thread_map when 2265 * we fork the workload in perf_evlist__prepare_workload. 2266 */ 2267 if (trace->filter_pids.nr > 0) 2268 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2269 else if (evlist->threads->map[0] == -1) 2270 err = perf_evlist__set_filter_pid(evlist, getpid()); 2271 2272 if (err < 0) { 2273 printf("err=%d,%s\n", -err, strerror(-err)); 2274 exit(1); 2275 } 2276 2277 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); 2278 if (err < 0) 2279 goto out_error_mmap; 2280 2281 if (!target__none(&trace->opts.target)) 2282 perf_evlist__enable(evlist); 2283 2284 if (forks) 2285 perf_evlist__start_workload(evlist); 2286 2287 trace->multiple_threads = evlist->threads->map[0] == -1 || 2288 evlist->threads->nr > 1 || 2289 perf_evlist__first(evlist)->attr.inherit; 2290 again: 2291 before = trace->nr_events; 2292 2293 for (i = 0; i < evlist->nr_mmaps; i++) { 2294 union perf_event *event; 2295 2296 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 2297 struct perf_sample sample; 2298 2299 ++trace->nr_events; 2300 2301 err = perf_evlist__parse_sample(evlist, event, &sample); 2302 if (err) { 2303 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2304 goto next_event; 2305 } 2306 2307 trace__handle_event(trace, event, &sample); 2308 next_event: 2309 perf_evlist__mmap_consume(evlist, i); 2310 2311 if (interrupted) 2312 goto out_disable; 2313 2314 if (done && !draining) { 2315 perf_evlist__disable(evlist); 2316 draining = true; 2317 } 2318 } 2319 } 2320 2321 if (trace->nr_events == before) { 2322 int timeout = done ? 100 : -1; 2323 2324 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2325 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2326 draining = true; 2327 2328 goto again; 2329 } 2330 } else { 2331 goto again; 2332 } 2333 2334 out_disable: 2335 thread__zput(trace->current); 2336 2337 perf_evlist__disable(evlist); 2338 2339 if (!err) { 2340 if (trace->summary) 2341 trace__fprintf_thread_summary(trace, trace->output); 2342 2343 if (trace->show_tool_stats) { 2344 fprintf(trace->output, "Stats:\n " 2345 " vfs_getname : %" PRIu64 "\n" 2346 " proc_getname: %" PRIu64 "\n", 2347 trace->stats.vfs_getname, 2348 trace->stats.proc_getname); 2349 } 2350 } 2351 2352 out_delete_evlist: 2353 perf_evlist__delete(evlist); 2354 trace->evlist = NULL; 2355 trace->live = false; 2356 return err; 2357 { 2358 char errbuf[BUFSIZ]; 2359 2360 out_error_sched_stat_runtime: 2361 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2362 goto out_error; 2363 2364 out_error_raw_syscalls: 2365 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2366 goto out_error; 2367 2368 out_error_mmap: 2369 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2370 goto out_error; 2371 2372 out_error_open: 2373 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2374 2375 out_error: 2376 fprintf(trace->output, "%s\n", errbuf); 2377 goto out_delete_evlist; 2378 } 2379 out_error_mem: 2380 fprintf(trace->output, "Not enough memory to run!\n"); 2381 goto out_delete_evlist; 2382 } 2383 2384 static int trace__replay(struct trace *trace) 2385 { 2386 const struct perf_evsel_str_handler handlers[] = { 2387 { "probe:vfs_getname", trace__vfs_getname, }, 2388 }; 2389 struct perf_data_file file = { 2390 .path = input_name, 2391 .mode = PERF_DATA_MODE_READ, 2392 .force = trace->force, 2393 }; 2394 struct perf_session *session; 2395 struct perf_evsel *evsel; 2396 int err = -1; 2397 2398 trace->tool.sample = trace__process_sample; 2399 trace->tool.mmap = perf_event__process_mmap; 2400 trace->tool.mmap2 = perf_event__process_mmap2; 2401 trace->tool.comm = perf_event__process_comm; 2402 trace->tool.exit = perf_event__process_exit; 2403 trace->tool.fork = perf_event__process_fork; 2404 trace->tool.attr = perf_event__process_attr; 2405 trace->tool.tracing_data = perf_event__process_tracing_data; 2406 trace->tool.build_id = perf_event__process_build_id; 2407 2408 trace->tool.ordered_events = true; 2409 trace->tool.ordering_requires_timestamps = true; 2410 2411 /* add tid to output */ 2412 trace->multiple_threads = true; 2413 2414 session = perf_session__new(&file, false, &trace->tool); 2415 if (session == NULL) 2416 return -1; 2417 2418 if (symbol__init(&session->header.env) < 0) 2419 goto out; 2420 2421 trace->host = &session->machines.host; 2422 2423 err = perf_session__set_tracepoints_handlers(session, handlers); 2424 if (err) 2425 goto out; 2426 2427 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2428 "raw_syscalls:sys_enter"); 2429 /* older kernels have syscalls tp versus raw_syscalls */ 2430 if (evsel == NULL) 2431 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2432 "syscalls:sys_enter"); 2433 2434 if (evsel && 2435 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2436 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2437 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2438 goto out; 2439 } 2440 2441 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2442 "raw_syscalls:sys_exit"); 2443 if (evsel == NULL) 2444 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2445 "syscalls:sys_exit"); 2446 if (evsel && 2447 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2448 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2449 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2450 goto out; 2451 } 2452 2453 evlist__for_each(session->evlist, evsel) { 2454 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2455 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2456 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2457 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2458 evsel->handler = trace__pgfault; 2459 } 2460 2461 err = parse_target_str(trace); 2462 if (err != 0) 2463 goto out; 2464 2465 setup_pager(); 2466 2467 err = perf_session__process_events(session); 2468 if (err) 2469 pr_err("Failed to process events, error %d", err); 2470 2471 else if (trace->summary) 2472 trace__fprintf_thread_summary(trace, trace->output); 2473 2474 out: 2475 perf_session__delete(session); 2476 2477 return err; 2478 } 2479 2480 static size_t trace__fprintf_threads_header(FILE *fp) 2481 { 2482 size_t printed; 2483 2484 printed = fprintf(fp, "\n Summary of events:\n\n"); 2485 2486 return printed; 2487 } 2488 2489 static size_t thread__dump_stats(struct thread_trace *ttrace, 2490 struct trace *trace, FILE *fp) 2491 { 2492 struct stats *stats; 2493 size_t printed = 0; 2494 struct syscall *sc; 2495 struct int_node *inode = intlist__first(ttrace->syscall_stats); 2496 2497 if (inode == NULL) 2498 return 0; 2499 2500 printed += fprintf(fp, "\n"); 2501 2502 printed += fprintf(fp, " syscall calls min avg max stddev\n"); 2503 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); 2504 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); 2505 2506 /* each int_node is a syscall */ 2507 while (inode) { 2508 stats = inode->priv; 2509 if (stats) { 2510 double min = (double)(stats->min) / NSEC_PER_MSEC; 2511 double max = (double)(stats->max) / NSEC_PER_MSEC; 2512 double avg = avg_stats(stats); 2513 double pct; 2514 u64 n = (u64) stats->n; 2515 2516 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2517 avg /= NSEC_PER_MSEC; 2518 2519 sc = &trace->syscalls.table[inode->i]; 2520 printed += fprintf(fp, " %-15s", sc->name); 2521 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", 2522 n, min, avg); 2523 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2524 } 2525 2526 inode = intlist__next(inode); 2527 } 2528 2529 printed += fprintf(fp, "\n\n"); 2530 2531 return printed; 2532 } 2533 2534 /* struct used to pass data to per-thread function */ 2535 struct summary_data { 2536 FILE *fp; 2537 struct trace *trace; 2538 size_t printed; 2539 }; 2540 2541 static int trace__fprintf_one_thread(struct thread *thread, void *priv) 2542 { 2543 struct summary_data *data = priv; 2544 FILE *fp = data->fp; 2545 size_t printed = data->printed; 2546 struct trace *trace = data->trace; 2547 struct thread_trace *ttrace = thread__priv(thread); 2548 double ratio; 2549 2550 if (ttrace == NULL) 2551 return 0; 2552 2553 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2554 2555 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2556 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2557 printed += fprintf(fp, "%.1f%%", ratio); 2558 if (ttrace->pfmaj) 2559 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2560 if (ttrace->pfmin) 2561 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2562 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2563 printed += thread__dump_stats(ttrace, trace, fp); 2564 2565 data->printed += printed; 2566 2567 return 0; 2568 } 2569 2570 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2571 { 2572 struct summary_data data = { 2573 .fp = fp, 2574 .trace = trace 2575 }; 2576 data.printed = trace__fprintf_threads_header(fp); 2577 2578 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); 2579 2580 return data.printed; 2581 } 2582 2583 static int trace__set_duration(const struct option *opt, const char *str, 2584 int unset __maybe_unused) 2585 { 2586 struct trace *trace = opt->value; 2587 2588 trace->duration_filter = atof(str); 2589 return 0; 2590 } 2591 2592 static int trace__set_filter_pids(const struct option *opt, const char *str, 2593 int unset __maybe_unused) 2594 { 2595 int ret = -1; 2596 size_t i; 2597 struct trace *trace = opt->value; 2598 /* 2599 * FIXME: introduce a intarray class, plain parse csv and create a 2600 * { int nr, int entries[] } struct... 2601 */ 2602 struct intlist *list = intlist__new(str); 2603 2604 if (list == NULL) 2605 return -1; 2606 2607 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2608 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2609 2610 if (trace->filter_pids.entries == NULL) 2611 goto out; 2612 2613 trace->filter_pids.entries[0] = getpid(); 2614 2615 for (i = 1; i < trace->filter_pids.nr; ++i) 2616 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2617 2618 intlist__delete(list); 2619 ret = 0; 2620 out: 2621 return ret; 2622 } 2623 2624 static int trace__open_output(struct trace *trace, const char *filename) 2625 { 2626 struct stat st; 2627 2628 if (!stat(filename, &st) && st.st_size) { 2629 char oldname[PATH_MAX]; 2630 2631 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2632 unlink(oldname); 2633 rename(filename, oldname); 2634 } 2635 2636 trace->output = fopen(filename, "w"); 2637 2638 return trace->output == NULL ? -errno : 0; 2639 } 2640 2641 static int parse_pagefaults(const struct option *opt, const char *str, 2642 int unset __maybe_unused) 2643 { 2644 int *trace_pgfaults = opt->value; 2645 2646 if (strcmp(str, "all") == 0) 2647 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2648 else if (strcmp(str, "maj") == 0) 2649 *trace_pgfaults |= TRACE_PFMAJ; 2650 else if (strcmp(str, "min") == 0) 2651 *trace_pgfaults |= TRACE_PFMIN; 2652 else 2653 return -1; 2654 2655 return 0; 2656 } 2657 2658 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2659 { 2660 struct perf_evsel *evsel; 2661 2662 evlist__for_each(evlist, evsel) 2663 evsel->handler = handler; 2664 } 2665 2666 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2667 { 2668 const char *trace_usage[] = { 2669 "perf trace [<options>] [<command>]", 2670 "perf trace [<options>] -- <command> [<options>]", 2671 "perf trace record [<options>] [<command>]", 2672 "perf trace record [<options>] -- <command> [<options>]", 2673 NULL 2674 }; 2675 struct trace trace = { 2676 .audit = { 2677 .machine = audit_detect_machine(), 2678 .open_id = audit_name_to_syscall("open", trace.audit.machine), 2679 }, 2680 .syscalls = { 2681 . max = -1, 2682 }, 2683 .opts = { 2684 .target = { 2685 .uid = UINT_MAX, 2686 .uses_mmap = true, 2687 }, 2688 .user_freq = UINT_MAX, 2689 .user_interval = ULLONG_MAX, 2690 .no_buffering = true, 2691 .mmap_pages = UINT_MAX, 2692 }, 2693 .output = stdout, 2694 .show_comm = true, 2695 .trace_syscalls = true, 2696 }; 2697 const char *output_name = NULL; 2698 const char *ev_qualifier_str = NULL; 2699 const struct option trace_options[] = { 2700 OPT_CALLBACK(0, "event", &trace.evlist, "event", 2701 "event selector. use 'perf list' to list available events", 2702 parse_events_option), 2703 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2704 "show the thread COMM next to its id"), 2705 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2706 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), 2707 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2708 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2709 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2710 "trace events on existing process id"), 2711 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2712 "trace events on existing thread id"), 2713 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 2714 "pids to filter (by the kernel)", trace__set_filter_pids), 2715 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2716 "system-wide collection from all CPUs"), 2717 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2718 "list of cpus to monitor"), 2719 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2720 "child tasks do not inherit counters"), 2721 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2722 "number of mmap data pages", 2723 perf_evlist__parse_mmap_pages), 2724 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2725 "user to profile"), 2726 OPT_CALLBACK(0, "duration", &trace, "float", 2727 "show only events with duration > N.M ms", 2728 trace__set_duration), 2729 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2730 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2731 OPT_BOOLEAN('T', "time", &trace.full_time, 2732 "Show full timestamp, not time relative to first start"), 2733 OPT_BOOLEAN('s', "summary", &trace.summary_only, 2734 "Show only syscall summary with statistics"), 2735 OPT_BOOLEAN('S', "with-summary", &trace.summary, 2736 "Show all syscalls and summary with statistics"), 2737 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 2738 "Trace pagefaults", parse_pagefaults, "maj"), 2739 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 2740 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 2741 OPT_END() 2742 }; 2743 const char * const trace_subcommands[] = { "record", NULL }; 2744 int err; 2745 char bf[BUFSIZ]; 2746 2747 signal(SIGSEGV, sighandler_dump_stack); 2748 signal(SIGFPE, sighandler_dump_stack); 2749 2750 trace.evlist = perf_evlist__new(); 2751 2752 if (trace.evlist == NULL) { 2753 pr_err("Not enough memory to run!\n"); 2754 err = -ENOMEM; 2755 goto out; 2756 } 2757 2758 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 2759 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2760 2761 if (trace.trace_pgfaults) { 2762 trace.opts.sample_address = true; 2763 trace.opts.sample_time = true; 2764 } 2765 2766 if (trace.evlist->nr_entries > 0) 2767 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 2768 2769 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 2770 return trace__record(&trace, argc-1, &argv[1]); 2771 2772 /* summary_only implies summary option, but don't overwrite summary if set */ 2773 if (trace.summary_only) 2774 trace.summary = trace.summary_only; 2775 2776 if (!trace.trace_syscalls && !trace.trace_pgfaults && 2777 trace.evlist->nr_entries == 0 /* Was --events used? */) { 2778 pr_err("Please specify something to trace.\n"); 2779 return -1; 2780 } 2781 2782 if (output_name != NULL) { 2783 err = trace__open_output(&trace, output_name); 2784 if (err < 0) { 2785 perror("failed to create output file"); 2786 goto out; 2787 } 2788 } 2789 2790 if (ev_qualifier_str != NULL) { 2791 const char *s = ev_qualifier_str; 2792 2793 trace.not_ev_qualifier = *s == '!'; 2794 if (trace.not_ev_qualifier) 2795 ++s; 2796 trace.ev_qualifier = strlist__new(true, s); 2797 if (trace.ev_qualifier == NULL) { 2798 fputs("Not enough memory to parse event qualifier", 2799 trace.output); 2800 err = -ENOMEM; 2801 goto out_close; 2802 } 2803 } 2804 2805 err = target__validate(&trace.opts.target); 2806 if (err) { 2807 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2808 fprintf(trace.output, "%s", bf); 2809 goto out_close; 2810 } 2811 2812 err = target__parse_uid(&trace.opts.target); 2813 if (err) { 2814 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2815 fprintf(trace.output, "%s", bf); 2816 goto out_close; 2817 } 2818 2819 if (!argc && target__none(&trace.opts.target)) 2820 trace.opts.target.system_wide = true; 2821 2822 if (input_name) 2823 err = trace__replay(&trace); 2824 else 2825 err = trace__run(&trace, argc, argv); 2826 2827 out_close: 2828 if (output_name != NULL) 2829 fclose(trace.output); 2830 out: 2831 return err; 2832 } 2833