1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 #include "util/stat.h" 14 #include "trace-event.h" 15 #include "util/parse-events.h" 16 17 #include <libaudit.h> 18 #include <stdlib.h> 19 #include <sys/mman.h> 20 #include <linux/futex.h> 21 22 /* For older distros: */ 23 #ifndef MAP_STACK 24 # define MAP_STACK 0x20000 25 #endif 26 27 #ifndef MADV_HWPOISON 28 # define MADV_HWPOISON 100 29 #endif 30 31 #ifndef MADV_MERGEABLE 32 # define MADV_MERGEABLE 12 33 #endif 34 35 #ifndef MADV_UNMERGEABLE 36 # define MADV_UNMERGEABLE 13 37 #endif 38 39 #ifndef EFD_SEMAPHORE 40 # define EFD_SEMAPHORE 1 41 #endif 42 43 #ifndef EFD_NONBLOCK 44 # define EFD_NONBLOCK 00004000 45 #endif 46 47 #ifndef EFD_CLOEXEC 48 # define EFD_CLOEXEC 02000000 49 #endif 50 51 #ifndef O_CLOEXEC 52 # define O_CLOEXEC 02000000 53 #endif 54 55 #ifndef SOCK_DCCP 56 # define SOCK_DCCP 6 57 #endif 58 59 #ifndef SOCK_CLOEXEC 60 # define SOCK_CLOEXEC 02000000 61 #endif 62 63 #ifndef SOCK_NONBLOCK 64 # define SOCK_NONBLOCK 00004000 65 #endif 66 67 #ifndef MSG_CMSG_CLOEXEC 68 # define MSG_CMSG_CLOEXEC 0x40000000 69 #endif 70 71 #ifndef PERF_FLAG_FD_NO_GROUP 72 # define PERF_FLAG_FD_NO_GROUP (1UL << 0) 73 #endif 74 75 #ifndef PERF_FLAG_FD_OUTPUT 76 # define PERF_FLAG_FD_OUTPUT (1UL << 1) 77 #endif 78 79 #ifndef PERF_FLAG_PID_CGROUP 80 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ 81 #endif 82 83 #ifndef PERF_FLAG_FD_CLOEXEC 84 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ 85 #endif 86 87 88 struct tp_field { 89 int offset; 90 union { 91 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 92 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 93 }; 94 }; 95 96 #define TP_UINT_FIELD(bits) \ 97 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 98 { \ 99 u##bits value; \ 100 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 101 return value; \ 102 } 103 104 TP_UINT_FIELD(8); 105 TP_UINT_FIELD(16); 106 TP_UINT_FIELD(32); 107 TP_UINT_FIELD(64); 108 109 #define TP_UINT_FIELD__SWAPPED(bits) \ 110 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 111 { \ 112 u##bits value; \ 113 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 114 return bswap_##bits(value);\ 115 } 116 117 TP_UINT_FIELD__SWAPPED(16); 118 TP_UINT_FIELD__SWAPPED(32); 119 TP_UINT_FIELD__SWAPPED(64); 120 121 static int tp_field__init_uint(struct tp_field *field, 122 struct format_field *format_field, 123 bool needs_swap) 124 { 125 field->offset = format_field->offset; 126 127 switch (format_field->size) { 128 case 1: 129 field->integer = tp_field__u8; 130 break; 131 case 2: 132 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 133 break; 134 case 4: 135 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 136 break; 137 case 8: 138 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 139 break; 140 default: 141 return -1; 142 } 143 144 return 0; 145 } 146 147 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 148 { 149 return sample->raw_data + field->offset; 150 } 151 152 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 153 { 154 field->offset = format_field->offset; 155 field->pointer = tp_field__ptr; 156 return 0; 157 } 158 159 struct syscall_tp { 160 struct tp_field id; 161 union { 162 struct tp_field args, ret; 163 }; 164 }; 165 166 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 167 struct tp_field *field, 168 const char *name) 169 { 170 struct format_field *format_field = perf_evsel__field(evsel, name); 171 172 if (format_field == NULL) 173 return -1; 174 175 return tp_field__init_uint(field, format_field, evsel->needs_swap); 176 } 177 178 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 179 ({ struct syscall_tp *sc = evsel->priv;\ 180 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 181 182 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 183 struct tp_field *field, 184 const char *name) 185 { 186 struct format_field *format_field = perf_evsel__field(evsel, name); 187 188 if (format_field == NULL) 189 return -1; 190 191 return tp_field__init_ptr(field, format_field); 192 } 193 194 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 195 ({ struct syscall_tp *sc = evsel->priv;\ 196 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 197 198 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 199 { 200 zfree(&evsel->priv); 201 perf_evsel__delete(evsel); 202 } 203 204 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 205 { 206 evsel->priv = malloc(sizeof(struct syscall_tp)); 207 if (evsel->priv != NULL) { 208 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 209 goto out_delete; 210 211 evsel->handler = handler; 212 return 0; 213 } 214 215 return -ENOMEM; 216 217 out_delete: 218 zfree(&evsel->priv); 219 return -ENOENT; 220 } 221 222 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 223 { 224 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 225 226 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 227 if (evsel == NULL) 228 evsel = perf_evsel__newtp("syscalls", direction); 229 230 if (evsel) { 231 if (perf_evsel__init_syscall_tp(evsel, handler)) 232 goto out_delete; 233 } 234 235 return evsel; 236 237 out_delete: 238 perf_evsel__delete_priv(evsel); 239 return NULL; 240 } 241 242 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 243 ({ struct syscall_tp *fields = evsel->priv; \ 244 fields->name.integer(&fields->name, sample); }) 245 246 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 247 ({ struct syscall_tp *fields = evsel->priv; \ 248 fields->name.pointer(&fields->name, sample); }) 249 250 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, 251 void *sys_enter_handler, 252 void *sys_exit_handler) 253 { 254 int ret = -1; 255 struct perf_evsel *sys_enter, *sys_exit; 256 257 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); 258 if (sys_enter == NULL) 259 goto out; 260 261 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 262 goto out_delete_sys_enter; 263 264 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); 265 if (sys_exit == NULL) 266 goto out_delete_sys_enter; 267 268 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 269 goto out_delete_sys_exit; 270 271 perf_evlist__add(evlist, sys_enter); 272 perf_evlist__add(evlist, sys_exit); 273 274 ret = 0; 275 out: 276 return ret; 277 278 out_delete_sys_exit: 279 perf_evsel__delete_priv(sys_exit); 280 out_delete_sys_enter: 281 perf_evsel__delete_priv(sys_enter); 282 goto out; 283 } 284 285 286 struct syscall_arg { 287 unsigned long val; 288 struct thread *thread; 289 struct trace *trace; 290 void *parm; 291 u8 idx; 292 u8 mask; 293 }; 294 295 struct strarray { 296 int offset; 297 int nr_entries; 298 const char **entries; 299 }; 300 301 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 302 .nr_entries = ARRAY_SIZE(array), \ 303 .entries = array, \ 304 } 305 306 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 307 .offset = off, \ 308 .nr_entries = ARRAY_SIZE(array), \ 309 .entries = array, \ 310 } 311 312 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 313 const char *intfmt, 314 struct syscall_arg *arg) 315 { 316 struct strarray *sa = arg->parm; 317 int idx = arg->val - sa->offset; 318 319 if (idx < 0 || idx >= sa->nr_entries) 320 return scnprintf(bf, size, intfmt, arg->val); 321 322 return scnprintf(bf, size, "%s", sa->entries[idx]); 323 } 324 325 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 326 struct syscall_arg *arg) 327 { 328 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 329 } 330 331 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 332 333 #if defined(__i386__) || defined(__x86_64__) 334 /* 335 * FIXME: Make this available to all arches as soon as the ioctl beautifier 336 * gets rewritten to support all arches. 337 */ 338 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 339 struct syscall_arg *arg) 340 { 341 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 342 } 343 344 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 345 #endif /* defined(__i386__) || defined(__x86_64__) */ 346 347 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 348 struct syscall_arg *arg); 349 350 #define SCA_FD syscall_arg__scnprintf_fd 351 352 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 353 struct syscall_arg *arg) 354 { 355 int fd = arg->val; 356 357 if (fd == AT_FDCWD) 358 return scnprintf(bf, size, "CWD"); 359 360 return syscall_arg__scnprintf_fd(bf, size, arg); 361 } 362 363 #define SCA_FDAT syscall_arg__scnprintf_fd_at 364 365 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 366 struct syscall_arg *arg); 367 368 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 369 370 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 371 struct syscall_arg *arg) 372 { 373 return scnprintf(bf, size, "%#lx", arg->val); 374 } 375 376 #define SCA_HEX syscall_arg__scnprintf_hex 377 378 static size_t syscall_arg__scnprintf_int(char *bf, size_t size, 379 struct syscall_arg *arg) 380 { 381 return scnprintf(bf, size, "%d", arg->val); 382 } 383 384 #define SCA_INT syscall_arg__scnprintf_int 385 386 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 387 struct syscall_arg *arg) 388 { 389 int printed = 0, prot = arg->val; 390 391 if (prot == PROT_NONE) 392 return scnprintf(bf, size, "NONE"); 393 #define P_MMAP_PROT(n) \ 394 if (prot & PROT_##n) { \ 395 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 396 prot &= ~PROT_##n; \ 397 } 398 399 P_MMAP_PROT(EXEC); 400 P_MMAP_PROT(READ); 401 P_MMAP_PROT(WRITE); 402 #ifdef PROT_SEM 403 P_MMAP_PROT(SEM); 404 #endif 405 P_MMAP_PROT(GROWSDOWN); 406 P_MMAP_PROT(GROWSUP); 407 #undef P_MMAP_PROT 408 409 if (prot) 410 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 411 412 return printed; 413 } 414 415 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 416 417 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 418 struct syscall_arg *arg) 419 { 420 int printed = 0, flags = arg->val; 421 422 #define P_MMAP_FLAG(n) \ 423 if (flags & MAP_##n) { \ 424 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 425 flags &= ~MAP_##n; \ 426 } 427 428 P_MMAP_FLAG(SHARED); 429 P_MMAP_FLAG(PRIVATE); 430 #ifdef MAP_32BIT 431 P_MMAP_FLAG(32BIT); 432 #endif 433 P_MMAP_FLAG(ANONYMOUS); 434 P_MMAP_FLAG(DENYWRITE); 435 P_MMAP_FLAG(EXECUTABLE); 436 P_MMAP_FLAG(FILE); 437 P_MMAP_FLAG(FIXED); 438 P_MMAP_FLAG(GROWSDOWN); 439 #ifdef MAP_HUGETLB 440 P_MMAP_FLAG(HUGETLB); 441 #endif 442 P_MMAP_FLAG(LOCKED); 443 P_MMAP_FLAG(NONBLOCK); 444 P_MMAP_FLAG(NORESERVE); 445 P_MMAP_FLAG(POPULATE); 446 P_MMAP_FLAG(STACK); 447 #ifdef MAP_UNINITIALIZED 448 P_MMAP_FLAG(UNINITIALIZED); 449 #endif 450 #undef P_MMAP_FLAG 451 452 if (flags) 453 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 454 455 return printed; 456 } 457 458 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 459 460 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, 461 struct syscall_arg *arg) 462 { 463 int printed = 0, flags = arg->val; 464 465 #define P_MREMAP_FLAG(n) \ 466 if (flags & MREMAP_##n) { \ 467 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 468 flags &= ~MREMAP_##n; \ 469 } 470 471 P_MREMAP_FLAG(MAYMOVE); 472 #ifdef MREMAP_FIXED 473 P_MREMAP_FLAG(FIXED); 474 #endif 475 #undef P_MREMAP_FLAG 476 477 if (flags) 478 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 479 480 return printed; 481 } 482 483 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags 484 485 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 486 struct syscall_arg *arg) 487 { 488 int behavior = arg->val; 489 490 switch (behavior) { 491 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 492 P_MADV_BHV(NORMAL); 493 P_MADV_BHV(RANDOM); 494 P_MADV_BHV(SEQUENTIAL); 495 P_MADV_BHV(WILLNEED); 496 P_MADV_BHV(DONTNEED); 497 P_MADV_BHV(REMOVE); 498 P_MADV_BHV(DONTFORK); 499 P_MADV_BHV(DOFORK); 500 P_MADV_BHV(HWPOISON); 501 #ifdef MADV_SOFT_OFFLINE 502 P_MADV_BHV(SOFT_OFFLINE); 503 #endif 504 P_MADV_BHV(MERGEABLE); 505 P_MADV_BHV(UNMERGEABLE); 506 #ifdef MADV_HUGEPAGE 507 P_MADV_BHV(HUGEPAGE); 508 #endif 509 #ifdef MADV_NOHUGEPAGE 510 P_MADV_BHV(NOHUGEPAGE); 511 #endif 512 #ifdef MADV_DONTDUMP 513 P_MADV_BHV(DONTDUMP); 514 #endif 515 #ifdef MADV_DODUMP 516 P_MADV_BHV(DODUMP); 517 #endif 518 #undef P_MADV_PHV 519 default: break; 520 } 521 522 return scnprintf(bf, size, "%#x", behavior); 523 } 524 525 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 526 527 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 528 struct syscall_arg *arg) 529 { 530 int printed = 0, op = arg->val; 531 532 if (op == 0) 533 return scnprintf(bf, size, "NONE"); 534 #define P_CMD(cmd) \ 535 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 536 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 537 op &= ~LOCK_##cmd; \ 538 } 539 540 P_CMD(SH); 541 P_CMD(EX); 542 P_CMD(NB); 543 P_CMD(UN); 544 P_CMD(MAND); 545 P_CMD(RW); 546 P_CMD(READ); 547 P_CMD(WRITE); 548 #undef P_OP 549 550 if (op) 551 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); 552 553 return printed; 554 } 555 556 #define SCA_FLOCK syscall_arg__scnprintf_flock 557 558 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 559 { 560 enum syscall_futex_args { 561 SCF_UADDR = (1 << 0), 562 SCF_OP = (1 << 1), 563 SCF_VAL = (1 << 2), 564 SCF_TIMEOUT = (1 << 3), 565 SCF_UADDR2 = (1 << 4), 566 SCF_VAL3 = (1 << 5), 567 }; 568 int op = arg->val; 569 int cmd = op & FUTEX_CMD_MASK; 570 size_t printed = 0; 571 572 switch (cmd) { 573 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 574 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 575 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 576 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 577 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 578 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 579 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 580 P_FUTEX_OP(WAKE_OP); break; 581 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 582 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 583 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 584 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 585 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 586 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 587 default: printed = scnprintf(bf, size, "%#x", cmd); break; 588 } 589 590 if (op & FUTEX_PRIVATE_FLAG) 591 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 592 593 if (op & FUTEX_CLOCK_REALTIME) 594 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 595 596 return printed; 597 } 598 599 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 600 601 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 602 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 603 604 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 605 static DEFINE_STRARRAY(itimers); 606 607 static const char *whences[] = { "SET", "CUR", "END", 608 #ifdef SEEK_DATA 609 "DATA", 610 #endif 611 #ifdef SEEK_HOLE 612 "HOLE", 613 #endif 614 }; 615 static DEFINE_STRARRAY(whences); 616 617 static const char *fcntl_cmds[] = { 618 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 619 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 620 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 621 "F_GETOWNER_UIDS", 622 }; 623 static DEFINE_STRARRAY(fcntl_cmds); 624 625 static const char *rlimit_resources[] = { 626 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 627 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 628 "RTTIME", 629 }; 630 static DEFINE_STRARRAY(rlimit_resources); 631 632 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 633 static DEFINE_STRARRAY(sighow); 634 635 static const char *clockid[] = { 636 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 637 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 638 }; 639 static DEFINE_STRARRAY(clockid); 640 641 static const char *socket_families[] = { 642 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 643 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 644 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 645 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 646 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 647 "ALG", "NFC", "VSOCK", 648 }; 649 static DEFINE_STRARRAY(socket_families); 650 651 #ifndef SOCK_TYPE_MASK 652 #define SOCK_TYPE_MASK 0xf 653 #endif 654 655 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 656 struct syscall_arg *arg) 657 { 658 size_t printed; 659 int type = arg->val, 660 flags = type & ~SOCK_TYPE_MASK; 661 662 type &= SOCK_TYPE_MASK; 663 /* 664 * Can't use a strarray, MIPS may override for ABI reasons. 665 */ 666 switch (type) { 667 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 668 P_SK_TYPE(STREAM); 669 P_SK_TYPE(DGRAM); 670 P_SK_TYPE(RAW); 671 P_SK_TYPE(RDM); 672 P_SK_TYPE(SEQPACKET); 673 P_SK_TYPE(DCCP); 674 P_SK_TYPE(PACKET); 675 #undef P_SK_TYPE 676 default: 677 printed = scnprintf(bf, size, "%#x", type); 678 } 679 680 #define P_SK_FLAG(n) \ 681 if (flags & SOCK_##n) { \ 682 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 683 flags &= ~SOCK_##n; \ 684 } 685 686 P_SK_FLAG(CLOEXEC); 687 P_SK_FLAG(NONBLOCK); 688 #undef P_SK_FLAG 689 690 if (flags) 691 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 692 693 return printed; 694 } 695 696 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 697 698 #ifndef MSG_PROBE 699 #define MSG_PROBE 0x10 700 #endif 701 #ifndef MSG_WAITFORONE 702 #define MSG_WAITFORONE 0x10000 703 #endif 704 #ifndef MSG_SENDPAGE_NOTLAST 705 #define MSG_SENDPAGE_NOTLAST 0x20000 706 #endif 707 #ifndef MSG_FASTOPEN 708 #define MSG_FASTOPEN 0x20000000 709 #endif 710 711 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 712 struct syscall_arg *arg) 713 { 714 int printed = 0, flags = arg->val; 715 716 if (flags == 0) 717 return scnprintf(bf, size, "NONE"); 718 #define P_MSG_FLAG(n) \ 719 if (flags & MSG_##n) { \ 720 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 721 flags &= ~MSG_##n; \ 722 } 723 724 P_MSG_FLAG(OOB); 725 P_MSG_FLAG(PEEK); 726 P_MSG_FLAG(DONTROUTE); 727 P_MSG_FLAG(TRYHARD); 728 P_MSG_FLAG(CTRUNC); 729 P_MSG_FLAG(PROBE); 730 P_MSG_FLAG(TRUNC); 731 P_MSG_FLAG(DONTWAIT); 732 P_MSG_FLAG(EOR); 733 P_MSG_FLAG(WAITALL); 734 P_MSG_FLAG(FIN); 735 P_MSG_FLAG(SYN); 736 P_MSG_FLAG(CONFIRM); 737 P_MSG_FLAG(RST); 738 P_MSG_FLAG(ERRQUEUE); 739 P_MSG_FLAG(NOSIGNAL); 740 P_MSG_FLAG(MORE); 741 P_MSG_FLAG(WAITFORONE); 742 P_MSG_FLAG(SENDPAGE_NOTLAST); 743 P_MSG_FLAG(FASTOPEN); 744 P_MSG_FLAG(CMSG_CLOEXEC); 745 #undef P_MSG_FLAG 746 747 if (flags) 748 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 749 750 return printed; 751 } 752 753 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 754 755 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 756 struct syscall_arg *arg) 757 { 758 size_t printed = 0; 759 int mode = arg->val; 760 761 if (mode == F_OK) /* 0 */ 762 return scnprintf(bf, size, "F"); 763 #define P_MODE(n) \ 764 if (mode & n##_OK) { \ 765 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 766 mode &= ~n##_OK; \ 767 } 768 769 P_MODE(R); 770 P_MODE(W); 771 P_MODE(X); 772 #undef P_MODE 773 774 if (mode) 775 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 776 777 return printed; 778 } 779 780 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 781 782 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 783 struct syscall_arg *arg) 784 { 785 int printed = 0, flags = arg->val; 786 787 if (!(flags & O_CREAT)) 788 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 789 790 if (flags == 0) 791 return scnprintf(bf, size, "RDONLY"); 792 #define P_FLAG(n) \ 793 if (flags & O_##n) { \ 794 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 795 flags &= ~O_##n; \ 796 } 797 798 P_FLAG(APPEND); 799 P_FLAG(ASYNC); 800 P_FLAG(CLOEXEC); 801 P_FLAG(CREAT); 802 P_FLAG(DIRECT); 803 P_FLAG(DIRECTORY); 804 P_FLAG(EXCL); 805 P_FLAG(LARGEFILE); 806 P_FLAG(NOATIME); 807 P_FLAG(NOCTTY); 808 #ifdef O_NONBLOCK 809 P_FLAG(NONBLOCK); 810 #elif O_NDELAY 811 P_FLAG(NDELAY); 812 #endif 813 #ifdef O_PATH 814 P_FLAG(PATH); 815 #endif 816 P_FLAG(RDWR); 817 #ifdef O_DSYNC 818 if ((flags & O_SYNC) == O_SYNC) 819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 820 else { 821 P_FLAG(DSYNC); 822 } 823 #else 824 P_FLAG(SYNC); 825 #endif 826 P_FLAG(TRUNC); 827 P_FLAG(WRONLY); 828 #undef P_FLAG 829 830 if (flags) 831 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 832 833 return printed; 834 } 835 836 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 837 838 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, 839 struct syscall_arg *arg) 840 { 841 int printed = 0, flags = arg->val; 842 843 if (flags == 0) 844 return 0; 845 846 #define P_FLAG(n) \ 847 if (flags & PERF_FLAG_##n) { \ 848 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 849 flags &= ~PERF_FLAG_##n; \ 850 } 851 852 P_FLAG(FD_NO_GROUP); 853 P_FLAG(FD_OUTPUT); 854 P_FLAG(PID_CGROUP); 855 P_FLAG(FD_CLOEXEC); 856 #undef P_FLAG 857 858 if (flags) 859 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 860 861 return printed; 862 } 863 864 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags 865 866 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 867 struct syscall_arg *arg) 868 { 869 int printed = 0, flags = arg->val; 870 871 if (flags == 0) 872 return scnprintf(bf, size, "NONE"); 873 #define P_FLAG(n) \ 874 if (flags & EFD_##n) { \ 875 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 876 flags &= ~EFD_##n; \ 877 } 878 879 P_FLAG(SEMAPHORE); 880 P_FLAG(CLOEXEC); 881 P_FLAG(NONBLOCK); 882 #undef P_FLAG 883 884 if (flags) 885 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 886 887 return printed; 888 } 889 890 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 891 892 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 893 struct syscall_arg *arg) 894 { 895 int printed = 0, flags = arg->val; 896 897 #define P_FLAG(n) \ 898 if (flags & O_##n) { \ 899 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 900 flags &= ~O_##n; \ 901 } 902 903 P_FLAG(CLOEXEC); 904 P_FLAG(NONBLOCK); 905 #undef P_FLAG 906 907 if (flags) 908 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 909 910 return printed; 911 } 912 913 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 914 915 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 916 { 917 int sig = arg->val; 918 919 switch (sig) { 920 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 921 P_SIGNUM(HUP); 922 P_SIGNUM(INT); 923 P_SIGNUM(QUIT); 924 P_SIGNUM(ILL); 925 P_SIGNUM(TRAP); 926 P_SIGNUM(ABRT); 927 P_SIGNUM(BUS); 928 P_SIGNUM(FPE); 929 P_SIGNUM(KILL); 930 P_SIGNUM(USR1); 931 P_SIGNUM(SEGV); 932 P_SIGNUM(USR2); 933 P_SIGNUM(PIPE); 934 P_SIGNUM(ALRM); 935 P_SIGNUM(TERM); 936 P_SIGNUM(CHLD); 937 P_SIGNUM(CONT); 938 P_SIGNUM(STOP); 939 P_SIGNUM(TSTP); 940 P_SIGNUM(TTIN); 941 P_SIGNUM(TTOU); 942 P_SIGNUM(URG); 943 P_SIGNUM(XCPU); 944 P_SIGNUM(XFSZ); 945 P_SIGNUM(VTALRM); 946 P_SIGNUM(PROF); 947 P_SIGNUM(WINCH); 948 P_SIGNUM(IO); 949 P_SIGNUM(PWR); 950 P_SIGNUM(SYS); 951 #ifdef SIGEMT 952 P_SIGNUM(EMT); 953 #endif 954 #ifdef SIGSTKFLT 955 P_SIGNUM(STKFLT); 956 #endif 957 #ifdef SIGSWI 958 P_SIGNUM(SWI); 959 #endif 960 default: break; 961 } 962 963 return scnprintf(bf, size, "%#x", sig); 964 } 965 966 #define SCA_SIGNUM syscall_arg__scnprintf_signum 967 968 #if defined(__i386__) || defined(__x86_64__) 969 /* 970 * FIXME: Make this available to all arches. 971 */ 972 #define TCGETS 0x5401 973 974 static const char *tioctls[] = { 975 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 976 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 977 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 978 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 979 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 980 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 981 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 982 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 983 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 984 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 985 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 986 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 987 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 988 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 989 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 990 }; 991 992 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 993 #endif /* defined(__i386__) || defined(__x86_64__) */ 994 995 #define STRARRAY(arg, name, array) \ 996 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 997 .arg_parm = { [arg] = &strarray__##array, } 998 999 static struct syscall_fmt { 1000 const char *name; 1001 const char *alias; 1002 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 1003 void *arg_parm[6]; 1004 bool errmsg; 1005 bool timeout; 1006 bool hexret; 1007 } syscall_fmts[] = { 1008 { .name = "access", .errmsg = true, 1009 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 1010 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 1011 { .name = "brk", .hexret = true, 1012 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 1013 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 1014 { .name = "close", .errmsg = true, 1015 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 1016 { .name = "connect", .errmsg = true, }, 1017 { .name = "dup", .errmsg = true, 1018 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1019 { .name = "dup2", .errmsg = true, 1020 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1021 { .name = "dup3", .errmsg = true, 1022 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1023 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 1024 { .name = "eventfd2", .errmsg = true, 1025 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 1026 { .name = "faccessat", .errmsg = true, 1027 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1028 { .name = "fadvise64", .errmsg = true, 1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1030 { .name = "fallocate", .errmsg = true, 1031 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1032 { .name = "fchdir", .errmsg = true, 1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1034 { .name = "fchmod", .errmsg = true, 1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1036 { .name = "fchmodat", .errmsg = true, 1037 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1038 { .name = "fchown", .errmsg = true, 1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1040 { .name = "fchownat", .errmsg = true, 1041 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1042 { .name = "fcntl", .errmsg = true, 1043 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1044 [1] = SCA_STRARRAY, /* cmd */ }, 1045 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 1046 { .name = "fdatasync", .errmsg = true, 1047 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1048 { .name = "flock", .errmsg = true, 1049 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1050 [1] = SCA_FLOCK, /* cmd */ }, }, 1051 { .name = "fsetxattr", .errmsg = true, 1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1053 { .name = "fstat", .errmsg = true, .alias = "newfstat", 1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1055 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1057 { .name = "fstatfs", .errmsg = true, 1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1059 { .name = "fsync", .errmsg = true, 1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1061 { .name = "ftruncate", .errmsg = true, 1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1063 { .name = "futex", .errmsg = true, 1064 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 1065 { .name = "futimesat", .errmsg = true, 1066 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1067 { .name = "getdents", .errmsg = true, 1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1069 { .name = "getdents64", .errmsg = true, 1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1071 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1072 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1073 { .name = "ioctl", .errmsg = true, 1074 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1075 #if defined(__i386__) || defined(__x86_64__) 1076 /* 1077 * FIXME: Make this available to all arches. 1078 */ 1079 [1] = SCA_STRHEXARRAY, /* cmd */ 1080 [2] = SCA_HEX, /* arg */ }, 1081 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 1082 #else 1083 [2] = SCA_HEX, /* arg */ }, }, 1084 #endif 1085 { .name = "kill", .errmsg = true, 1086 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1087 { .name = "linkat", .errmsg = true, 1088 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1089 { .name = "lseek", .errmsg = true, 1090 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1091 [2] = SCA_STRARRAY, /* whence */ }, 1092 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 1093 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 1094 { .name = "madvise", .errmsg = true, 1095 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1096 [2] = SCA_MADV_BHV, /* behavior */ }, }, 1097 { .name = "mkdirat", .errmsg = true, 1098 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1099 { .name = "mknodat", .errmsg = true, 1100 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1101 { .name = "mlock", .errmsg = true, 1102 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1103 { .name = "mlockall", .errmsg = true, 1104 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1105 { .name = "mmap", .hexret = true, 1106 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1107 [2] = SCA_MMAP_PROT, /* prot */ 1108 [3] = SCA_MMAP_FLAGS, /* flags */ 1109 [4] = SCA_FD, /* fd */ }, }, 1110 { .name = "mprotect", .errmsg = true, 1111 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1112 [2] = SCA_MMAP_PROT, /* prot */ }, }, 1113 { .name = "mremap", .hexret = true, 1114 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1115 [3] = SCA_MREMAP_FLAGS, /* flags */ 1116 [4] = SCA_HEX, /* new_addr */ }, }, 1117 { .name = "munlock", .errmsg = true, 1118 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1119 { .name = "munmap", .errmsg = true, 1120 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1121 { .name = "name_to_handle_at", .errmsg = true, 1122 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1123 { .name = "newfstatat", .errmsg = true, 1124 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1125 { .name = "open", .errmsg = true, 1126 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 1127 { .name = "open_by_handle_at", .errmsg = true, 1128 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1129 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1130 { .name = "openat", .errmsg = true, 1131 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1132 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1133 { .name = "perf_event_open", .errmsg = true, 1134 .arg_scnprintf = { [1] = SCA_INT, /* pid */ 1135 [2] = SCA_INT, /* cpu */ 1136 [3] = SCA_FD, /* group_fd */ 1137 [4] = SCA_PERF_FLAGS, /* flags */ }, }, 1138 { .name = "pipe2", .errmsg = true, 1139 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 1140 { .name = "poll", .errmsg = true, .timeout = true, }, 1141 { .name = "ppoll", .errmsg = true, .timeout = true, }, 1142 { .name = "pread", .errmsg = true, .alias = "pread64", 1143 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1144 { .name = "preadv", .errmsg = true, .alias = "pread", 1145 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1146 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 1147 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 1148 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1149 { .name = "pwritev", .errmsg = true, 1150 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1151 { .name = "read", .errmsg = true, 1152 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1153 { .name = "readlinkat", .errmsg = true, 1154 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1155 { .name = "readv", .errmsg = true, 1156 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1157 { .name = "recvfrom", .errmsg = true, 1158 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1159 { .name = "recvmmsg", .errmsg = true, 1160 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1161 { .name = "recvmsg", .errmsg = true, 1162 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1163 { .name = "renameat", .errmsg = true, 1164 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1165 { .name = "rt_sigaction", .errmsg = true, 1166 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1167 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1168 { .name = "rt_sigqueueinfo", .errmsg = true, 1169 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1170 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1171 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1172 { .name = "select", .errmsg = true, .timeout = true, }, 1173 { .name = "sendmmsg", .errmsg = true, 1174 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1175 { .name = "sendmsg", .errmsg = true, 1176 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1177 { .name = "sendto", .errmsg = true, 1178 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1179 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1180 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1181 { .name = "shutdown", .errmsg = true, 1182 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1183 { .name = "socket", .errmsg = true, 1184 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1185 [1] = SCA_SK_TYPE, /* type */ }, 1186 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1187 { .name = "socketpair", .errmsg = true, 1188 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1189 [1] = SCA_SK_TYPE, /* type */ }, 1190 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1191 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1192 { .name = "symlinkat", .errmsg = true, 1193 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1194 { .name = "tgkill", .errmsg = true, 1195 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1196 { .name = "tkill", .errmsg = true, 1197 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1198 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1199 { .name = "unlinkat", .errmsg = true, 1200 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1201 { .name = "utimensat", .errmsg = true, 1202 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1203 { .name = "write", .errmsg = true, 1204 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1205 { .name = "writev", .errmsg = true, 1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1207 }; 1208 1209 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1210 { 1211 const struct syscall_fmt *fmt = fmtp; 1212 return strcmp(name, fmt->name); 1213 } 1214 1215 static struct syscall_fmt *syscall_fmt__find(const char *name) 1216 { 1217 const int nmemb = ARRAY_SIZE(syscall_fmts); 1218 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1219 } 1220 1221 struct syscall { 1222 struct event_format *tp_format; 1223 int nr_args; 1224 struct format_field *args; 1225 const char *name; 1226 bool filtered; 1227 bool is_exit; 1228 struct syscall_fmt *fmt; 1229 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1230 void **arg_parm; 1231 }; 1232 1233 static size_t fprintf_duration(unsigned long t, FILE *fp) 1234 { 1235 double duration = (double)t / NSEC_PER_MSEC; 1236 size_t printed = fprintf(fp, "("); 1237 1238 if (duration >= 1.0) 1239 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1240 else if (duration >= 0.01) 1241 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1242 else 1243 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1244 return printed + fprintf(fp, "): "); 1245 } 1246 1247 struct thread_trace { 1248 u64 entry_time; 1249 u64 exit_time; 1250 bool entry_pending; 1251 unsigned long nr_events; 1252 unsigned long pfmaj, pfmin; 1253 char *entry_str; 1254 double runtime_ms; 1255 struct { 1256 int max; 1257 char **table; 1258 } paths; 1259 1260 struct intlist *syscall_stats; 1261 }; 1262 1263 static struct thread_trace *thread_trace__new(void) 1264 { 1265 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1266 1267 if (ttrace) 1268 ttrace->paths.max = -1; 1269 1270 ttrace->syscall_stats = intlist__new(NULL); 1271 1272 return ttrace; 1273 } 1274 1275 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 1276 { 1277 struct thread_trace *ttrace; 1278 1279 if (thread == NULL) 1280 goto fail; 1281 1282 if (thread__priv(thread) == NULL) 1283 thread__set_priv(thread, thread_trace__new()); 1284 1285 if (thread__priv(thread) == NULL) 1286 goto fail; 1287 1288 ttrace = thread__priv(thread); 1289 ++ttrace->nr_events; 1290 1291 return ttrace; 1292 fail: 1293 color_fprintf(fp, PERF_COLOR_RED, 1294 "WARNING: not enough memory, dropping samples!\n"); 1295 return NULL; 1296 } 1297 1298 #define TRACE_PFMAJ (1 << 0) 1299 #define TRACE_PFMIN (1 << 1) 1300 1301 struct trace { 1302 struct perf_tool tool; 1303 struct { 1304 int machine; 1305 int open_id; 1306 } audit; 1307 struct { 1308 int max; 1309 struct syscall *table; 1310 } syscalls; 1311 struct record_opts opts; 1312 struct perf_evlist *evlist; 1313 struct machine *host; 1314 struct thread *current; 1315 u64 base_time; 1316 FILE *output; 1317 unsigned long nr_events; 1318 struct strlist *ev_qualifier; 1319 const char *last_vfs_getname; 1320 struct intlist *tid_list; 1321 struct intlist *pid_list; 1322 struct { 1323 size_t nr; 1324 pid_t *entries; 1325 } filter_pids; 1326 double duration_filter; 1327 double runtime_ms; 1328 struct { 1329 u64 vfs_getname, 1330 proc_getname; 1331 } stats; 1332 bool not_ev_qualifier; 1333 bool live; 1334 bool full_time; 1335 bool sched; 1336 bool multiple_threads; 1337 bool summary; 1338 bool summary_only; 1339 bool show_comm; 1340 bool show_tool_stats; 1341 bool trace_syscalls; 1342 bool force; 1343 int trace_pgfaults; 1344 }; 1345 1346 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 1347 { 1348 struct thread_trace *ttrace = thread__priv(thread); 1349 1350 if (fd > ttrace->paths.max) { 1351 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 1352 1353 if (npath == NULL) 1354 return -1; 1355 1356 if (ttrace->paths.max != -1) { 1357 memset(npath + ttrace->paths.max + 1, 0, 1358 (fd - ttrace->paths.max) * sizeof(char *)); 1359 } else { 1360 memset(npath, 0, (fd + 1) * sizeof(char *)); 1361 } 1362 1363 ttrace->paths.table = npath; 1364 ttrace->paths.max = fd; 1365 } 1366 1367 ttrace->paths.table[fd] = strdup(pathname); 1368 1369 return ttrace->paths.table[fd] != NULL ? 0 : -1; 1370 } 1371 1372 static int thread__read_fd_path(struct thread *thread, int fd) 1373 { 1374 char linkname[PATH_MAX], pathname[PATH_MAX]; 1375 struct stat st; 1376 int ret; 1377 1378 if (thread->pid_ == thread->tid) { 1379 scnprintf(linkname, sizeof(linkname), 1380 "/proc/%d/fd/%d", thread->pid_, fd); 1381 } else { 1382 scnprintf(linkname, sizeof(linkname), 1383 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1384 } 1385 1386 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1387 return -1; 1388 1389 ret = readlink(linkname, pathname, sizeof(pathname)); 1390 1391 if (ret < 0 || ret > st.st_size) 1392 return -1; 1393 1394 pathname[ret] = '\0'; 1395 return trace__set_fd_pathname(thread, fd, pathname); 1396 } 1397 1398 static const char *thread__fd_path(struct thread *thread, int fd, 1399 struct trace *trace) 1400 { 1401 struct thread_trace *ttrace = thread__priv(thread); 1402 1403 if (ttrace == NULL) 1404 return NULL; 1405 1406 if (fd < 0) 1407 return NULL; 1408 1409 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 1410 if (!trace->live) 1411 return NULL; 1412 ++trace->stats.proc_getname; 1413 if (thread__read_fd_path(thread, fd)) 1414 return NULL; 1415 } 1416 1417 return ttrace->paths.table[fd]; 1418 } 1419 1420 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1421 struct syscall_arg *arg) 1422 { 1423 int fd = arg->val; 1424 size_t printed = scnprintf(bf, size, "%d", fd); 1425 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1426 1427 if (path) 1428 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1429 1430 return printed; 1431 } 1432 1433 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1434 struct syscall_arg *arg) 1435 { 1436 int fd = arg->val; 1437 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1438 struct thread_trace *ttrace = thread__priv(arg->thread); 1439 1440 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1441 zfree(&ttrace->paths.table[fd]); 1442 1443 return printed; 1444 } 1445 1446 static bool trace__filter_duration(struct trace *trace, double t) 1447 { 1448 return t < (trace->duration_filter * NSEC_PER_MSEC); 1449 } 1450 1451 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1452 { 1453 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1454 1455 return fprintf(fp, "%10.3f ", ts); 1456 } 1457 1458 static bool done = false; 1459 static bool interrupted = false; 1460 1461 static void sig_handler(int sig) 1462 { 1463 done = true; 1464 interrupted = sig == SIGINT; 1465 } 1466 1467 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1468 u64 duration, u64 tstamp, FILE *fp) 1469 { 1470 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1471 printed += fprintf_duration(duration, fp); 1472 1473 if (trace->multiple_threads) { 1474 if (trace->show_comm) 1475 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1476 printed += fprintf(fp, "%d ", thread->tid); 1477 } 1478 1479 return printed; 1480 } 1481 1482 static int trace__process_event(struct trace *trace, struct machine *machine, 1483 union perf_event *event, struct perf_sample *sample) 1484 { 1485 int ret = 0; 1486 1487 switch (event->header.type) { 1488 case PERF_RECORD_LOST: 1489 color_fprintf(trace->output, PERF_COLOR_RED, 1490 "LOST %" PRIu64 " events!\n", event->lost.lost); 1491 ret = machine__process_lost_event(machine, event, sample); 1492 default: 1493 ret = machine__process_event(machine, event, sample); 1494 break; 1495 } 1496 1497 return ret; 1498 } 1499 1500 static int trace__tool_process(struct perf_tool *tool, 1501 union perf_event *event, 1502 struct perf_sample *sample, 1503 struct machine *machine) 1504 { 1505 struct trace *trace = container_of(tool, struct trace, tool); 1506 return trace__process_event(trace, machine, event, sample); 1507 } 1508 1509 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1510 { 1511 int err = symbol__init(NULL); 1512 1513 if (err) 1514 return err; 1515 1516 trace->host = machine__new_host(); 1517 if (trace->host == NULL) 1518 return -ENOMEM; 1519 1520 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1521 evlist->threads, trace__tool_process, false, 1522 trace->opts.proc_map_timeout); 1523 if (err) 1524 symbol__exit(); 1525 1526 return err; 1527 } 1528 1529 static int syscall__set_arg_fmts(struct syscall *sc) 1530 { 1531 struct format_field *field; 1532 int idx = 0; 1533 1534 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); 1535 if (sc->arg_scnprintf == NULL) 1536 return -1; 1537 1538 if (sc->fmt) 1539 sc->arg_parm = sc->fmt->arg_parm; 1540 1541 for (field = sc->args; field; field = field->next) { 1542 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1543 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1544 else if (field->flags & FIELD_IS_POINTER) 1545 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1546 ++idx; 1547 } 1548 1549 return 0; 1550 } 1551 1552 static int trace__read_syscall_info(struct trace *trace, int id) 1553 { 1554 char tp_name[128]; 1555 struct syscall *sc; 1556 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1557 1558 if (name == NULL) 1559 return -1; 1560 1561 if (id > trace->syscalls.max) { 1562 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1563 1564 if (nsyscalls == NULL) 1565 return -1; 1566 1567 if (trace->syscalls.max != -1) { 1568 memset(nsyscalls + trace->syscalls.max + 1, 0, 1569 (id - trace->syscalls.max) * sizeof(*sc)); 1570 } else { 1571 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1572 } 1573 1574 trace->syscalls.table = nsyscalls; 1575 trace->syscalls.max = id; 1576 } 1577 1578 sc = trace->syscalls.table + id; 1579 sc->name = name; 1580 1581 if (trace->ev_qualifier) { 1582 bool in = strlist__find(trace->ev_qualifier, name) != NULL; 1583 1584 if (!(in ^ trace->not_ev_qualifier)) { 1585 sc->filtered = true; 1586 /* 1587 * No need to do read tracepoint information since this will be 1588 * filtered out. 1589 */ 1590 return 0; 1591 } 1592 } 1593 1594 sc->fmt = syscall_fmt__find(sc->name); 1595 1596 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1597 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1598 1599 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 1600 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1601 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1602 } 1603 1604 if (sc->tp_format == NULL) 1605 return -1; 1606 1607 sc->args = sc->tp_format->format.fields; 1608 sc->nr_args = sc->tp_format->format.nr_fields; 1609 /* drop nr field - not relevant here; does not exist on older kernels */ 1610 if (sc->args && strcmp(sc->args->name, "nr") == 0) { 1611 sc->args = sc->args->next; 1612 --sc->nr_args; 1613 } 1614 1615 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1616 1617 return syscall__set_arg_fmts(sc); 1618 } 1619 1620 /* 1621 * args is to be interpreted as a series of longs but we need to handle 1622 * 8-byte unaligned accesses. args points to raw_data within the event 1623 * and raw_data is guaranteed to be 8-byte unaligned because it is 1624 * preceded by raw_size which is a u32. So we need to copy args to a temp 1625 * variable to read it. Most notably this avoids extended load instructions 1626 * on unaligned addresses 1627 */ 1628 1629 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1630 unsigned char *args, struct trace *trace, 1631 struct thread *thread) 1632 { 1633 size_t printed = 0; 1634 unsigned char *p; 1635 unsigned long val; 1636 1637 if (sc->args != NULL) { 1638 struct format_field *field; 1639 u8 bit = 1; 1640 struct syscall_arg arg = { 1641 .idx = 0, 1642 .mask = 0, 1643 .trace = trace, 1644 .thread = thread, 1645 }; 1646 1647 for (field = sc->args; field; 1648 field = field->next, ++arg.idx, bit <<= 1) { 1649 if (arg.mask & bit) 1650 continue; 1651 1652 /* special care for unaligned accesses */ 1653 p = args + sizeof(unsigned long) * arg.idx; 1654 memcpy(&val, p, sizeof(val)); 1655 1656 /* 1657 * Suppress this argument if its value is zero and 1658 * and we don't have a string associated in an 1659 * strarray for it. 1660 */ 1661 if (val == 0 && 1662 !(sc->arg_scnprintf && 1663 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1664 sc->arg_parm[arg.idx])) 1665 continue; 1666 1667 printed += scnprintf(bf + printed, size - printed, 1668 "%s%s: ", printed ? ", " : "", field->name); 1669 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1670 arg.val = val; 1671 if (sc->arg_parm) 1672 arg.parm = sc->arg_parm[arg.idx]; 1673 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1674 size - printed, &arg); 1675 } else { 1676 printed += scnprintf(bf + printed, size - printed, 1677 "%ld", val); 1678 } 1679 } 1680 } else { 1681 int i = 0; 1682 1683 while (i < 6) { 1684 /* special care for unaligned accesses */ 1685 p = args + sizeof(unsigned long) * i; 1686 memcpy(&val, p, sizeof(val)); 1687 printed += scnprintf(bf + printed, size - printed, 1688 "%sarg%d: %ld", 1689 printed ? ", " : "", i, val); 1690 ++i; 1691 } 1692 } 1693 1694 return printed; 1695 } 1696 1697 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1698 union perf_event *event, 1699 struct perf_sample *sample); 1700 1701 static struct syscall *trace__syscall_info(struct trace *trace, 1702 struct perf_evsel *evsel, int id) 1703 { 1704 1705 if (id < 0) { 1706 1707 /* 1708 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1709 * before that, leaving at a higher verbosity level till that is 1710 * explained. Reproduced with plain ftrace with: 1711 * 1712 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1713 * grep "NR -1 " /t/trace_pipe 1714 * 1715 * After generating some load on the machine. 1716 */ 1717 if (verbose > 1) { 1718 static u64 n; 1719 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1720 id, perf_evsel__name(evsel), ++n); 1721 } 1722 return NULL; 1723 } 1724 1725 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1726 trace__read_syscall_info(trace, id)) 1727 goto out_cant_read; 1728 1729 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1730 goto out_cant_read; 1731 1732 return &trace->syscalls.table[id]; 1733 1734 out_cant_read: 1735 if (verbose) { 1736 fprintf(trace->output, "Problems reading syscall %d", id); 1737 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1738 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1739 fputs(" information\n", trace->output); 1740 } 1741 return NULL; 1742 } 1743 1744 static void thread__update_stats(struct thread_trace *ttrace, 1745 int id, struct perf_sample *sample) 1746 { 1747 struct int_node *inode; 1748 struct stats *stats; 1749 u64 duration = 0; 1750 1751 inode = intlist__findnew(ttrace->syscall_stats, id); 1752 if (inode == NULL) 1753 return; 1754 1755 stats = inode->priv; 1756 if (stats == NULL) { 1757 stats = malloc(sizeof(struct stats)); 1758 if (stats == NULL) 1759 return; 1760 init_stats(stats); 1761 inode->priv = stats; 1762 } 1763 1764 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1765 duration = sample->time - ttrace->entry_time; 1766 1767 update_stats(stats, duration); 1768 } 1769 1770 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) 1771 { 1772 struct thread_trace *ttrace; 1773 u64 duration; 1774 size_t printed; 1775 1776 if (trace->current == NULL) 1777 return 0; 1778 1779 ttrace = thread__priv(trace->current); 1780 1781 if (!ttrace->entry_pending) 1782 return 0; 1783 1784 duration = sample->time - ttrace->entry_time; 1785 1786 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); 1787 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1788 ttrace->entry_pending = false; 1789 1790 return printed; 1791 } 1792 1793 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1794 union perf_event *event __maybe_unused, 1795 struct perf_sample *sample) 1796 { 1797 char *msg; 1798 void *args; 1799 size_t printed = 0; 1800 struct thread *thread; 1801 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1802 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1803 struct thread_trace *ttrace; 1804 1805 if (sc == NULL) 1806 return -1; 1807 1808 if (sc->filtered) 1809 return 0; 1810 1811 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1812 ttrace = thread__trace(thread, trace->output); 1813 if (ttrace == NULL) 1814 goto out_put; 1815 1816 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1817 1818 if (ttrace->entry_str == NULL) { 1819 ttrace->entry_str = malloc(1024); 1820 if (!ttrace->entry_str) 1821 goto out_put; 1822 } 1823 1824 if (!trace->summary_only) 1825 trace__printf_interrupted_entry(trace, sample); 1826 1827 ttrace->entry_time = sample->time; 1828 msg = ttrace->entry_str; 1829 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1830 1831 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, 1832 args, trace, thread); 1833 1834 if (sc->is_exit) { 1835 if (!trace->duration_filter && !trace->summary_only) { 1836 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1837 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1838 } 1839 } else 1840 ttrace->entry_pending = true; 1841 1842 if (trace->current != thread) { 1843 thread__put(trace->current); 1844 trace->current = thread__get(thread); 1845 } 1846 err = 0; 1847 out_put: 1848 thread__put(thread); 1849 return err; 1850 } 1851 1852 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1853 union perf_event *event __maybe_unused, 1854 struct perf_sample *sample) 1855 { 1856 long ret; 1857 u64 duration = 0; 1858 struct thread *thread; 1859 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1860 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1861 struct thread_trace *ttrace; 1862 1863 if (sc == NULL) 1864 return -1; 1865 1866 if (sc->filtered) 1867 return 0; 1868 1869 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1870 ttrace = thread__trace(thread, trace->output); 1871 if (ttrace == NULL) 1872 goto out_put; 1873 1874 if (trace->summary) 1875 thread__update_stats(ttrace, id, sample); 1876 1877 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1878 1879 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { 1880 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); 1881 trace->last_vfs_getname = NULL; 1882 ++trace->stats.vfs_getname; 1883 } 1884 1885 ttrace->exit_time = sample->time; 1886 1887 if (ttrace->entry_time) { 1888 duration = sample->time - ttrace->entry_time; 1889 if (trace__filter_duration(trace, duration)) 1890 goto out; 1891 } else if (trace->duration_filter) 1892 goto out; 1893 1894 if (trace->summary_only) 1895 goto out; 1896 1897 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1898 1899 if (ttrace->entry_pending) { 1900 fprintf(trace->output, "%-70s", ttrace->entry_str); 1901 } else { 1902 fprintf(trace->output, " ... ["); 1903 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1904 fprintf(trace->output, "]: %s()", sc->name); 1905 } 1906 1907 if (sc->fmt == NULL) { 1908 signed_print: 1909 fprintf(trace->output, ") = %ld", ret); 1910 } else if (ret < 0 && sc->fmt->errmsg) { 1911 char bf[STRERR_BUFSIZE]; 1912 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1913 *e = audit_errno_to_name(-ret); 1914 1915 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1916 } else if (ret == 0 && sc->fmt->timeout) 1917 fprintf(trace->output, ") = 0 Timeout"); 1918 else if (sc->fmt->hexret) 1919 fprintf(trace->output, ") = %#lx", ret); 1920 else 1921 goto signed_print; 1922 1923 fputc('\n', trace->output); 1924 out: 1925 ttrace->entry_pending = false; 1926 err = 0; 1927 out_put: 1928 thread__put(thread); 1929 return err; 1930 } 1931 1932 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1933 union perf_event *event __maybe_unused, 1934 struct perf_sample *sample) 1935 { 1936 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); 1937 return 0; 1938 } 1939 1940 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1941 union perf_event *event __maybe_unused, 1942 struct perf_sample *sample) 1943 { 1944 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1945 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1946 struct thread *thread = machine__findnew_thread(trace->host, 1947 sample->pid, 1948 sample->tid); 1949 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1950 1951 if (ttrace == NULL) 1952 goto out_dump; 1953 1954 ttrace->runtime_ms += runtime_ms; 1955 trace->runtime_ms += runtime_ms; 1956 thread__put(thread); 1957 return 0; 1958 1959 out_dump: 1960 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1961 evsel->name, 1962 perf_evsel__strval(evsel, sample, "comm"), 1963 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1964 runtime, 1965 perf_evsel__intval(evsel, sample, "vruntime")); 1966 thread__put(thread); 1967 return 0; 1968 } 1969 1970 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1971 union perf_event *event __maybe_unused, 1972 struct perf_sample *sample) 1973 { 1974 trace__printf_interrupted_entry(trace, sample); 1975 trace__fprintf_tstamp(trace, sample->time, trace->output); 1976 1977 if (trace->trace_syscalls) 1978 fprintf(trace->output, "( ): "); 1979 1980 fprintf(trace->output, "%s:", evsel->name); 1981 1982 if (evsel->tp_format) { 1983 event_format__fprintf(evsel->tp_format, sample->cpu, 1984 sample->raw_data, sample->raw_size, 1985 trace->output); 1986 } 1987 1988 fprintf(trace->output, ")\n"); 1989 return 0; 1990 } 1991 1992 static void print_location(FILE *f, struct perf_sample *sample, 1993 struct addr_location *al, 1994 bool print_dso, bool print_sym) 1995 { 1996 1997 if ((verbose || print_dso) && al->map) 1998 fprintf(f, "%s@", al->map->dso->long_name); 1999 2000 if ((verbose || print_sym) && al->sym) 2001 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 2002 al->addr - al->sym->start); 2003 else if (al->map) 2004 fprintf(f, "0x%" PRIx64, al->addr); 2005 else 2006 fprintf(f, "0x%" PRIx64, sample->addr); 2007 } 2008 2009 static int trace__pgfault(struct trace *trace, 2010 struct perf_evsel *evsel, 2011 union perf_event *event, 2012 struct perf_sample *sample) 2013 { 2014 struct thread *thread; 2015 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 2016 struct addr_location al; 2017 char map_type = 'd'; 2018 struct thread_trace *ttrace; 2019 int err = -1; 2020 2021 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2022 ttrace = thread__trace(thread, trace->output); 2023 if (ttrace == NULL) 2024 goto out_put; 2025 2026 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 2027 ttrace->pfmaj++; 2028 else 2029 ttrace->pfmin++; 2030 2031 if (trace->summary_only) 2032 goto out; 2033 2034 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 2035 sample->ip, &al); 2036 2037 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); 2038 2039 fprintf(trace->output, "%sfault [", 2040 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 2041 "maj" : "min"); 2042 2043 print_location(trace->output, sample, &al, false, true); 2044 2045 fprintf(trace->output, "] => "); 2046 2047 thread__find_addr_location(thread, cpumode, MAP__VARIABLE, 2048 sample->addr, &al); 2049 2050 if (!al.map) { 2051 thread__find_addr_location(thread, cpumode, 2052 MAP__FUNCTION, sample->addr, &al); 2053 2054 if (al.map) 2055 map_type = 'x'; 2056 else 2057 map_type = '?'; 2058 } 2059 2060 print_location(trace->output, sample, &al, true, false); 2061 2062 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2063 out: 2064 err = 0; 2065 out_put: 2066 thread__put(thread); 2067 return err; 2068 } 2069 2070 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 2071 { 2072 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 2073 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 2074 return false; 2075 2076 if (trace->pid_list || trace->tid_list) 2077 return true; 2078 2079 return false; 2080 } 2081 2082 static int trace__process_sample(struct perf_tool *tool, 2083 union perf_event *event, 2084 struct perf_sample *sample, 2085 struct perf_evsel *evsel, 2086 struct machine *machine __maybe_unused) 2087 { 2088 struct trace *trace = container_of(tool, struct trace, tool); 2089 int err = 0; 2090 2091 tracepoint_handler handler = evsel->handler; 2092 2093 if (skip_sample(trace, sample)) 2094 return 0; 2095 2096 if (!trace->full_time && trace->base_time == 0) 2097 trace->base_time = sample->time; 2098 2099 if (handler) { 2100 ++trace->nr_events; 2101 handler(trace, evsel, event, sample); 2102 } 2103 2104 return err; 2105 } 2106 2107 static int parse_target_str(struct trace *trace) 2108 { 2109 if (trace->opts.target.pid) { 2110 trace->pid_list = intlist__new(trace->opts.target.pid); 2111 if (trace->pid_list == NULL) { 2112 pr_err("Error parsing process id string\n"); 2113 return -EINVAL; 2114 } 2115 } 2116 2117 if (trace->opts.target.tid) { 2118 trace->tid_list = intlist__new(trace->opts.target.tid); 2119 if (trace->tid_list == NULL) { 2120 pr_err("Error parsing thread id string\n"); 2121 return -EINVAL; 2122 } 2123 } 2124 2125 return 0; 2126 } 2127 2128 static int trace__record(struct trace *trace, int argc, const char **argv) 2129 { 2130 unsigned int rec_argc, i, j; 2131 const char **rec_argv; 2132 const char * const record_args[] = { 2133 "record", 2134 "-R", 2135 "-m", "1024", 2136 "-c", "1", 2137 }; 2138 2139 const char * const sc_args[] = { "-e", }; 2140 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2141 const char * const majpf_args[] = { "-e", "major-faults" }; 2142 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2143 const char * const minpf_args[] = { "-e", "minor-faults" }; 2144 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2145 2146 /* +1 is for the event string below */ 2147 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2148 majpf_args_nr + minpf_args_nr + argc; 2149 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2150 2151 if (rec_argv == NULL) 2152 return -ENOMEM; 2153 2154 j = 0; 2155 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2156 rec_argv[j++] = record_args[i]; 2157 2158 if (trace->trace_syscalls) { 2159 for (i = 0; i < sc_args_nr; i++) 2160 rec_argv[j++] = sc_args[i]; 2161 2162 /* event string may be different for older kernels - e.g., RHEL6 */ 2163 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2164 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2165 else if (is_valid_tracepoint("syscalls:sys_enter")) 2166 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2167 else { 2168 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2169 return -1; 2170 } 2171 } 2172 2173 if (trace->trace_pgfaults & TRACE_PFMAJ) 2174 for (i = 0; i < majpf_args_nr; i++) 2175 rec_argv[j++] = majpf_args[i]; 2176 2177 if (trace->trace_pgfaults & TRACE_PFMIN) 2178 for (i = 0; i < minpf_args_nr; i++) 2179 rec_argv[j++] = minpf_args[i]; 2180 2181 for (i = 0; i < (unsigned int)argc; i++) 2182 rec_argv[j++] = argv[i]; 2183 2184 return cmd_record(j, rec_argv, NULL); 2185 } 2186 2187 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2188 2189 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2190 { 2191 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2192 if (evsel == NULL) 2193 return; 2194 2195 if (perf_evsel__field(evsel, "pathname") == NULL) { 2196 perf_evsel__delete(evsel); 2197 return; 2198 } 2199 2200 evsel->handler = trace__vfs_getname; 2201 perf_evlist__add(evlist, evsel); 2202 } 2203 2204 static int perf_evlist__add_pgfault(struct perf_evlist *evlist, 2205 u64 config) 2206 { 2207 struct perf_evsel *evsel; 2208 struct perf_event_attr attr = { 2209 .type = PERF_TYPE_SOFTWARE, 2210 .mmap_data = 1, 2211 }; 2212 2213 attr.config = config; 2214 attr.sample_period = 1; 2215 2216 event_attr_init(&attr); 2217 2218 evsel = perf_evsel__new(&attr); 2219 if (!evsel) 2220 return -ENOMEM; 2221 2222 evsel->handler = trace__pgfault; 2223 perf_evlist__add(evlist, evsel); 2224 2225 return 0; 2226 } 2227 2228 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2229 { 2230 const u32 type = event->header.type; 2231 struct perf_evsel *evsel; 2232 2233 if (!trace->full_time && trace->base_time == 0) 2234 trace->base_time = sample->time; 2235 2236 if (type != PERF_RECORD_SAMPLE) { 2237 trace__process_event(trace, trace->host, event, sample); 2238 return; 2239 } 2240 2241 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2242 if (evsel == NULL) { 2243 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2244 return; 2245 } 2246 2247 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2248 sample->raw_data == NULL) { 2249 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2250 perf_evsel__name(evsel), sample->tid, 2251 sample->cpu, sample->raw_size); 2252 } else { 2253 tracepoint_handler handler = evsel->handler; 2254 handler(trace, evsel, event, sample); 2255 } 2256 } 2257 2258 static int trace__run(struct trace *trace, int argc, const char **argv) 2259 { 2260 struct perf_evlist *evlist = trace->evlist; 2261 int err = -1, i; 2262 unsigned long before; 2263 const bool forks = argc > 0; 2264 bool draining = false; 2265 2266 trace->live = true; 2267 2268 if (trace->trace_syscalls && 2269 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, 2270 trace__sys_exit)) 2271 goto out_error_raw_syscalls; 2272 2273 if (trace->trace_syscalls) 2274 perf_evlist__add_vfs_getname(evlist); 2275 2276 if ((trace->trace_pgfaults & TRACE_PFMAJ) && 2277 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { 2278 goto out_error_mem; 2279 } 2280 2281 if ((trace->trace_pgfaults & TRACE_PFMIN) && 2282 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) 2283 goto out_error_mem; 2284 2285 if (trace->sched && 2286 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2287 trace__sched_stat_runtime)) 2288 goto out_error_sched_stat_runtime; 2289 2290 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2291 if (err < 0) { 2292 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2293 goto out_delete_evlist; 2294 } 2295 2296 err = trace__symbols_init(trace, evlist); 2297 if (err < 0) { 2298 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2299 goto out_delete_evlist; 2300 } 2301 2302 perf_evlist__config(evlist, &trace->opts); 2303 2304 signal(SIGCHLD, sig_handler); 2305 signal(SIGINT, sig_handler); 2306 2307 if (forks) { 2308 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2309 argv, false, NULL); 2310 if (err < 0) { 2311 fprintf(trace->output, "Couldn't run the workload!\n"); 2312 goto out_delete_evlist; 2313 } 2314 } 2315 2316 err = perf_evlist__open(evlist); 2317 if (err < 0) 2318 goto out_error_open; 2319 2320 /* 2321 * Better not use !target__has_task() here because we need to cover the 2322 * case where no threads were specified in the command line, but a 2323 * workload was, and in that case we will fill in the thread_map when 2324 * we fork the workload in perf_evlist__prepare_workload. 2325 */ 2326 if (trace->filter_pids.nr > 0) 2327 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2328 else if (evlist->threads->map[0] == -1) 2329 err = perf_evlist__set_filter_pid(evlist, getpid()); 2330 2331 if (err < 0) { 2332 printf("err=%d,%s\n", -err, strerror(-err)); 2333 exit(1); 2334 } 2335 2336 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); 2337 if (err < 0) 2338 goto out_error_mmap; 2339 2340 if (!target__none(&trace->opts.target)) 2341 perf_evlist__enable(evlist); 2342 2343 if (forks) 2344 perf_evlist__start_workload(evlist); 2345 2346 trace->multiple_threads = evlist->threads->map[0] == -1 || 2347 evlist->threads->nr > 1 || 2348 perf_evlist__first(evlist)->attr.inherit; 2349 again: 2350 before = trace->nr_events; 2351 2352 for (i = 0; i < evlist->nr_mmaps; i++) { 2353 union perf_event *event; 2354 2355 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 2356 struct perf_sample sample; 2357 2358 ++trace->nr_events; 2359 2360 err = perf_evlist__parse_sample(evlist, event, &sample); 2361 if (err) { 2362 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2363 goto next_event; 2364 } 2365 2366 trace__handle_event(trace, event, &sample); 2367 next_event: 2368 perf_evlist__mmap_consume(evlist, i); 2369 2370 if (interrupted) 2371 goto out_disable; 2372 2373 if (done && !draining) { 2374 perf_evlist__disable(evlist); 2375 draining = true; 2376 } 2377 } 2378 } 2379 2380 if (trace->nr_events == before) { 2381 int timeout = done ? 100 : -1; 2382 2383 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2384 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2385 draining = true; 2386 2387 goto again; 2388 } 2389 } else { 2390 goto again; 2391 } 2392 2393 out_disable: 2394 thread__zput(trace->current); 2395 2396 perf_evlist__disable(evlist); 2397 2398 if (!err) { 2399 if (trace->summary) 2400 trace__fprintf_thread_summary(trace, trace->output); 2401 2402 if (trace->show_tool_stats) { 2403 fprintf(trace->output, "Stats:\n " 2404 " vfs_getname : %" PRIu64 "\n" 2405 " proc_getname: %" PRIu64 "\n", 2406 trace->stats.vfs_getname, 2407 trace->stats.proc_getname); 2408 } 2409 } 2410 2411 out_delete_evlist: 2412 perf_evlist__delete(evlist); 2413 trace->evlist = NULL; 2414 trace->live = false; 2415 return err; 2416 { 2417 char errbuf[BUFSIZ]; 2418 2419 out_error_sched_stat_runtime: 2420 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2421 goto out_error; 2422 2423 out_error_raw_syscalls: 2424 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2425 goto out_error; 2426 2427 out_error_mmap: 2428 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2429 goto out_error; 2430 2431 out_error_open: 2432 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2433 2434 out_error: 2435 fprintf(trace->output, "%s\n", errbuf); 2436 goto out_delete_evlist; 2437 } 2438 out_error_mem: 2439 fprintf(trace->output, "Not enough memory to run!\n"); 2440 goto out_delete_evlist; 2441 } 2442 2443 static int trace__replay(struct trace *trace) 2444 { 2445 const struct perf_evsel_str_handler handlers[] = { 2446 { "probe:vfs_getname", trace__vfs_getname, }, 2447 }; 2448 struct perf_data_file file = { 2449 .path = input_name, 2450 .mode = PERF_DATA_MODE_READ, 2451 .force = trace->force, 2452 }; 2453 struct perf_session *session; 2454 struct perf_evsel *evsel; 2455 int err = -1; 2456 2457 trace->tool.sample = trace__process_sample; 2458 trace->tool.mmap = perf_event__process_mmap; 2459 trace->tool.mmap2 = perf_event__process_mmap2; 2460 trace->tool.comm = perf_event__process_comm; 2461 trace->tool.exit = perf_event__process_exit; 2462 trace->tool.fork = perf_event__process_fork; 2463 trace->tool.attr = perf_event__process_attr; 2464 trace->tool.tracing_data = perf_event__process_tracing_data; 2465 trace->tool.build_id = perf_event__process_build_id; 2466 2467 trace->tool.ordered_events = true; 2468 trace->tool.ordering_requires_timestamps = true; 2469 2470 /* add tid to output */ 2471 trace->multiple_threads = true; 2472 2473 session = perf_session__new(&file, false, &trace->tool); 2474 if (session == NULL) 2475 return -1; 2476 2477 if (symbol__init(&session->header.env) < 0) 2478 goto out; 2479 2480 trace->host = &session->machines.host; 2481 2482 err = perf_session__set_tracepoints_handlers(session, handlers); 2483 if (err) 2484 goto out; 2485 2486 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2487 "raw_syscalls:sys_enter"); 2488 /* older kernels have syscalls tp versus raw_syscalls */ 2489 if (evsel == NULL) 2490 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2491 "syscalls:sys_enter"); 2492 2493 if (evsel && 2494 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2495 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2496 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2497 goto out; 2498 } 2499 2500 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2501 "raw_syscalls:sys_exit"); 2502 if (evsel == NULL) 2503 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2504 "syscalls:sys_exit"); 2505 if (evsel && 2506 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2507 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2508 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2509 goto out; 2510 } 2511 2512 evlist__for_each(session->evlist, evsel) { 2513 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2514 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2515 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2516 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2517 evsel->handler = trace__pgfault; 2518 } 2519 2520 err = parse_target_str(trace); 2521 if (err != 0) 2522 goto out; 2523 2524 setup_pager(); 2525 2526 err = perf_session__process_events(session); 2527 if (err) 2528 pr_err("Failed to process events, error %d", err); 2529 2530 else if (trace->summary) 2531 trace__fprintf_thread_summary(trace, trace->output); 2532 2533 out: 2534 perf_session__delete(session); 2535 2536 return err; 2537 } 2538 2539 static size_t trace__fprintf_threads_header(FILE *fp) 2540 { 2541 size_t printed; 2542 2543 printed = fprintf(fp, "\n Summary of events:\n\n"); 2544 2545 return printed; 2546 } 2547 2548 static size_t thread__dump_stats(struct thread_trace *ttrace, 2549 struct trace *trace, FILE *fp) 2550 { 2551 struct stats *stats; 2552 size_t printed = 0; 2553 struct syscall *sc; 2554 struct int_node *inode = intlist__first(ttrace->syscall_stats); 2555 2556 if (inode == NULL) 2557 return 0; 2558 2559 printed += fprintf(fp, "\n"); 2560 2561 printed += fprintf(fp, " syscall calls min avg max stddev\n"); 2562 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); 2563 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); 2564 2565 /* each int_node is a syscall */ 2566 while (inode) { 2567 stats = inode->priv; 2568 if (stats) { 2569 double min = (double)(stats->min) / NSEC_PER_MSEC; 2570 double max = (double)(stats->max) / NSEC_PER_MSEC; 2571 double avg = avg_stats(stats); 2572 double pct; 2573 u64 n = (u64) stats->n; 2574 2575 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2576 avg /= NSEC_PER_MSEC; 2577 2578 sc = &trace->syscalls.table[inode->i]; 2579 printed += fprintf(fp, " %-15s", sc->name); 2580 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", 2581 n, min, avg); 2582 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2583 } 2584 2585 inode = intlist__next(inode); 2586 } 2587 2588 printed += fprintf(fp, "\n\n"); 2589 2590 return printed; 2591 } 2592 2593 /* struct used to pass data to per-thread function */ 2594 struct summary_data { 2595 FILE *fp; 2596 struct trace *trace; 2597 size_t printed; 2598 }; 2599 2600 static int trace__fprintf_one_thread(struct thread *thread, void *priv) 2601 { 2602 struct summary_data *data = priv; 2603 FILE *fp = data->fp; 2604 size_t printed = data->printed; 2605 struct trace *trace = data->trace; 2606 struct thread_trace *ttrace = thread__priv(thread); 2607 double ratio; 2608 2609 if (ttrace == NULL) 2610 return 0; 2611 2612 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2613 2614 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2615 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2616 printed += fprintf(fp, "%.1f%%", ratio); 2617 if (ttrace->pfmaj) 2618 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2619 if (ttrace->pfmin) 2620 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2621 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2622 printed += thread__dump_stats(ttrace, trace, fp); 2623 2624 data->printed += printed; 2625 2626 return 0; 2627 } 2628 2629 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2630 { 2631 struct summary_data data = { 2632 .fp = fp, 2633 .trace = trace 2634 }; 2635 data.printed = trace__fprintf_threads_header(fp); 2636 2637 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); 2638 2639 return data.printed; 2640 } 2641 2642 static int trace__set_duration(const struct option *opt, const char *str, 2643 int unset __maybe_unused) 2644 { 2645 struct trace *trace = opt->value; 2646 2647 trace->duration_filter = atof(str); 2648 return 0; 2649 } 2650 2651 static int trace__set_filter_pids(const struct option *opt, const char *str, 2652 int unset __maybe_unused) 2653 { 2654 int ret = -1; 2655 size_t i; 2656 struct trace *trace = opt->value; 2657 /* 2658 * FIXME: introduce a intarray class, plain parse csv and create a 2659 * { int nr, int entries[] } struct... 2660 */ 2661 struct intlist *list = intlist__new(str); 2662 2663 if (list == NULL) 2664 return -1; 2665 2666 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2667 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2668 2669 if (trace->filter_pids.entries == NULL) 2670 goto out; 2671 2672 trace->filter_pids.entries[0] = getpid(); 2673 2674 for (i = 1; i < trace->filter_pids.nr; ++i) 2675 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2676 2677 intlist__delete(list); 2678 ret = 0; 2679 out: 2680 return ret; 2681 } 2682 2683 static int trace__open_output(struct trace *trace, const char *filename) 2684 { 2685 struct stat st; 2686 2687 if (!stat(filename, &st) && st.st_size) { 2688 char oldname[PATH_MAX]; 2689 2690 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2691 unlink(oldname); 2692 rename(filename, oldname); 2693 } 2694 2695 trace->output = fopen(filename, "w"); 2696 2697 return trace->output == NULL ? -errno : 0; 2698 } 2699 2700 static int parse_pagefaults(const struct option *opt, const char *str, 2701 int unset __maybe_unused) 2702 { 2703 int *trace_pgfaults = opt->value; 2704 2705 if (strcmp(str, "all") == 0) 2706 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2707 else if (strcmp(str, "maj") == 0) 2708 *trace_pgfaults |= TRACE_PFMAJ; 2709 else if (strcmp(str, "min") == 0) 2710 *trace_pgfaults |= TRACE_PFMIN; 2711 else 2712 return -1; 2713 2714 return 0; 2715 } 2716 2717 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2718 { 2719 struct perf_evsel *evsel; 2720 2721 evlist__for_each(evlist, evsel) 2722 evsel->handler = handler; 2723 } 2724 2725 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2726 { 2727 const char *trace_usage[] = { 2728 "perf trace [<options>] [<command>]", 2729 "perf trace [<options>] -- <command> [<options>]", 2730 "perf trace record [<options>] [<command>]", 2731 "perf trace record [<options>] -- <command> [<options>]", 2732 NULL 2733 }; 2734 struct trace trace = { 2735 .audit = { 2736 .machine = audit_detect_machine(), 2737 .open_id = audit_name_to_syscall("open", trace.audit.machine), 2738 }, 2739 .syscalls = { 2740 . max = -1, 2741 }, 2742 .opts = { 2743 .target = { 2744 .uid = UINT_MAX, 2745 .uses_mmap = true, 2746 }, 2747 .user_freq = UINT_MAX, 2748 .user_interval = ULLONG_MAX, 2749 .no_buffering = true, 2750 .mmap_pages = UINT_MAX, 2751 .proc_map_timeout = 500, 2752 }, 2753 .output = stdout, 2754 .show_comm = true, 2755 .trace_syscalls = true, 2756 }; 2757 const char *output_name = NULL; 2758 const char *ev_qualifier_str = NULL; 2759 const struct option trace_options[] = { 2760 OPT_CALLBACK(0, "event", &trace.evlist, "event", 2761 "event selector. use 'perf list' to list available events", 2762 parse_events_option), 2763 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2764 "show the thread COMM next to its id"), 2765 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2766 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), 2767 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2768 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2769 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2770 "trace events on existing process id"), 2771 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2772 "trace events on existing thread id"), 2773 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 2774 "pids to filter (by the kernel)", trace__set_filter_pids), 2775 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2776 "system-wide collection from all CPUs"), 2777 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2778 "list of cpus to monitor"), 2779 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2780 "child tasks do not inherit counters"), 2781 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2782 "number of mmap data pages", 2783 perf_evlist__parse_mmap_pages), 2784 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2785 "user to profile"), 2786 OPT_CALLBACK(0, "duration", &trace, "float", 2787 "show only events with duration > N.M ms", 2788 trace__set_duration), 2789 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2790 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2791 OPT_BOOLEAN('T', "time", &trace.full_time, 2792 "Show full timestamp, not time relative to first start"), 2793 OPT_BOOLEAN('s', "summary", &trace.summary_only, 2794 "Show only syscall summary with statistics"), 2795 OPT_BOOLEAN('S', "with-summary", &trace.summary, 2796 "Show all syscalls and summary with statistics"), 2797 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 2798 "Trace pagefaults", parse_pagefaults, "maj"), 2799 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 2800 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 2801 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 2802 "per thread proc mmap processing timeout in ms"), 2803 OPT_END() 2804 }; 2805 const char * const trace_subcommands[] = { "record", NULL }; 2806 int err; 2807 char bf[BUFSIZ]; 2808 2809 signal(SIGSEGV, sighandler_dump_stack); 2810 signal(SIGFPE, sighandler_dump_stack); 2811 2812 trace.evlist = perf_evlist__new(); 2813 2814 if (trace.evlist == NULL) { 2815 pr_err("Not enough memory to run!\n"); 2816 err = -ENOMEM; 2817 goto out; 2818 } 2819 2820 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 2821 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2822 2823 if (trace.trace_pgfaults) { 2824 trace.opts.sample_address = true; 2825 trace.opts.sample_time = true; 2826 } 2827 2828 if (trace.evlist->nr_entries > 0) 2829 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 2830 2831 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 2832 return trace__record(&trace, argc-1, &argv[1]); 2833 2834 /* summary_only implies summary option, but don't overwrite summary if set */ 2835 if (trace.summary_only) 2836 trace.summary = trace.summary_only; 2837 2838 if (!trace.trace_syscalls && !trace.trace_pgfaults && 2839 trace.evlist->nr_entries == 0 /* Was --events used? */) { 2840 pr_err("Please specify something to trace.\n"); 2841 return -1; 2842 } 2843 2844 if (output_name != NULL) { 2845 err = trace__open_output(&trace, output_name); 2846 if (err < 0) { 2847 perror("failed to create output file"); 2848 goto out; 2849 } 2850 } 2851 2852 if (ev_qualifier_str != NULL) { 2853 const char *s = ev_qualifier_str; 2854 2855 trace.not_ev_qualifier = *s == '!'; 2856 if (trace.not_ev_qualifier) 2857 ++s; 2858 trace.ev_qualifier = strlist__new(true, s); 2859 if (trace.ev_qualifier == NULL) { 2860 fputs("Not enough memory to parse event qualifier", 2861 trace.output); 2862 err = -ENOMEM; 2863 goto out_close; 2864 } 2865 } 2866 2867 err = target__validate(&trace.opts.target); 2868 if (err) { 2869 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2870 fprintf(trace.output, "%s", bf); 2871 goto out_close; 2872 } 2873 2874 err = target__parse_uid(&trace.opts.target); 2875 if (err) { 2876 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2877 fprintf(trace.output, "%s", bf); 2878 goto out_close; 2879 } 2880 2881 if (!argc && target__none(&trace.opts.target)) 2882 trace.opts.target.system_wide = true; 2883 2884 if (input_name) 2885 err = trace__replay(&trace); 2886 else 2887 err = trace__run(&trace, argc, argv); 2888 2889 out_close: 2890 if (output_name != NULL) 2891 fclose(trace.output); 2892 out: 2893 return err; 2894 } 2895