1 /* 2 * builtin-trace.c 3 * 4 * Builtin 'trace' command: 5 * 6 * Display a continuously updated trace of any workload, CPU, specific PID, 7 * system wide, etc. Default format is loosely strace like, but any other 8 * event may be specified using --event. 9 * 10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 11 * 12 * Initially based on the 'trace' prototype by Thomas Gleixner: 13 * 14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") 15 * 16 * Released under the GPL v2. (and only v2, not any later version) 17 */ 18 19 #include <traceevent/event-parse.h> 20 #include <api/fs/tracing_path.h> 21 #include "builtin.h" 22 #include "util/cgroup.h" 23 #include "util/color.h" 24 #include "util/debug.h" 25 #include "util/env.h" 26 #include "util/event.h" 27 #include "util/evlist.h" 28 #include <subcmd/exec-cmd.h> 29 #include "util/machine.h" 30 #include "util/path.h" 31 #include "util/session.h" 32 #include "util/thread.h" 33 #include <subcmd/parse-options.h> 34 #include "util/strlist.h" 35 #include "util/intlist.h" 36 #include "util/thread_map.h" 37 #include "util/stat.h" 38 #include "trace/beauty/beauty.h" 39 #include "trace-event.h" 40 #include "util/parse-events.h" 41 #include "util/bpf-loader.h" 42 #include "callchain.h" 43 #include "print_binary.h" 44 #include "string2.h" 45 #include "syscalltbl.h" 46 #include "rb_resort.h" 47 48 #include <errno.h> 49 #include <inttypes.h> 50 #include <poll.h> 51 #include <signal.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <linux/err.h> 55 #include <linux/filter.h> 56 #include <linux/kernel.h> 57 #include <linux/random.h> 58 #include <linux/stringify.h> 59 #include <linux/time64.h> 60 #include <fcntl.h> 61 62 #include "sane_ctype.h" 63 64 #ifndef O_CLOEXEC 65 # define O_CLOEXEC 02000000 66 #endif 67 68 #ifndef F_LINUX_SPECIFIC_BASE 69 # define F_LINUX_SPECIFIC_BASE 1024 70 #endif 71 72 struct trace { 73 struct perf_tool tool; 74 struct syscalltbl *sctbl; 75 struct { 76 int max; 77 struct syscall *table; 78 struct { 79 struct perf_evsel *sys_enter, 80 *sys_exit; 81 } events; 82 } syscalls; 83 struct record_opts opts; 84 struct perf_evlist *evlist; 85 struct machine *host; 86 struct thread *current; 87 struct cgroup *cgroup; 88 u64 base_time; 89 FILE *output; 90 unsigned long nr_events; 91 struct strlist *ev_qualifier; 92 struct { 93 size_t nr; 94 int *entries; 95 } ev_qualifier_ids; 96 struct { 97 size_t nr; 98 pid_t *entries; 99 } filter_pids; 100 double duration_filter; 101 double runtime_ms; 102 struct { 103 u64 vfs_getname, 104 proc_getname; 105 } stats; 106 unsigned int max_stack; 107 unsigned int min_stack; 108 bool not_ev_qualifier; 109 bool live; 110 bool full_time; 111 bool sched; 112 bool multiple_threads; 113 bool summary; 114 bool summary_only; 115 bool failure_only; 116 bool show_comm; 117 bool print_sample; 118 bool show_tool_stats; 119 bool trace_syscalls; 120 bool kernel_syscallchains; 121 bool force; 122 bool vfs_getname; 123 int trace_pgfaults; 124 int open_id; 125 }; 126 127 struct tp_field { 128 int offset; 129 union { 130 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 131 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 132 }; 133 }; 134 135 #define TP_UINT_FIELD(bits) \ 136 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 137 { \ 138 u##bits value; \ 139 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 140 return value; \ 141 } 142 143 TP_UINT_FIELD(8); 144 TP_UINT_FIELD(16); 145 TP_UINT_FIELD(32); 146 TP_UINT_FIELD(64); 147 148 #define TP_UINT_FIELD__SWAPPED(bits) \ 149 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 150 { \ 151 u##bits value; \ 152 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 153 return bswap_##bits(value);\ 154 } 155 156 TP_UINT_FIELD__SWAPPED(16); 157 TP_UINT_FIELD__SWAPPED(32); 158 TP_UINT_FIELD__SWAPPED(64); 159 160 static int tp_field__init_uint(struct tp_field *field, 161 struct format_field *format_field, 162 bool needs_swap) 163 { 164 field->offset = format_field->offset; 165 166 switch (format_field->size) { 167 case 1: 168 field->integer = tp_field__u8; 169 break; 170 case 2: 171 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 172 break; 173 case 4: 174 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 175 break; 176 case 8: 177 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 178 break; 179 default: 180 return -1; 181 } 182 183 return 0; 184 } 185 186 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 187 { 188 return sample->raw_data + field->offset; 189 } 190 191 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 192 { 193 field->offset = format_field->offset; 194 field->pointer = tp_field__ptr; 195 return 0; 196 } 197 198 struct syscall_tp { 199 struct tp_field id; 200 union { 201 struct tp_field args, ret; 202 }; 203 }; 204 205 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 206 struct tp_field *field, 207 const char *name) 208 { 209 struct format_field *format_field = perf_evsel__field(evsel, name); 210 211 if (format_field == NULL) 212 return -1; 213 214 return tp_field__init_uint(field, format_field, evsel->needs_swap); 215 } 216 217 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 218 ({ struct syscall_tp *sc = evsel->priv;\ 219 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 220 221 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 222 struct tp_field *field, 223 const char *name) 224 { 225 struct format_field *format_field = perf_evsel__field(evsel, name); 226 227 if (format_field == NULL) 228 return -1; 229 230 return tp_field__init_ptr(field, format_field); 231 } 232 233 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 234 ({ struct syscall_tp *sc = evsel->priv;\ 235 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 236 237 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 238 { 239 zfree(&evsel->priv); 240 perf_evsel__delete(evsel); 241 } 242 243 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 244 { 245 evsel->priv = malloc(sizeof(struct syscall_tp)); 246 if (evsel->priv != NULL) { 247 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 248 goto out_delete; 249 250 evsel->handler = handler; 251 return 0; 252 } 253 254 return -ENOMEM; 255 256 out_delete: 257 zfree(&evsel->priv); 258 return -ENOENT; 259 } 260 261 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 262 { 263 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 264 265 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 266 if (IS_ERR(evsel)) 267 evsel = perf_evsel__newtp("syscalls", direction); 268 269 if (IS_ERR(evsel)) 270 return NULL; 271 272 if (perf_evsel__init_syscall_tp(evsel, handler)) 273 goto out_delete; 274 275 return evsel; 276 277 out_delete: 278 perf_evsel__delete_priv(evsel); 279 return NULL; 280 } 281 282 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 283 ({ struct syscall_tp *fields = evsel->priv; \ 284 fields->name.integer(&fields->name, sample); }) 285 286 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 287 ({ struct syscall_tp *fields = evsel->priv; \ 288 fields->name.pointer(&fields->name, sample); }) 289 290 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val) 291 { 292 int idx = val - sa->offset; 293 294 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) 295 return scnprintf(bf, size, intfmt, val); 296 297 return scnprintf(bf, size, "%s", sa->entries[idx]); 298 } 299 300 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 301 const char *intfmt, 302 struct syscall_arg *arg) 303 { 304 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val); 305 } 306 307 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 308 struct syscall_arg *arg) 309 { 310 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 311 } 312 313 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 314 315 struct strarrays { 316 int nr_entries; 317 struct strarray **entries; 318 }; 319 320 #define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \ 321 .nr_entries = ARRAY_SIZE(array), \ 322 .entries = array, \ 323 } 324 325 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, 326 struct syscall_arg *arg) 327 { 328 struct strarrays *sas = arg->parm; 329 int i; 330 331 for (i = 0; i < sas->nr_entries; ++i) { 332 struct strarray *sa = sas->entries[i]; 333 int idx = arg->val - sa->offset; 334 335 if (idx >= 0 && idx < sa->nr_entries) { 336 if (sa->entries[idx] == NULL) 337 break; 338 return scnprintf(bf, size, "%s", sa->entries[idx]); 339 } 340 } 341 342 return scnprintf(bf, size, "%d", arg->val); 343 } 344 345 #ifndef AT_FDCWD 346 #define AT_FDCWD -100 347 #endif 348 349 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 350 struct syscall_arg *arg) 351 { 352 int fd = arg->val; 353 354 if (fd == AT_FDCWD) 355 return scnprintf(bf, size, "CWD"); 356 357 return syscall_arg__scnprintf_fd(bf, size, arg); 358 } 359 360 #define SCA_FDAT syscall_arg__scnprintf_fd_at 361 362 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 363 struct syscall_arg *arg); 364 365 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 366 367 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) 368 { 369 return scnprintf(bf, size, "%#lx", arg->val); 370 } 371 372 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg) 373 { 374 return scnprintf(bf, size, "%d", arg->val); 375 } 376 377 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg) 378 { 379 return scnprintf(bf, size, "%ld", arg->val); 380 } 381 382 static const char *bpf_cmd[] = { 383 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", 384 "MAP_GET_NEXT_KEY", "PROG_LOAD", 385 }; 386 static DEFINE_STRARRAY(bpf_cmd); 387 388 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 389 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 390 391 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 392 static DEFINE_STRARRAY(itimers); 393 394 static const char *keyctl_options[] = { 395 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", 396 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", 397 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", 398 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", 399 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", 400 }; 401 static DEFINE_STRARRAY(keyctl_options); 402 403 static const char *whences[] = { "SET", "CUR", "END", 404 #ifdef SEEK_DATA 405 "DATA", 406 #endif 407 #ifdef SEEK_HOLE 408 "HOLE", 409 #endif 410 }; 411 static DEFINE_STRARRAY(whences); 412 413 static const char *fcntl_cmds[] = { 414 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 415 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64", 416 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX", 417 "GETOWNER_UIDS", 418 }; 419 static DEFINE_STRARRAY(fcntl_cmds); 420 421 static const char *fcntl_linux_specific_cmds[] = { 422 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC", 423 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS", 424 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT", 425 }; 426 427 static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE); 428 429 static struct strarray *fcntl_cmds_arrays[] = { 430 &strarray__fcntl_cmds, 431 &strarray__fcntl_linux_specific_cmds, 432 }; 433 434 static DEFINE_STRARRAYS(fcntl_cmds_arrays); 435 436 static const char *rlimit_resources[] = { 437 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 438 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 439 "RTTIME", 440 }; 441 static DEFINE_STRARRAY(rlimit_resources); 442 443 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 444 static DEFINE_STRARRAY(sighow); 445 446 static const char *clockid[] = { 447 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 448 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", 449 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" 450 }; 451 static DEFINE_STRARRAY(clockid); 452 453 static const char *socket_families[] = { 454 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 455 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 456 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 457 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 458 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 459 "ALG", "NFC", "VSOCK", 460 }; 461 static DEFINE_STRARRAY(socket_families); 462 463 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 464 struct syscall_arg *arg) 465 { 466 size_t printed = 0; 467 int mode = arg->val; 468 469 if (mode == F_OK) /* 0 */ 470 return scnprintf(bf, size, "F"); 471 #define P_MODE(n) \ 472 if (mode & n##_OK) { \ 473 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 474 mode &= ~n##_OK; \ 475 } 476 477 P_MODE(R); 478 P_MODE(W); 479 P_MODE(X); 480 #undef P_MODE 481 482 if (mode) 483 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 484 485 return printed; 486 } 487 488 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 489 490 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 491 struct syscall_arg *arg); 492 493 #define SCA_FILENAME syscall_arg__scnprintf_filename 494 495 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 496 struct syscall_arg *arg) 497 { 498 int printed = 0, flags = arg->val; 499 500 #define P_FLAG(n) \ 501 if (flags & O_##n) { \ 502 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 503 flags &= ~O_##n; \ 504 } 505 506 P_FLAG(CLOEXEC); 507 P_FLAG(NONBLOCK); 508 #undef P_FLAG 509 510 if (flags) 511 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 512 513 return printed; 514 } 515 516 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 517 518 #ifndef GRND_NONBLOCK 519 #define GRND_NONBLOCK 0x0001 520 #endif 521 #ifndef GRND_RANDOM 522 #define GRND_RANDOM 0x0002 523 #endif 524 525 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, 526 struct syscall_arg *arg) 527 { 528 int printed = 0, flags = arg->val; 529 530 #define P_FLAG(n) \ 531 if (flags & GRND_##n) { \ 532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 533 flags &= ~GRND_##n; \ 534 } 535 536 P_FLAG(RANDOM); 537 P_FLAG(NONBLOCK); 538 #undef P_FLAG 539 540 if (flags) 541 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 542 543 return printed; 544 } 545 546 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags 547 548 #define STRARRAY(name, array) \ 549 { .scnprintf = SCA_STRARRAY, \ 550 .parm = &strarray__##array, } 551 552 #include "trace/beauty/arch_errno_names.c" 553 #include "trace/beauty/eventfd.c" 554 #include "trace/beauty/futex_op.c" 555 #include "trace/beauty/futex_val3.c" 556 #include "trace/beauty/mmap.c" 557 #include "trace/beauty/mode_t.c" 558 #include "trace/beauty/msg_flags.c" 559 #include "trace/beauty/open_flags.c" 560 #include "trace/beauty/perf_event_open.c" 561 #include "trace/beauty/pid.c" 562 #include "trace/beauty/sched_policy.c" 563 #include "trace/beauty/seccomp.c" 564 #include "trace/beauty/signum.c" 565 #include "trace/beauty/socket_type.c" 566 #include "trace/beauty/waitid_options.c" 567 568 struct syscall_arg_fmt { 569 size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 570 void *parm; 571 const char *name; 572 bool show_zero; 573 }; 574 575 static struct syscall_fmt { 576 const char *name; 577 const char *alias; 578 struct syscall_arg_fmt arg[6]; 579 u8 nr_args; 580 bool errpid; 581 bool timeout; 582 bool hexret; 583 } syscall_fmts[] = { 584 { .name = "access", 585 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, 586 { .name = "bpf", 587 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, 588 { .name = "brk", .hexret = true, 589 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, }, 590 { .name = "clock_gettime", 591 .arg = { [0] = STRARRAY(clk_id, clockid), }, }, 592 { .name = "clone", .errpid = true, .nr_args = 5, 593 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, 594 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, 595 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, }, 596 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, }, 597 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, }, 598 { .name = "close", 599 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, 600 { .name = "epoll_ctl", 601 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, 602 { .name = "eventfd2", 603 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, 604 { .name = "fchmodat", 605 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 606 { .name = "fchownat", 607 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 608 { .name = "fcntl", 609 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ 610 .parm = &strarrays__fcntl_cmds_arrays, 611 .show_zero = true, }, 612 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, 613 { .name = "flock", 614 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, 615 { .name = "fstat", .alias = "newfstat", }, 616 { .name = "fstatat", .alias = "newfstatat", }, 617 { .name = "futex", 618 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, 619 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, 620 { .name = "futimesat", 621 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 622 { .name = "getitimer", 623 .arg = { [0] = STRARRAY(which, itimers), }, }, 624 { .name = "getpid", .errpid = true, }, 625 { .name = "getpgid", .errpid = true, }, 626 { .name = "getppid", .errpid = true, }, 627 { .name = "getrandom", 628 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, 629 { .name = "getrlimit", 630 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 631 { .name = "gettid", .errpid = true, }, 632 { .name = "ioctl", 633 .arg = { 634 #if defined(__i386__) || defined(__x86_64__) 635 /* 636 * FIXME: Make this available to all arches. 637 */ 638 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ }, 639 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 640 #else 641 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 642 #endif 643 { .name = "kcmp", .nr_args = 5, 644 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, }, 645 [1] = { .name = "pid2", .scnprintf = SCA_PID, }, 646 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, }, 647 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, }, 648 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, }, 649 { .name = "keyctl", 650 .arg = { [0] = STRARRAY(option, keyctl_options), }, }, 651 { .name = "kill", 652 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 653 { .name = "linkat", 654 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 655 { .name = "lseek", 656 .arg = { [2] = STRARRAY(whence, whences), }, }, 657 { .name = "lstat", .alias = "newlstat", }, 658 { .name = "madvise", 659 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 660 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, }, 661 { .name = "mkdirat", 662 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 663 { .name = "mknodat", 664 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 665 { .name = "mlock", 666 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 667 { .name = "mlockall", 668 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 669 { .name = "mmap", .hexret = true, 670 /* The standard mmap maps to old_mmap on s390x */ 671 #if defined(__s390x__) 672 .alias = "old_mmap", 673 #endif 674 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, 675 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, 676 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, 677 { .name = "mprotect", 678 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 679 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, 680 { .name = "mq_unlink", 681 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, 682 { .name = "mremap", .hexret = true, 683 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, 684 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, 685 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, }, 686 { .name = "munlock", 687 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 688 { .name = "munmap", 689 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 690 { .name = "name_to_handle_at", 691 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 692 { .name = "newfstatat", 693 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 694 { .name = "open", 695 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 696 { .name = "open_by_handle_at", 697 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, 698 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 699 { .name = "openat", 700 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, 701 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 702 { .name = "perf_event_open", 703 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, 704 [3] = { .scnprintf = SCA_FD, /* group_fd */ }, 705 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, 706 { .name = "pipe2", 707 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, 708 { .name = "pkey_alloc", 709 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, 710 { .name = "pkey_free", 711 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, 712 { .name = "pkey_mprotect", 713 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 714 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, 715 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, 716 { .name = "poll", .timeout = true, }, 717 { .name = "ppoll", .timeout = true, }, 718 { .name = "prctl", .alias = "arch_prctl", 719 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ }, 720 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ }, 721 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, }, 722 { .name = "pread", .alias = "pread64", }, 723 { .name = "preadv", .alias = "pread", }, 724 { .name = "prlimit64", 725 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, 726 { .name = "pwrite", .alias = "pwrite64", }, 727 { .name = "readlinkat", 728 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 729 { .name = "recvfrom", 730 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 731 { .name = "recvmmsg", 732 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 733 { .name = "recvmsg", 734 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 735 { .name = "renameat", 736 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 737 { .name = "rt_sigaction", 738 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 739 { .name = "rt_sigprocmask", 740 .arg = { [0] = STRARRAY(how, sighow), }, }, 741 { .name = "rt_sigqueueinfo", 742 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 743 { .name = "rt_tgsigqueueinfo", 744 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 745 { .name = "sched_setscheduler", 746 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, }, 747 { .name = "seccomp", 748 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ }, 749 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, }, 750 { .name = "select", .timeout = true, }, 751 { .name = "sendmmsg", 752 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 753 { .name = "sendmsg", 754 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 755 { .name = "sendto", 756 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 757 { .name = "set_tid_address", .errpid = true, }, 758 { .name = "setitimer", 759 .arg = { [0] = STRARRAY(which, itimers), }, }, 760 { .name = "setrlimit", 761 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 762 { .name = "socket", 763 .arg = { [0] = STRARRAY(family, socket_families), 764 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, 765 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, }, 766 { .name = "socketpair", 767 .arg = { [0] = STRARRAY(family, socket_families), 768 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, 769 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, }, 770 { .name = "stat", .alias = "newstat", }, 771 { .name = "statx", 772 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, 773 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , 774 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, 775 { .name = "swapoff", 776 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 777 { .name = "swapon", 778 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 779 { .name = "symlinkat", 780 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 781 { .name = "tgkill", 782 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 783 { .name = "tkill", 784 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 785 { .name = "uname", .alias = "newuname", }, 786 { .name = "unlinkat", 787 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 788 { .name = "utimensat", 789 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, 790 { .name = "wait4", .errpid = true, 791 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, 792 { .name = "waitid", .errpid = true, 793 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, 794 }; 795 796 static int syscall_fmt__cmp(const void *name, const void *fmtp) 797 { 798 const struct syscall_fmt *fmt = fmtp; 799 return strcmp(name, fmt->name); 800 } 801 802 static struct syscall_fmt *syscall_fmt__find(const char *name) 803 { 804 const int nmemb = ARRAY_SIZE(syscall_fmts); 805 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 806 } 807 808 struct syscall { 809 struct event_format *tp_format; 810 int nr_args; 811 struct format_field *args; 812 const char *name; 813 bool is_exit; 814 struct syscall_fmt *fmt; 815 struct syscall_arg_fmt *arg_fmt; 816 }; 817 818 /* 819 * We need to have this 'calculated' boolean because in some cases we really 820 * don't know what is the duration of a syscall, for instance, when we start 821 * a session and some threads are waiting for a syscall to finish, say 'poll', 822 * in which case all we can do is to print "( ? ) for duration and for the 823 * start timestamp. 824 */ 825 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) 826 { 827 double duration = (double)t / NSEC_PER_MSEC; 828 size_t printed = fprintf(fp, "("); 829 830 if (!calculated) 831 printed += fprintf(fp, " "); 832 else if (duration >= 1.0) 833 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 834 else if (duration >= 0.01) 835 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 836 else 837 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 838 return printed + fprintf(fp, "): "); 839 } 840 841 /** 842 * filename.ptr: The filename char pointer that will be vfs_getname'd 843 * filename.entry_str_pos: Where to insert the string translated from 844 * filename.ptr by the vfs_getname tracepoint/kprobe. 845 * ret_scnprintf: syscall args may set this to a different syscall return 846 * formatter, for instance, fcntl may return fds, file flags, etc. 847 */ 848 struct thread_trace { 849 u64 entry_time; 850 bool entry_pending; 851 unsigned long nr_events; 852 unsigned long pfmaj, pfmin; 853 char *entry_str; 854 double runtime_ms; 855 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 856 struct { 857 unsigned long ptr; 858 short int entry_str_pos; 859 bool pending_open; 860 unsigned int namelen; 861 char *name; 862 } filename; 863 struct { 864 int max; 865 char **table; 866 } paths; 867 868 struct intlist *syscall_stats; 869 }; 870 871 static struct thread_trace *thread_trace__new(void) 872 { 873 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 874 875 if (ttrace) 876 ttrace->paths.max = -1; 877 878 ttrace->syscall_stats = intlist__new(NULL); 879 880 return ttrace; 881 } 882 883 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 884 { 885 struct thread_trace *ttrace; 886 887 if (thread == NULL) 888 goto fail; 889 890 if (thread__priv(thread) == NULL) 891 thread__set_priv(thread, thread_trace__new()); 892 893 if (thread__priv(thread) == NULL) 894 goto fail; 895 896 ttrace = thread__priv(thread); 897 ++ttrace->nr_events; 898 899 return ttrace; 900 fail: 901 color_fprintf(fp, PERF_COLOR_RED, 902 "WARNING: not enough memory, dropping samples!\n"); 903 return NULL; 904 } 905 906 907 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, 908 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)) 909 { 910 struct thread_trace *ttrace = thread__priv(arg->thread); 911 912 ttrace->ret_scnprintf = ret_scnprintf; 913 } 914 915 #define TRACE_PFMAJ (1 << 0) 916 #define TRACE_PFMIN (1 << 1) 917 918 static const size_t trace__entry_str_size = 2048; 919 920 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 921 { 922 struct thread_trace *ttrace = thread__priv(thread); 923 924 if (fd > ttrace->paths.max) { 925 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 926 927 if (npath == NULL) 928 return -1; 929 930 if (ttrace->paths.max != -1) { 931 memset(npath + ttrace->paths.max + 1, 0, 932 (fd - ttrace->paths.max) * sizeof(char *)); 933 } else { 934 memset(npath, 0, (fd + 1) * sizeof(char *)); 935 } 936 937 ttrace->paths.table = npath; 938 ttrace->paths.max = fd; 939 } 940 941 ttrace->paths.table[fd] = strdup(pathname); 942 943 return ttrace->paths.table[fd] != NULL ? 0 : -1; 944 } 945 946 static int thread__read_fd_path(struct thread *thread, int fd) 947 { 948 char linkname[PATH_MAX], pathname[PATH_MAX]; 949 struct stat st; 950 int ret; 951 952 if (thread->pid_ == thread->tid) { 953 scnprintf(linkname, sizeof(linkname), 954 "/proc/%d/fd/%d", thread->pid_, fd); 955 } else { 956 scnprintf(linkname, sizeof(linkname), 957 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 958 } 959 960 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 961 return -1; 962 963 ret = readlink(linkname, pathname, sizeof(pathname)); 964 965 if (ret < 0 || ret > st.st_size) 966 return -1; 967 968 pathname[ret] = '\0'; 969 return trace__set_fd_pathname(thread, fd, pathname); 970 } 971 972 static const char *thread__fd_path(struct thread *thread, int fd, 973 struct trace *trace) 974 { 975 struct thread_trace *ttrace = thread__priv(thread); 976 977 if (ttrace == NULL) 978 return NULL; 979 980 if (fd < 0) 981 return NULL; 982 983 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 984 if (!trace->live) 985 return NULL; 986 ++trace->stats.proc_getname; 987 if (thread__read_fd_path(thread, fd)) 988 return NULL; 989 } 990 991 return ttrace->paths.table[fd]; 992 } 993 994 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) 995 { 996 int fd = arg->val; 997 size_t printed = scnprintf(bf, size, "%d", fd); 998 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 999 1000 if (path) 1001 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1002 1003 return printed; 1004 } 1005 1006 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size) 1007 { 1008 size_t printed = scnprintf(bf, size, "%d", fd); 1009 struct thread *thread = machine__find_thread(trace->host, pid, pid); 1010 1011 if (thread) { 1012 const char *path = thread__fd_path(thread, fd, trace); 1013 1014 if (path) 1015 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1016 1017 thread__put(thread); 1018 } 1019 1020 return printed; 1021 } 1022 1023 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1024 struct syscall_arg *arg) 1025 { 1026 int fd = arg->val; 1027 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1028 struct thread_trace *ttrace = thread__priv(arg->thread); 1029 1030 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1031 zfree(&ttrace->paths.table[fd]); 1032 1033 return printed; 1034 } 1035 1036 static void thread__set_filename_pos(struct thread *thread, const char *bf, 1037 unsigned long ptr) 1038 { 1039 struct thread_trace *ttrace = thread__priv(thread); 1040 1041 ttrace->filename.ptr = ptr; 1042 ttrace->filename.entry_str_pos = bf - ttrace->entry_str; 1043 } 1044 1045 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 1046 struct syscall_arg *arg) 1047 { 1048 unsigned long ptr = arg->val; 1049 1050 if (!arg->trace->vfs_getname) 1051 return scnprintf(bf, size, "%#x", ptr); 1052 1053 thread__set_filename_pos(arg->thread, bf, ptr); 1054 return 0; 1055 } 1056 1057 static bool trace__filter_duration(struct trace *trace, double t) 1058 { 1059 return t < (trace->duration_filter * NSEC_PER_MSEC); 1060 } 1061 1062 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1063 { 1064 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1065 1066 return fprintf(fp, "%10.3f ", ts); 1067 } 1068 1069 /* 1070 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are 1071 * using ttrace->entry_time for a thread that receives a sys_exit without 1072 * first having received a sys_enter ("poll" issued before tracing session 1073 * starts, lost sys_enter exit due to ring buffer overflow). 1074 */ 1075 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1076 { 1077 if (tstamp > 0) 1078 return __trace__fprintf_tstamp(trace, tstamp, fp); 1079 1080 return fprintf(fp, " ? "); 1081 } 1082 1083 static bool done = false; 1084 static bool interrupted = false; 1085 1086 static void sig_handler(int sig) 1087 { 1088 done = true; 1089 interrupted = sig == SIGINT; 1090 } 1091 1092 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1093 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) 1094 { 1095 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1096 printed += fprintf_duration(duration, duration_calculated, fp); 1097 1098 if (trace->multiple_threads) { 1099 if (trace->show_comm) 1100 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1101 printed += fprintf(fp, "%d ", thread->tid); 1102 } 1103 1104 return printed; 1105 } 1106 1107 static int trace__process_event(struct trace *trace, struct machine *machine, 1108 union perf_event *event, struct perf_sample *sample) 1109 { 1110 int ret = 0; 1111 1112 switch (event->header.type) { 1113 case PERF_RECORD_LOST: 1114 color_fprintf(trace->output, PERF_COLOR_RED, 1115 "LOST %" PRIu64 " events!\n", event->lost.lost); 1116 ret = machine__process_lost_event(machine, event, sample); 1117 break; 1118 default: 1119 ret = machine__process_event(machine, event, sample); 1120 break; 1121 } 1122 1123 return ret; 1124 } 1125 1126 static int trace__tool_process(struct perf_tool *tool, 1127 union perf_event *event, 1128 struct perf_sample *sample, 1129 struct machine *machine) 1130 { 1131 struct trace *trace = container_of(tool, struct trace, tool); 1132 return trace__process_event(trace, machine, event, sample); 1133 } 1134 1135 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) 1136 { 1137 struct machine *machine = vmachine; 1138 1139 if (machine->kptr_restrict_warned) 1140 return NULL; 1141 1142 if (symbol_conf.kptr_restrict) { 1143 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" 1144 "Check /proc/sys/kernel/kptr_restrict.\n\n" 1145 "Kernel samples will not be resolved.\n"); 1146 machine->kptr_restrict_warned = true; 1147 return NULL; 1148 } 1149 1150 return machine__resolve_kernel_addr(vmachine, addrp, modp); 1151 } 1152 1153 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1154 { 1155 int err = symbol__init(NULL); 1156 1157 if (err) 1158 return err; 1159 1160 trace->host = machine__new_host(); 1161 if (trace->host == NULL) 1162 return -ENOMEM; 1163 1164 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); 1165 if (err < 0) 1166 goto out; 1167 1168 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1169 evlist->threads, trace__tool_process, false, 1170 trace->opts.proc_map_timeout, 1); 1171 out: 1172 if (err) 1173 symbol__exit(); 1174 1175 return err; 1176 } 1177 1178 static void trace__symbols__exit(struct trace *trace) 1179 { 1180 machine__exit(trace->host); 1181 trace->host = NULL; 1182 1183 symbol__exit(); 1184 } 1185 1186 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) 1187 { 1188 int idx; 1189 1190 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) 1191 nr_args = sc->fmt->nr_args; 1192 1193 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); 1194 if (sc->arg_fmt == NULL) 1195 return -1; 1196 1197 for (idx = 0; idx < nr_args; ++idx) { 1198 if (sc->fmt) 1199 sc->arg_fmt[idx] = sc->fmt->arg[idx]; 1200 } 1201 1202 sc->nr_args = nr_args; 1203 return 0; 1204 } 1205 1206 static int syscall__set_arg_fmts(struct syscall *sc) 1207 { 1208 struct format_field *field; 1209 int idx = 0, len; 1210 1211 for (field = sc->args; field; field = field->next, ++idx) { 1212 if (sc->fmt && sc->fmt->arg[idx].scnprintf) 1213 continue; 1214 1215 if (strcmp(field->type, "const char *") == 0 && 1216 (strcmp(field->name, "filename") == 0 || 1217 strcmp(field->name, "path") == 0 || 1218 strcmp(field->name, "pathname") == 0)) 1219 sc->arg_fmt[idx].scnprintf = SCA_FILENAME; 1220 else if (field->flags & FIELD_IS_POINTER) 1221 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex; 1222 else if (strcmp(field->type, "pid_t") == 0) 1223 sc->arg_fmt[idx].scnprintf = SCA_PID; 1224 else if (strcmp(field->type, "umode_t") == 0) 1225 sc->arg_fmt[idx].scnprintf = SCA_MODE_T; 1226 else if ((strcmp(field->type, "int") == 0 || 1227 strcmp(field->type, "unsigned int") == 0 || 1228 strcmp(field->type, "long") == 0) && 1229 (len = strlen(field->name)) >= 2 && 1230 strcmp(field->name + len - 2, "fd") == 0) { 1231 /* 1232 * /sys/kernel/tracing/events/syscalls/sys_enter* 1233 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c 1234 * 65 int 1235 * 23 unsigned int 1236 * 7 unsigned long 1237 */ 1238 sc->arg_fmt[idx].scnprintf = SCA_FD; 1239 } 1240 } 1241 1242 return 0; 1243 } 1244 1245 static int trace__read_syscall_info(struct trace *trace, int id) 1246 { 1247 char tp_name[128]; 1248 struct syscall *sc; 1249 const char *name = syscalltbl__name(trace->sctbl, id); 1250 1251 if (name == NULL) 1252 return -1; 1253 1254 if (id > trace->syscalls.max) { 1255 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1256 1257 if (nsyscalls == NULL) 1258 return -1; 1259 1260 if (trace->syscalls.max != -1) { 1261 memset(nsyscalls + trace->syscalls.max + 1, 0, 1262 (id - trace->syscalls.max) * sizeof(*sc)); 1263 } else { 1264 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1265 } 1266 1267 trace->syscalls.table = nsyscalls; 1268 trace->syscalls.max = id; 1269 } 1270 1271 sc = trace->syscalls.table + id; 1272 sc->name = name; 1273 1274 sc->fmt = syscall_fmt__find(sc->name); 1275 1276 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1277 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1278 1279 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) { 1280 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1281 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1282 } 1283 1284 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) 1285 return -1; 1286 1287 if (IS_ERR(sc->tp_format)) 1288 return -1; 1289 1290 sc->args = sc->tp_format->format.fields; 1291 /* 1292 * We need to check and discard the first variable '__syscall_nr' 1293 * or 'nr' that mean the syscall number. It is needless here. 1294 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. 1295 */ 1296 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { 1297 sc->args = sc->args->next; 1298 --sc->nr_args; 1299 } 1300 1301 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1302 1303 return syscall__set_arg_fmts(sc); 1304 } 1305 1306 static int trace__validate_ev_qualifier(struct trace *trace) 1307 { 1308 int err = 0, i; 1309 size_t nr_allocated; 1310 struct str_node *pos; 1311 1312 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1313 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1314 sizeof(trace->ev_qualifier_ids.entries[0])); 1315 1316 if (trace->ev_qualifier_ids.entries == NULL) { 1317 fputs("Error:\tNot enough memory for allocating events qualifier ids\n", 1318 trace->output); 1319 err = -EINVAL; 1320 goto out; 1321 } 1322 1323 nr_allocated = trace->ev_qualifier_ids.nr; 1324 i = 0; 1325 1326 strlist__for_each_entry(pos, trace->ev_qualifier) { 1327 const char *sc = pos->s; 1328 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; 1329 1330 if (id < 0) { 1331 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); 1332 if (id >= 0) 1333 goto matches; 1334 1335 if (err == 0) { 1336 fputs("Error:\tInvalid syscall ", trace->output); 1337 err = -EINVAL; 1338 } else { 1339 fputs(", ", trace->output); 1340 } 1341 1342 fputs(sc, trace->output); 1343 } 1344 matches: 1345 trace->ev_qualifier_ids.entries[i++] = id; 1346 if (match_next == -1) 1347 continue; 1348 1349 while (1) { 1350 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); 1351 if (id < 0) 1352 break; 1353 if (nr_allocated == trace->ev_qualifier_ids.nr) { 1354 void *entries; 1355 1356 nr_allocated += 8; 1357 entries = realloc(trace->ev_qualifier_ids.entries, 1358 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); 1359 if (entries == NULL) { 1360 err = -ENOMEM; 1361 fputs("\nError:\t Not enough memory for parsing\n", trace->output); 1362 goto out_free; 1363 } 1364 trace->ev_qualifier_ids.entries = entries; 1365 } 1366 trace->ev_qualifier_ids.nr++; 1367 trace->ev_qualifier_ids.entries[i++] = id; 1368 } 1369 } 1370 1371 if (err < 0) { 1372 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1373 "\nHint:\tand: 'man syscalls'\n", trace->output); 1374 out_free: 1375 zfree(&trace->ev_qualifier_ids.entries); 1376 trace->ev_qualifier_ids.nr = 0; 1377 } 1378 out: 1379 return err; 1380 } 1381 1382 /* 1383 * args is to be interpreted as a series of longs but we need to handle 1384 * 8-byte unaligned accesses. args points to raw_data within the event 1385 * and raw_data is guaranteed to be 8-byte unaligned because it is 1386 * preceded by raw_size which is a u32. So we need to copy args to a temp 1387 * variable to read it. Most notably this avoids extended load instructions 1388 * on unaligned addresses 1389 */ 1390 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) 1391 { 1392 unsigned long val; 1393 unsigned char *p = arg->args + sizeof(unsigned long) * idx; 1394 1395 memcpy(&val, p, sizeof(val)); 1396 return val; 1397 } 1398 1399 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, 1400 struct syscall_arg *arg) 1401 { 1402 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name) 1403 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name); 1404 1405 return scnprintf(bf, size, "arg%d: ", arg->idx); 1406 } 1407 1408 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, 1409 struct syscall_arg *arg, unsigned long val) 1410 { 1411 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { 1412 arg->val = val; 1413 if (sc->arg_fmt[arg->idx].parm) 1414 arg->parm = sc->arg_fmt[arg->idx].parm; 1415 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); 1416 } 1417 return scnprintf(bf, size, "%ld", val); 1418 } 1419 1420 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1421 unsigned char *args, struct trace *trace, 1422 struct thread *thread) 1423 { 1424 size_t printed = 0; 1425 unsigned long val; 1426 u8 bit = 1; 1427 struct syscall_arg arg = { 1428 .args = args, 1429 .idx = 0, 1430 .mask = 0, 1431 .trace = trace, 1432 .thread = thread, 1433 }; 1434 struct thread_trace *ttrace = thread__priv(thread); 1435 1436 /* 1437 * Things like fcntl will set this in its 'cmd' formatter to pick the 1438 * right formatter for the return value (an fd? file flags?), which is 1439 * not needed for syscalls that always return a given type, say an fd. 1440 */ 1441 ttrace->ret_scnprintf = NULL; 1442 1443 if (sc->args != NULL) { 1444 struct format_field *field; 1445 1446 for (field = sc->args; field; 1447 field = field->next, ++arg.idx, bit <<= 1) { 1448 if (arg.mask & bit) 1449 continue; 1450 1451 val = syscall_arg__val(&arg, arg.idx); 1452 1453 /* 1454 * Suppress this argument if its value is zero and 1455 * and we don't have a string associated in an 1456 * strarray for it. 1457 */ 1458 if (val == 0 && 1459 !(sc->arg_fmt && 1460 (sc->arg_fmt[arg.idx].show_zero || 1461 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || 1462 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && 1463 sc->arg_fmt[arg.idx].parm)) 1464 continue; 1465 1466 printed += scnprintf(bf + printed, size - printed, 1467 "%s%s: ", printed ? ", " : "", field->name); 1468 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1469 } 1470 } else if (IS_ERR(sc->tp_format)) { 1471 /* 1472 * If we managed to read the tracepoint /format file, then we 1473 * may end up not having any args, like with gettid(), so only 1474 * print the raw args when we didn't manage to read it. 1475 */ 1476 while (arg.idx < sc->nr_args) { 1477 if (arg.mask & bit) 1478 goto next_arg; 1479 val = syscall_arg__val(&arg, arg.idx); 1480 if (printed) 1481 printed += scnprintf(bf + printed, size - printed, ", "); 1482 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); 1483 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1484 next_arg: 1485 ++arg.idx; 1486 bit <<= 1; 1487 } 1488 } 1489 1490 return printed; 1491 } 1492 1493 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1494 union perf_event *event, 1495 struct perf_sample *sample); 1496 1497 static struct syscall *trace__syscall_info(struct trace *trace, 1498 struct perf_evsel *evsel, int id) 1499 { 1500 1501 if (id < 0) { 1502 1503 /* 1504 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1505 * before that, leaving at a higher verbosity level till that is 1506 * explained. Reproduced with plain ftrace with: 1507 * 1508 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1509 * grep "NR -1 " /t/trace_pipe 1510 * 1511 * After generating some load on the machine. 1512 */ 1513 if (verbose > 1) { 1514 static u64 n; 1515 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1516 id, perf_evsel__name(evsel), ++n); 1517 } 1518 return NULL; 1519 } 1520 1521 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1522 trace__read_syscall_info(trace, id)) 1523 goto out_cant_read; 1524 1525 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1526 goto out_cant_read; 1527 1528 return &trace->syscalls.table[id]; 1529 1530 out_cant_read: 1531 if (verbose > 0) { 1532 fprintf(trace->output, "Problems reading syscall %d", id); 1533 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1534 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1535 fputs(" information\n", trace->output); 1536 } 1537 return NULL; 1538 } 1539 1540 static void thread__update_stats(struct thread_trace *ttrace, 1541 int id, struct perf_sample *sample) 1542 { 1543 struct int_node *inode; 1544 struct stats *stats; 1545 u64 duration = 0; 1546 1547 inode = intlist__findnew(ttrace->syscall_stats, id); 1548 if (inode == NULL) 1549 return; 1550 1551 stats = inode->priv; 1552 if (stats == NULL) { 1553 stats = malloc(sizeof(struct stats)); 1554 if (stats == NULL) 1555 return; 1556 init_stats(stats); 1557 inode->priv = stats; 1558 } 1559 1560 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1561 duration = sample->time - ttrace->entry_time; 1562 1563 update_stats(stats, duration); 1564 } 1565 1566 static int trace__printf_interrupted_entry(struct trace *trace) 1567 { 1568 struct thread_trace *ttrace; 1569 size_t printed; 1570 1571 if (trace->failure_only || trace->current == NULL) 1572 return 0; 1573 1574 ttrace = thread__priv(trace->current); 1575 1576 if (!ttrace->entry_pending) 1577 return 0; 1578 1579 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output); 1580 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1581 ttrace->entry_pending = false; 1582 1583 return printed; 1584 } 1585 1586 static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, 1587 struct perf_sample *sample, struct thread *thread) 1588 { 1589 int printed = 0; 1590 1591 if (trace->print_sample) { 1592 double ts = (double)sample->time / NSEC_PER_MSEC; 1593 1594 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", 1595 perf_evsel__name(evsel), ts, 1596 thread__comm_str(thread), 1597 sample->pid, sample->tid, sample->cpu); 1598 } 1599 1600 return printed; 1601 } 1602 1603 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1604 union perf_event *event __maybe_unused, 1605 struct perf_sample *sample) 1606 { 1607 char *msg; 1608 void *args; 1609 size_t printed = 0; 1610 struct thread *thread; 1611 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1612 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1613 struct thread_trace *ttrace; 1614 1615 if (sc == NULL) 1616 return -1; 1617 1618 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1619 ttrace = thread__trace(thread, trace->output); 1620 if (ttrace == NULL) 1621 goto out_put; 1622 1623 trace__fprintf_sample(trace, evsel, sample, thread); 1624 1625 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1626 1627 if (ttrace->entry_str == NULL) { 1628 ttrace->entry_str = malloc(trace__entry_str_size); 1629 if (!ttrace->entry_str) 1630 goto out_put; 1631 } 1632 1633 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) 1634 trace__printf_interrupted_entry(trace); 1635 1636 ttrace->entry_time = sample->time; 1637 msg = ttrace->entry_str; 1638 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); 1639 1640 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, 1641 args, trace, thread); 1642 1643 if (sc->is_exit) { 1644 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) { 1645 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); 1646 fprintf(trace->output, "%-70s)\n", ttrace->entry_str); 1647 } 1648 } else { 1649 ttrace->entry_pending = true; 1650 /* See trace__vfs_getname & trace__sys_exit */ 1651 ttrace->filename.pending_open = false; 1652 } 1653 1654 if (trace->current != thread) { 1655 thread__put(trace->current); 1656 trace->current = thread__get(thread); 1657 } 1658 err = 0; 1659 out_put: 1660 thread__put(thread); 1661 return err; 1662 } 1663 1664 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, 1665 struct perf_sample *sample, 1666 struct callchain_cursor *cursor) 1667 { 1668 struct addr_location al; 1669 int max_stack = evsel->attr.sample_max_stack ? 1670 evsel->attr.sample_max_stack : 1671 trace->max_stack; 1672 1673 if (machine__resolve(trace->host, &al, sample) < 0 || 1674 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack)) 1675 return -1; 1676 1677 return 0; 1678 } 1679 1680 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) 1681 { 1682 /* TODO: user-configurable print_opts */ 1683 const unsigned int print_opts = EVSEL__PRINT_SYM | 1684 EVSEL__PRINT_DSO | 1685 EVSEL__PRINT_UNKNOWN_AS_ADDR; 1686 1687 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); 1688 } 1689 1690 static const char *errno_to_name(struct perf_evsel *evsel, int err) 1691 { 1692 struct perf_env *env = perf_evsel__env(evsel); 1693 const char *arch_name = perf_env__arch(env); 1694 1695 return arch_syscalls__strerrno(arch_name, err); 1696 } 1697 1698 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1699 union perf_event *event __maybe_unused, 1700 struct perf_sample *sample) 1701 { 1702 long ret; 1703 u64 duration = 0; 1704 bool duration_calculated = false; 1705 struct thread *thread; 1706 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; 1707 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1708 struct thread_trace *ttrace; 1709 1710 if (sc == NULL) 1711 return -1; 1712 1713 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1714 ttrace = thread__trace(thread, trace->output); 1715 if (ttrace == NULL) 1716 goto out_put; 1717 1718 trace__fprintf_sample(trace, evsel, sample, thread); 1719 1720 if (trace->summary) 1721 thread__update_stats(ttrace, id, sample); 1722 1723 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1724 1725 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { 1726 trace__set_fd_pathname(thread, ret, ttrace->filename.name); 1727 ttrace->filename.pending_open = false; 1728 ++trace->stats.vfs_getname; 1729 } 1730 1731 if (ttrace->entry_time) { 1732 duration = sample->time - ttrace->entry_time; 1733 if (trace__filter_duration(trace, duration)) 1734 goto out; 1735 duration_calculated = true; 1736 } else if (trace->duration_filter) 1737 goto out; 1738 1739 if (sample->callchain) { 1740 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1741 if (callchain_ret == 0) { 1742 if (callchain_cursor.nr < trace->min_stack) 1743 goto out; 1744 callchain_ret = 1; 1745 } 1746 } 1747 1748 if (trace->summary_only || (ret >= 0 && trace->failure_only)) 1749 goto out; 1750 1751 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); 1752 1753 if (ttrace->entry_pending) { 1754 fprintf(trace->output, "%-70s", ttrace->entry_str); 1755 } else { 1756 fprintf(trace->output, " ... ["); 1757 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1758 fprintf(trace->output, "]: %s()", sc->name); 1759 } 1760 1761 if (sc->fmt == NULL) { 1762 if (ret < 0) 1763 goto errno_print; 1764 signed_print: 1765 fprintf(trace->output, ") = %ld", ret); 1766 } else if (ret < 0) { 1767 errno_print: { 1768 char bf[STRERR_BUFSIZE]; 1769 const char *emsg = str_error_r(-ret, bf, sizeof(bf)), 1770 *e = errno_to_name(evsel, -ret); 1771 1772 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1773 } 1774 } else if (ret == 0 && sc->fmt->timeout) 1775 fprintf(trace->output, ") = 0 Timeout"); 1776 else if (ttrace->ret_scnprintf) { 1777 char bf[1024]; 1778 struct syscall_arg arg = { 1779 .val = ret, 1780 .thread = thread, 1781 .trace = trace, 1782 }; 1783 ttrace->ret_scnprintf(bf, sizeof(bf), &arg); 1784 ttrace->ret_scnprintf = NULL; 1785 fprintf(trace->output, ") = %s", bf); 1786 } else if (sc->fmt->hexret) 1787 fprintf(trace->output, ") = %#lx", ret); 1788 else if (sc->fmt->errpid) { 1789 struct thread *child = machine__find_thread(trace->host, ret, ret); 1790 1791 if (child != NULL) { 1792 fprintf(trace->output, ") = %ld", ret); 1793 if (child->comm_set) 1794 fprintf(trace->output, " (%s)", thread__comm_str(child)); 1795 thread__put(child); 1796 } 1797 } else 1798 goto signed_print; 1799 1800 fputc('\n', trace->output); 1801 1802 if (callchain_ret > 0) 1803 trace__fprintf_callchain(trace, sample); 1804 else if (callchain_ret < 0) 1805 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1806 out: 1807 ttrace->entry_pending = false; 1808 err = 0; 1809 out_put: 1810 thread__put(thread); 1811 return err; 1812 } 1813 1814 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1815 union perf_event *event __maybe_unused, 1816 struct perf_sample *sample) 1817 { 1818 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1819 struct thread_trace *ttrace; 1820 size_t filename_len, entry_str_len, to_move; 1821 ssize_t remaining_space; 1822 char *pos; 1823 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); 1824 1825 if (!thread) 1826 goto out; 1827 1828 ttrace = thread__priv(thread); 1829 if (!ttrace) 1830 goto out_put; 1831 1832 filename_len = strlen(filename); 1833 if (filename_len == 0) 1834 goto out_put; 1835 1836 if (ttrace->filename.namelen < filename_len) { 1837 char *f = realloc(ttrace->filename.name, filename_len + 1); 1838 1839 if (f == NULL) 1840 goto out_put; 1841 1842 ttrace->filename.namelen = filename_len; 1843 ttrace->filename.name = f; 1844 } 1845 1846 strcpy(ttrace->filename.name, filename); 1847 ttrace->filename.pending_open = true; 1848 1849 if (!ttrace->filename.ptr) 1850 goto out_put; 1851 1852 entry_str_len = strlen(ttrace->entry_str); 1853 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ 1854 if (remaining_space <= 0) 1855 goto out_put; 1856 1857 if (filename_len > (size_t)remaining_space) { 1858 filename += filename_len - remaining_space; 1859 filename_len = remaining_space; 1860 } 1861 1862 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ 1863 pos = ttrace->entry_str + ttrace->filename.entry_str_pos; 1864 memmove(pos + filename_len, pos, to_move); 1865 memcpy(pos, filename, filename_len); 1866 1867 ttrace->filename.ptr = 0; 1868 ttrace->filename.entry_str_pos = 0; 1869 out_put: 1870 thread__put(thread); 1871 out: 1872 return 0; 1873 } 1874 1875 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1876 union perf_event *event __maybe_unused, 1877 struct perf_sample *sample) 1878 { 1879 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1880 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1881 struct thread *thread = machine__findnew_thread(trace->host, 1882 sample->pid, 1883 sample->tid); 1884 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1885 1886 if (ttrace == NULL) 1887 goto out_dump; 1888 1889 ttrace->runtime_ms += runtime_ms; 1890 trace->runtime_ms += runtime_ms; 1891 out_put: 1892 thread__put(thread); 1893 return 0; 1894 1895 out_dump: 1896 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1897 evsel->name, 1898 perf_evsel__strval(evsel, sample, "comm"), 1899 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1900 runtime, 1901 perf_evsel__intval(evsel, sample, "vruntime")); 1902 goto out_put; 1903 } 1904 1905 static int bpf_output__printer(enum binary_printer_ops op, 1906 unsigned int val, void *extra __maybe_unused, FILE *fp) 1907 { 1908 unsigned char ch = (unsigned char)val; 1909 1910 switch (op) { 1911 case BINARY_PRINT_CHAR_DATA: 1912 return fprintf(fp, "%c", isprint(ch) ? ch : '.'); 1913 case BINARY_PRINT_DATA_BEGIN: 1914 case BINARY_PRINT_LINE_BEGIN: 1915 case BINARY_PRINT_ADDR: 1916 case BINARY_PRINT_NUM_DATA: 1917 case BINARY_PRINT_NUM_PAD: 1918 case BINARY_PRINT_SEP: 1919 case BINARY_PRINT_CHAR_PAD: 1920 case BINARY_PRINT_LINE_END: 1921 case BINARY_PRINT_DATA_END: 1922 default: 1923 break; 1924 } 1925 1926 return 0; 1927 } 1928 1929 static void bpf_output__fprintf(struct trace *trace, 1930 struct perf_sample *sample) 1931 { 1932 binary__fprintf(sample->raw_data, sample->raw_size, 8, 1933 bpf_output__printer, NULL, trace->output); 1934 } 1935 1936 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1937 union perf_event *event __maybe_unused, 1938 struct perf_sample *sample) 1939 { 1940 int callchain_ret = 0; 1941 1942 if (sample->callchain) { 1943 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1944 if (callchain_ret == 0) { 1945 if (callchain_cursor.nr < trace->min_stack) 1946 goto out; 1947 callchain_ret = 1; 1948 } 1949 } 1950 1951 trace__printf_interrupted_entry(trace); 1952 trace__fprintf_tstamp(trace, sample->time, trace->output); 1953 1954 if (trace->trace_syscalls) 1955 fprintf(trace->output, "( ): "); 1956 1957 fprintf(trace->output, "%s:", evsel->name); 1958 1959 if (perf_evsel__is_bpf_output(evsel)) { 1960 bpf_output__fprintf(trace, sample); 1961 } else if (evsel->tp_format) { 1962 event_format__fprintf(evsel->tp_format, sample->cpu, 1963 sample->raw_data, sample->raw_size, 1964 trace->output); 1965 } 1966 1967 fprintf(trace->output, "\n"); 1968 1969 if (callchain_ret > 0) 1970 trace__fprintf_callchain(trace, sample); 1971 else if (callchain_ret < 0) 1972 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1973 out: 1974 return 0; 1975 } 1976 1977 static void print_location(FILE *f, struct perf_sample *sample, 1978 struct addr_location *al, 1979 bool print_dso, bool print_sym) 1980 { 1981 1982 if ((verbose > 0 || print_dso) && al->map) 1983 fprintf(f, "%s@", al->map->dso->long_name); 1984 1985 if ((verbose > 0 || print_sym) && al->sym) 1986 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 1987 al->addr - al->sym->start); 1988 else if (al->map) 1989 fprintf(f, "0x%" PRIx64, al->addr); 1990 else 1991 fprintf(f, "0x%" PRIx64, sample->addr); 1992 } 1993 1994 static int trace__pgfault(struct trace *trace, 1995 struct perf_evsel *evsel, 1996 union perf_event *event __maybe_unused, 1997 struct perf_sample *sample) 1998 { 1999 struct thread *thread; 2000 struct addr_location al; 2001 char map_type = 'd'; 2002 struct thread_trace *ttrace; 2003 int err = -1; 2004 int callchain_ret = 0; 2005 2006 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2007 2008 if (sample->callchain) { 2009 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 2010 if (callchain_ret == 0) { 2011 if (callchain_cursor.nr < trace->min_stack) 2012 goto out_put; 2013 callchain_ret = 1; 2014 } 2015 } 2016 2017 ttrace = thread__trace(thread, trace->output); 2018 if (ttrace == NULL) 2019 goto out_put; 2020 2021 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 2022 ttrace->pfmaj++; 2023 else 2024 ttrace->pfmin++; 2025 2026 if (trace->summary_only) 2027 goto out; 2028 2029 thread__find_symbol(thread, sample->cpumode, sample->ip, &al); 2030 2031 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); 2032 2033 fprintf(trace->output, "%sfault [", 2034 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 2035 "maj" : "min"); 2036 2037 print_location(trace->output, sample, &al, false, true); 2038 2039 fprintf(trace->output, "] => "); 2040 2041 thread__find_symbol(thread, sample->cpumode, sample->addr, &al); 2042 2043 if (!al.map) { 2044 thread__find_symbol(thread, sample->cpumode, sample->addr, &al); 2045 2046 if (al.map) 2047 map_type = 'x'; 2048 else 2049 map_type = '?'; 2050 } 2051 2052 print_location(trace->output, sample, &al, true, false); 2053 2054 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2055 2056 if (callchain_ret > 0) 2057 trace__fprintf_callchain(trace, sample); 2058 else if (callchain_ret < 0) 2059 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 2060 out: 2061 err = 0; 2062 out_put: 2063 thread__put(thread); 2064 return err; 2065 } 2066 2067 static void trace__set_base_time(struct trace *trace, 2068 struct perf_evsel *evsel, 2069 struct perf_sample *sample) 2070 { 2071 /* 2072 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust 2073 * and don't use sample->time unconditionally, we may end up having 2074 * some other event in the future without PERF_SAMPLE_TIME for good 2075 * reason, i.e. we may not be interested in its timestamps, just in 2076 * it taking place, picking some piece of information when it 2077 * appears in our event stream (vfs_getname comes to mind). 2078 */ 2079 if (trace->base_time == 0 && !trace->full_time && 2080 (evsel->attr.sample_type & PERF_SAMPLE_TIME)) 2081 trace->base_time = sample->time; 2082 } 2083 2084 static int trace__process_sample(struct perf_tool *tool, 2085 union perf_event *event, 2086 struct perf_sample *sample, 2087 struct perf_evsel *evsel, 2088 struct machine *machine __maybe_unused) 2089 { 2090 struct trace *trace = container_of(tool, struct trace, tool); 2091 struct thread *thread; 2092 int err = 0; 2093 2094 tracepoint_handler handler = evsel->handler; 2095 2096 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2097 if (thread && thread__is_filtered(thread)) 2098 goto out; 2099 2100 trace__set_base_time(trace, evsel, sample); 2101 2102 if (handler) { 2103 ++trace->nr_events; 2104 handler(trace, evsel, event, sample); 2105 } 2106 out: 2107 thread__put(thread); 2108 return err; 2109 } 2110 2111 static int trace__record(struct trace *trace, int argc, const char **argv) 2112 { 2113 unsigned int rec_argc, i, j; 2114 const char **rec_argv; 2115 const char * const record_args[] = { 2116 "record", 2117 "-R", 2118 "-m", "1024", 2119 "-c", "1", 2120 }; 2121 2122 const char * const sc_args[] = { "-e", }; 2123 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2124 const char * const majpf_args[] = { "-e", "major-faults" }; 2125 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2126 const char * const minpf_args[] = { "-e", "minor-faults" }; 2127 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2128 2129 /* +1 is for the event string below */ 2130 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2131 majpf_args_nr + minpf_args_nr + argc; 2132 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2133 2134 if (rec_argv == NULL) 2135 return -ENOMEM; 2136 2137 j = 0; 2138 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2139 rec_argv[j++] = record_args[i]; 2140 2141 if (trace->trace_syscalls) { 2142 for (i = 0; i < sc_args_nr; i++) 2143 rec_argv[j++] = sc_args[i]; 2144 2145 /* event string may be different for older kernels - e.g., RHEL6 */ 2146 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2147 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2148 else if (is_valid_tracepoint("syscalls:sys_enter")) 2149 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2150 else { 2151 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2152 free(rec_argv); 2153 return -1; 2154 } 2155 } 2156 2157 if (trace->trace_pgfaults & TRACE_PFMAJ) 2158 for (i = 0; i < majpf_args_nr; i++) 2159 rec_argv[j++] = majpf_args[i]; 2160 2161 if (trace->trace_pgfaults & TRACE_PFMIN) 2162 for (i = 0; i < minpf_args_nr; i++) 2163 rec_argv[j++] = minpf_args[i]; 2164 2165 for (i = 0; i < (unsigned int)argc; i++) 2166 rec_argv[j++] = argv[i]; 2167 2168 return cmd_record(j, rec_argv); 2169 } 2170 2171 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2172 2173 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2174 { 2175 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2176 2177 if (IS_ERR(evsel)) 2178 return false; 2179 2180 if (perf_evsel__field(evsel, "pathname") == NULL) { 2181 perf_evsel__delete(evsel); 2182 return false; 2183 } 2184 2185 evsel->handler = trace__vfs_getname; 2186 perf_evlist__add(evlist, evsel); 2187 return true; 2188 } 2189 2190 static struct perf_evsel *perf_evsel__new_pgfault(u64 config) 2191 { 2192 struct perf_evsel *evsel; 2193 struct perf_event_attr attr = { 2194 .type = PERF_TYPE_SOFTWARE, 2195 .mmap_data = 1, 2196 }; 2197 2198 attr.config = config; 2199 attr.sample_period = 1; 2200 2201 event_attr_init(&attr); 2202 2203 evsel = perf_evsel__new(&attr); 2204 if (evsel) 2205 evsel->handler = trace__pgfault; 2206 2207 return evsel; 2208 } 2209 2210 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2211 { 2212 const u32 type = event->header.type; 2213 struct perf_evsel *evsel; 2214 2215 if (type != PERF_RECORD_SAMPLE) { 2216 trace__process_event(trace, trace->host, event, sample); 2217 return; 2218 } 2219 2220 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2221 if (evsel == NULL) { 2222 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2223 return; 2224 } 2225 2226 trace__set_base_time(trace, evsel, sample); 2227 2228 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2229 sample->raw_data == NULL) { 2230 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2231 perf_evsel__name(evsel), sample->tid, 2232 sample->cpu, sample->raw_size); 2233 } else { 2234 tracepoint_handler handler = evsel->handler; 2235 handler(trace, evsel, event, sample); 2236 } 2237 } 2238 2239 static int trace__add_syscall_newtp(struct trace *trace) 2240 { 2241 int ret = -1; 2242 struct perf_evlist *evlist = trace->evlist; 2243 struct perf_evsel *sys_enter, *sys_exit; 2244 2245 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); 2246 if (sys_enter == NULL) 2247 goto out; 2248 2249 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 2250 goto out_delete_sys_enter; 2251 2252 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); 2253 if (sys_exit == NULL) 2254 goto out_delete_sys_enter; 2255 2256 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 2257 goto out_delete_sys_exit; 2258 2259 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); 2260 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); 2261 2262 perf_evlist__add(evlist, sys_enter); 2263 perf_evlist__add(evlist, sys_exit); 2264 2265 if (callchain_param.enabled && !trace->kernel_syscallchains) { 2266 /* 2267 * We're interested only in the user space callchain 2268 * leading to the syscall, allow overriding that for 2269 * debugging reasons using --kernel_syscall_callchains 2270 */ 2271 sys_exit->attr.exclude_callchain_kernel = 1; 2272 } 2273 2274 trace->syscalls.events.sys_enter = sys_enter; 2275 trace->syscalls.events.sys_exit = sys_exit; 2276 2277 ret = 0; 2278 out: 2279 return ret; 2280 2281 out_delete_sys_exit: 2282 perf_evsel__delete_priv(sys_exit); 2283 out_delete_sys_enter: 2284 perf_evsel__delete_priv(sys_enter); 2285 goto out; 2286 } 2287 2288 static int trace__set_ev_qualifier_filter(struct trace *trace) 2289 { 2290 int err = -1; 2291 struct perf_evsel *sys_exit; 2292 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, 2293 trace->ev_qualifier_ids.nr, 2294 trace->ev_qualifier_ids.entries); 2295 2296 if (filter == NULL) 2297 goto out_enomem; 2298 2299 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter, 2300 filter)) { 2301 sys_exit = trace->syscalls.events.sys_exit; 2302 err = perf_evsel__append_tp_filter(sys_exit, filter); 2303 } 2304 2305 free(filter); 2306 out: 2307 return err; 2308 out_enomem: 2309 errno = ENOMEM; 2310 goto out; 2311 } 2312 2313 static int trace__set_filter_loop_pids(struct trace *trace) 2314 { 2315 unsigned int nr = 1; 2316 pid_t pids[32] = { 2317 getpid(), 2318 }; 2319 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]); 2320 2321 while (thread && nr < ARRAY_SIZE(pids)) { 2322 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid); 2323 2324 if (parent == NULL) 2325 break; 2326 2327 if (!strcmp(thread__comm_str(parent), "sshd")) { 2328 pids[nr++] = parent->tid; 2329 break; 2330 } 2331 thread = parent; 2332 } 2333 2334 return perf_evlist__set_filter_pids(trace->evlist, nr, pids); 2335 } 2336 2337 static int trace__run(struct trace *trace, int argc, const char **argv) 2338 { 2339 struct perf_evlist *evlist = trace->evlist; 2340 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; 2341 int err = -1, i; 2342 unsigned long before; 2343 const bool forks = argc > 0; 2344 bool draining = false; 2345 2346 trace->live = true; 2347 2348 if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) 2349 goto out_error_raw_syscalls; 2350 2351 if (trace->trace_syscalls) 2352 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); 2353 2354 if ((trace->trace_pgfaults & TRACE_PFMAJ)) { 2355 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); 2356 if (pgfault_maj == NULL) 2357 goto out_error_mem; 2358 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); 2359 perf_evlist__add(evlist, pgfault_maj); 2360 } 2361 2362 if ((trace->trace_pgfaults & TRACE_PFMIN)) { 2363 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); 2364 if (pgfault_min == NULL) 2365 goto out_error_mem; 2366 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); 2367 perf_evlist__add(evlist, pgfault_min); 2368 } 2369 2370 if (trace->sched && 2371 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2372 trace__sched_stat_runtime)) 2373 goto out_error_sched_stat_runtime; 2374 2375 /* 2376 * If a global cgroup was set, apply it to all the events without an 2377 * explicit cgroup. I.e.: 2378 * 2379 * trace -G A -e sched:*switch 2380 * 2381 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc 2382 * _and_ sched:sched_switch to the 'A' cgroup, while: 2383 * 2384 * trace -e sched:*switch -G A 2385 * 2386 * will only set the sched:sched_switch event to the 'A' cgroup, all the 2387 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without" 2388 * a cgroup (on the root cgroup, sys wide, etc). 2389 * 2390 * Multiple cgroups: 2391 * 2392 * trace -G A -e sched:*switch -G B 2393 * 2394 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes 2395 * to the 'B' cgroup. 2396 * 2397 * evlist__set_default_cgroup() grabs a reference of the passed cgroup 2398 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL. 2399 */ 2400 if (trace->cgroup) 2401 evlist__set_default_cgroup(trace->evlist, trace->cgroup); 2402 2403 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2404 if (err < 0) { 2405 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2406 goto out_delete_evlist; 2407 } 2408 2409 err = trace__symbols_init(trace, evlist); 2410 if (err < 0) { 2411 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2412 goto out_delete_evlist; 2413 } 2414 2415 perf_evlist__config(evlist, &trace->opts, &callchain_param); 2416 2417 signal(SIGCHLD, sig_handler); 2418 signal(SIGINT, sig_handler); 2419 2420 if (forks) { 2421 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2422 argv, false, NULL); 2423 if (err < 0) { 2424 fprintf(trace->output, "Couldn't run the workload!\n"); 2425 goto out_delete_evlist; 2426 } 2427 } 2428 2429 err = perf_evlist__open(evlist); 2430 if (err < 0) 2431 goto out_error_open; 2432 2433 err = bpf__apply_obj_config(); 2434 if (err) { 2435 char errbuf[BUFSIZ]; 2436 2437 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2438 pr_err("ERROR: Apply config to BPF failed: %s\n", 2439 errbuf); 2440 goto out_error_open; 2441 } 2442 2443 /* 2444 * Better not use !target__has_task() here because we need to cover the 2445 * case where no threads were specified in the command line, but a 2446 * workload was, and in that case we will fill in the thread_map when 2447 * we fork the workload in perf_evlist__prepare_workload. 2448 */ 2449 if (trace->filter_pids.nr > 0) 2450 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2451 else if (thread_map__pid(evlist->threads, 0) == -1) 2452 err = trace__set_filter_loop_pids(trace); 2453 2454 if (err < 0) 2455 goto out_error_mem; 2456 2457 if (trace->ev_qualifier_ids.nr > 0) { 2458 err = trace__set_ev_qualifier_filter(trace); 2459 if (err < 0) 2460 goto out_errno; 2461 2462 pr_debug("event qualifier tracepoint filter: %s\n", 2463 trace->syscalls.events.sys_exit->filter); 2464 } 2465 2466 err = perf_evlist__apply_filters(evlist, &evsel); 2467 if (err < 0) 2468 goto out_error_apply_filters; 2469 2470 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); 2471 if (err < 0) 2472 goto out_error_mmap; 2473 2474 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay) 2475 perf_evlist__enable(evlist); 2476 2477 if (forks) 2478 perf_evlist__start_workload(evlist); 2479 2480 if (trace->opts.initial_delay) { 2481 usleep(trace->opts.initial_delay * 1000); 2482 perf_evlist__enable(evlist); 2483 } 2484 2485 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || 2486 evlist->threads->nr > 1 || 2487 perf_evlist__first(evlist)->attr.inherit; 2488 2489 /* 2490 * Now that we already used evsel->attr to ask the kernel to setup the 2491 * events, lets reuse evsel->attr.sample_max_stack as the limit in 2492 * trace__resolve_callchain(), allowing per-event max-stack settings 2493 * to override an explicitely set --max-stack global setting. 2494 */ 2495 evlist__for_each_entry(evlist, evsel) { 2496 if (evsel__has_callchain(evsel) && 2497 evsel->attr.sample_max_stack == 0) 2498 evsel->attr.sample_max_stack = trace->max_stack; 2499 } 2500 again: 2501 before = trace->nr_events; 2502 2503 for (i = 0; i < evlist->nr_mmaps; i++) { 2504 union perf_event *event; 2505 struct perf_mmap *md; 2506 2507 md = &evlist->mmap[i]; 2508 if (perf_mmap__read_init(md) < 0) 2509 continue; 2510 2511 while ((event = perf_mmap__read_event(md)) != NULL) { 2512 struct perf_sample sample; 2513 2514 ++trace->nr_events; 2515 2516 err = perf_evlist__parse_sample(evlist, event, &sample); 2517 if (err) { 2518 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2519 goto next_event; 2520 } 2521 2522 trace__handle_event(trace, event, &sample); 2523 next_event: 2524 perf_mmap__consume(md); 2525 2526 if (interrupted) 2527 goto out_disable; 2528 2529 if (done && !draining) { 2530 perf_evlist__disable(evlist); 2531 draining = true; 2532 } 2533 } 2534 perf_mmap__read_done(md); 2535 } 2536 2537 if (trace->nr_events == before) { 2538 int timeout = done ? 100 : -1; 2539 2540 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2541 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2542 draining = true; 2543 2544 goto again; 2545 } 2546 } else { 2547 goto again; 2548 } 2549 2550 out_disable: 2551 thread__zput(trace->current); 2552 2553 perf_evlist__disable(evlist); 2554 2555 if (!err) { 2556 if (trace->summary) 2557 trace__fprintf_thread_summary(trace, trace->output); 2558 2559 if (trace->show_tool_stats) { 2560 fprintf(trace->output, "Stats:\n " 2561 " vfs_getname : %" PRIu64 "\n" 2562 " proc_getname: %" PRIu64 "\n", 2563 trace->stats.vfs_getname, 2564 trace->stats.proc_getname); 2565 } 2566 } 2567 2568 out_delete_evlist: 2569 trace__symbols__exit(trace); 2570 2571 perf_evlist__delete(evlist); 2572 cgroup__put(trace->cgroup); 2573 trace->evlist = NULL; 2574 trace->live = false; 2575 return err; 2576 { 2577 char errbuf[BUFSIZ]; 2578 2579 out_error_sched_stat_runtime: 2580 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2581 goto out_error; 2582 2583 out_error_raw_syscalls: 2584 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2585 goto out_error; 2586 2587 out_error_mmap: 2588 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2589 goto out_error; 2590 2591 out_error_open: 2592 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2593 2594 out_error: 2595 fprintf(trace->output, "%s\n", errbuf); 2596 goto out_delete_evlist; 2597 2598 out_error_apply_filters: 2599 fprintf(trace->output, 2600 "Failed to set filter \"%s\" on event %s with %d (%s)\n", 2601 evsel->filter, perf_evsel__name(evsel), errno, 2602 str_error_r(errno, errbuf, sizeof(errbuf))); 2603 goto out_delete_evlist; 2604 } 2605 out_error_mem: 2606 fprintf(trace->output, "Not enough memory to run!\n"); 2607 goto out_delete_evlist; 2608 2609 out_errno: 2610 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); 2611 goto out_delete_evlist; 2612 } 2613 2614 static int trace__replay(struct trace *trace) 2615 { 2616 const struct perf_evsel_str_handler handlers[] = { 2617 { "probe:vfs_getname", trace__vfs_getname, }, 2618 }; 2619 struct perf_data data = { 2620 .file = { 2621 .path = input_name, 2622 }, 2623 .mode = PERF_DATA_MODE_READ, 2624 .force = trace->force, 2625 }; 2626 struct perf_session *session; 2627 struct perf_evsel *evsel; 2628 int err = -1; 2629 2630 trace->tool.sample = trace__process_sample; 2631 trace->tool.mmap = perf_event__process_mmap; 2632 trace->tool.mmap2 = perf_event__process_mmap2; 2633 trace->tool.comm = perf_event__process_comm; 2634 trace->tool.exit = perf_event__process_exit; 2635 trace->tool.fork = perf_event__process_fork; 2636 trace->tool.attr = perf_event__process_attr; 2637 trace->tool.tracing_data = perf_event__process_tracing_data; 2638 trace->tool.build_id = perf_event__process_build_id; 2639 trace->tool.namespaces = perf_event__process_namespaces; 2640 2641 trace->tool.ordered_events = true; 2642 trace->tool.ordering_requires_timestamps = true; 2643 2644 /* add tid to output */ 2645 trace->multiple_threads = true; 2646 2647 session = perf_session__new(&data, false, &trace->tool); 2648 if (session == NULL) 2649 return -1; 2650 2651 if (trace->opts.target.pid) 2652 symbol_conf.pid_list_str = strdup(trace->opts.target.pid); 2653 2654 if (trace->opts.target.tid) 2655 symbol_conf.tid_list_str = strdup(trace->opts.target.tid); 2656 2657 if (symbol__init(&session->header.env) < 0) 2658 goto out; 2659 2660 trace->host = &session->machines.host; 2661 2662 err = perf_session__set_tracepoints_handlers(session, handlers); 2663 if (err) 2664 goto out; 2665 2666 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2667 "raw_syscalls:sys_enter"); 2668 /* older kernels have syscalls tp versus raw_syscalls */ 2669 if (evsel == NULL) 2670 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2671 "syscalls:sys_enter"); 2672 2673 if (evsel && 2674 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2675 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2676 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2677 goto out; 2678 } 2679 2680 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2681 "raw_syscalls:sys_exit"); 2682 if (evsel == NULL) 2683 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2684 "syscalls:sys_exit"); 2685 if (evsel && 2686 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2687 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2688 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2689 goto out; 2690 } 2691 2692 evlist__for_each_entry(session->evlist, evsel) { 2693 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2694 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2695 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2696 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2697 evsel->handler = trace__pgfault; 2698 } 2699 2700 setup_pager(); 2701 2702 err = perf_session__process_events(session); 2703 if (err) 2704 pr_err("Failed to process events, error %d", err); 2705 2706 else if (trace->summary) 2707 trace__fprintf_thread_summary(trace, trace->output); 2708 2709 out: 2710 perf_session__delete(session); 2711 2712 return err; 2713 } 2714 2715 static size_t trace__fprintf_threads_header(FILE *fp) 2716 { 2717 size_t printed; 2718 2719 printed = fprintf(fp, "\n Summary of events:\n\n"); 2720 2721 return printed; 2722 } 2723 2724 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, 2725 struct stats *stats; 2726 double msecs; 2727 int syscall; 2728 ) 2729 { 2730 struct int_node *source = rb_entry(nd, struct int_node, rb_node); 2731 struct stats *stats = source->priv; 2732 2733 entry->syscall = source->i; 2734 entry->stats = stats; 2735 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; 2736 } 2737 2738 static size_t thread__dump_stats(struct thread_trace *ttrace, 2739 struct trace *trace, FILE *fp) 2740 { 2741 size_t printed = 0; 2742 struct syscall *sc; 2743 struct rb_node *nd; 2744 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); 2745 2746 if (syscall_stats == NULL) 2747 return 0; 2748 2749 printed += fprintf(fp, "\n"); 2750 2751 printed += fprintf(fp, " syscall calls total min avg max stddev\n"); 2752 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 2753 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); 2754 2755 resort_rb__for_each_entry(nd, syscall_stats) { 2756 struct stats *stats = syscall_stats_entry->stats; 2757 if (stats) { 2758 double min = (double)(stats->min) / NSEC_PER_MSEC; 2759 double max = (double)(stats->max) / NSEC_PER_MSEC; 2760 double avg = avg_stats(stats); 2761 double pct; 2762 u64 n = (u64) stats->n; 2763 2764 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2765 avg /= NSEC_PER_MSEC; 2766 2767 sc = &trace->syscalls.table[syscall_stats_entry->syscall]; 2768 printed += fprintf(fp, " %-15s", sc->name); 2769 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", 2770 n, syscall_stats_entry->msecs, min, avg); 2771 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2772 } 2773 } 2774 2775 resort_rb__delete(syscall_stats); 2776 printed += fprintf(fp, "\n\n"); 2777 2778 return printed; 2779 } 2780 2781 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) 2782 { 2783 size_t printed = 0; 2784 struct thread_trace *ttrace = thread__priv(thread); 2785 double ratio; 2786 2787 if (ttrace == NULL) 2788 return 0; 2789 2790 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2791 2792 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2793 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2794 printed += fprintf(fp, "%.1f%%", ratio); 2795 if (ttrace->pfmaj) 2796 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2797 if (ttrace->pfmin) 2798 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2799 if (trace->sched) 2800 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2801 else if (fputc('\n', fp) != EOF) 2802 ++printed; 2803 2804 printed += thread__dump_stats(ttrace, trace, fp); 2805 2806 return printed; 2807 } 2808 2809 static unsigned long thread__nr_events(struct thread_trace *ttrace) 2810 { 2811 return ttrace ? ttrace->nr_events : 0; 2812 } 2813 2814 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), 2815 struct thread *thread; 2816 ) 2817 { 2818 entry->thread = rb_entry(nd, struct thread, rb_node); 2819 } 2820 2821 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2822 { 2823 size_t printed = trace__fprintf_threads_header(fp); 2824 struct rb_node *nd; 2825 int i; 2826 2827 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 2828 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); 2829 2830 if (threads == NULL) { 2831 fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 2832 return 0; 2833 } 2834 2835 resort_rb__for_each_entry(nd, threads) 2836 printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 2837 2838 resort_rb__delete(threads); 2839 } 2840 return printed; 2841 } 2842 2843 static int trace__set_duration(const struct option *opt, const char *str, 2844 int unset __maybe_unused) 2845 { 2846 struct trace *trace = opt->value; 2847 2848 trace->duration_filter = atof(str); 2849 return 0; 2850 } 2851 2852 static int trace__set_filter_pids(const struct option *opt, const char *str, 2853 int unset __maybe_unused) 2854 { 2855 int ret = -1; 2856 size_t i; 2857 struct trace *trace = opt->value; 2858 /* 2859 * FIXME: introduce a intarray class, plain parse csv and create a 2860 * { int nr, int entries[] } struct... 2861 */ 2862 struct intlist *list = intlist__new(str); 2863 2864 if (list == NULL) 2865 return -1; 2866 2867 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2868 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2869 2870 if (trace->filter_pids.entries == NULL) 2871 goto out; 2872 2873 trace->filter_pids.entries[0] = getpid(); 2874 2875 for (i = 1; i < trace->filter_pids.nr; ++i) 2876 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2877 2878 intlist__delete(list); 2879 ret = 0; 2880 out: 2881 return ret; 2882 } 2883 2884 static int trace__open_output(struct trace *trace, const char *filename) 2885 { 2886 struct stat st; 2887 2888 if (!stat(filename, &st) && st.st_size) { 2889 char oldname[PATH_MAX]; 2890 2891 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2892 unlink(oldname); 2893 rename(filename, oldname); 2894 } 2895 2896 trace->output = fopen(filename, "w"); 2897 2898 return trace->output == NULL ? -errno : 0; 2899 } 2900 2901 static int parse_pagefaults(const struct option *opt, const char *str, 2902 int unset __maybe_unused) 2903 { 2904 int *trace_pgfaults = opt->value; 2905 2906 if (strcmp(str, "all") == 0) 2907 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2908 else if (strcmp(str, "maj") == 0) 2909 *trace_pgfaults |= TRACE_PFMAJ; 2910 else if (strcmp(str, "min") == 0) 2911 *trace_pgfaults |= TRACE_PFMIN; 2912 else 2913 return -1; 2914 2915 return 0; 2916 } 2917 2918 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2919 { 2920 struct perf_evsel *evsel; 2921 2922 evlist__for_each_entry(evlist, evsel) 2923 evsel->handler = handler; 2924 } 2925 2926 /* 2927 * XXX: Hackish, just splitting the combined -e+--event (syscalls 2928 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use 2929 * existing facilities unchanged (trace->ev_qualifier + parse_options()). 2930 * 2931 * It'd be better to introduce a parse_options() variant that would return a 2932 * list with the terms it didn't match to an event... 2933 */ 2934 static int trace__parse_events_option(const struct option *opt, const char *str, 2935 int unset __maybe_unused) 2936 { 2937 struct trace *trace = (struct trace *)opt->value; 2938 const char *s = str; 2939 char *sep = NULL, *lists[2] = { NULL, NULL, }; 2940 int len = strlen(str) + 1, err = -1, list, idx; 2941 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); 2942 char group_name[PATH_MAX]; 2943 2944 if (strace_groups_dir == NULL) 2945 return -1; 2946 2947 if (*s == '!') { 2948 ++s; 2949 trace->not_ev_qualifier = true; 2950 } 2951 2952 while (1) { 2953 if ((sep = strchr(s, ',')) != NULL) 2954 *sep = '\0'; 2955 2956 list = 0; 2957 if (syscalltbl__id(trace->sctbl, s) >= 0 || 2958 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { 2959 list = 1; 2960 } else { 2961 path__join(group_name, sizeof(group_name), strace_groups_dir, s); 2962 if (access(group_name, R_OK) == 0) 2963 list = 1; 2964 } 2965 2966 if (lists[list]) { 2967 sprintf(lists[list] + strlen(lists[list]), ",%s", s); 2968 } else { 2969 lists[list] = malloc(len); 2970 if (lists[list] == NULL) 2971 goto out; 2972 strcpy(lists[list], s); 2973 } 2974 2975 if (!sep) 2976 break; 2977 2978 *sep = ','; 2979 s = sep + 1; 2980 } 2981 2982 if (lists[1] != NULL) { 2983 struct strlist_config slist_config = { 2984 .dirname = strace_groups_dir, 2985 }; 2986 2987 trace->ev_qualifier = strlist__new(lists[1], &slist_config); 2988 if (trace->ev_qualifier == NULL) { 2989 fputs("Not enough memory to parse event qualifier", trace->output); 2990 goto out; 2991 } 2992 2993 if (trace__validate_ev_qualifier(trace)) 2994 goto out; 2995 trace->trace_syscalls = true; 2996 } 2997 2998 err = 0; 2999 3000 if (lists[0]) { 3001 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 3002 "event selector. use 'perf list' to list available events", 3003 parse_events_option); 3004 err = parse_events_option(&o, lists[0], 0); 3005 } 3006 out: 3007 if (sep) 3008 *sep = ','; 3009 3010 return err; 3011 } 3012 3013 static int trace__parse_cgroups(const struct option *opt, const char *str, int unset) 3014 { 3015 struct trace *trace = opt->value; 3016 3017 if (!list_empty(&trace->evlist->entries)) 3018 return parse_cgroups(opt, str, unset); 3019 3020 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); 3021 3022 return 0; 3023 } 3024 3025 int cmd_trace(int argc, const char **argv) 3026 { 3027 const char *trace_usage[] = { 3028 "perf trace [<options>] [<command>]", 3029 "perf trace [<options>] -- <command> [<options>]", 3030 "perf trace record [<options>] [<command>]", 3031 "perf trace record [<options>] -- <command> [<options>]", 3032 NULL 3033 }; 3034 struct trace trace = { 3035 .syscalls = { 3036 . max = -1, 3037 }, 3038 .opts = { 3039 .target = { 3040 .uid = UINT_MAX, 3041 .uses_mmap = true, 3042 }, 3043 .user_freq = UINT_MAX, 3044 .user_interval = ULLONG_MAX, 3045 .no_buffering = true, 3046 .mmap_pages = UINT_MAX, 3047 .proc_map_timeout = 500, 3048 }, 3049 .output = stderr, 3050 .show_comm = true, 3051 .trace_syscalls = false, 3052 .kernel_syscallchains = false, 3053 .max_stack = UINT_MAX, 3054 }; 3055 const char *output_name = NULL; 3056 const struct option trace_options[] = { 3057 OPT_CALLBACK('e', "event", &trace, "event", 3058 "event/syscall selector. use 'perf list' to list available events", 3059 trace__parse_events_option), 3060 OPT_BOOLEAN(0, "comm", &trace.show_comm, 3061 "show the thread COMM next to its id"), 3062 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 3063 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", 3064 trace__parse_events_option), 3065 OPT_STRING('o', "output", &output_name, "file", "output file name"), 3066 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 3067 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 3068 "trace events on existing process id"), 3069 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 3070 "trace events on existing thread id"), 3071 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 3072 "pids to filter (by the kernel)", trace__set_filter_pids), 3073 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 3074 "system-wide collection from all CPUs"), 3075 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 3076 "list of cpus to monitor"), 3077 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 3078 "child tasks do not inherit counters"), 3079 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 3080 "number of mmap data pages", 3081 perf_evlist__parse_mmap_pages), 3082 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 3083 "user to profile"), 3084 OPT_CALLBACK(0, "duration", &trace, "float", 3085 "show only events with duration > N.M ms", 3086 trace__set_duration), 3087 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 3088 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 3089 OPT_BOOLEAN('T', "time", &trace.full_time, 3090 "Show full timestamp, not time relative to first start"), 3091 OPT_BOOLEAN(0, "failure", &trace.failure_only, 3092 "Show only syscalls that failed"), 3093 OPT_BOOLEAN('s', "summary", &trace.summary_only, 3094 "Show only syscall summary with statistics"), 3095 OPT_BOOLEAN('S', "with-summary", &trace.summary, 3096 "Show all syscalls and summary with statistics"), 3097 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 3098 "Trace pagefaults", parse_pagefaults, "maj"), 3099 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 3100 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 3101 OPT_CALLBACK(0, "call-graph", &trace.opts, 3102 "record_mode[,record_size]", record_callchain_help, 3103 &record_parse_callchain_opt), 3104 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, 3105 "Show the kernel callchains on the syscall exit path"), 3106 OPT_UINTEGER(0, "min-stack", &trace.min_stack, 3107 "Set the minimum stack depth when parsing the callchain, " 3108 "anything below the specified depth will be ignored."), 3109 OPT_UINTEGER(0, "max-stack", &trace.max_stack, 3110 "Set the maximum stack depth when parsing the callchain, " 3111 "anything beyond the specified depth will be ignored. " 3112 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 3113 OPT_BOOLEAN(0, "print-sample", &trace.print_sample, 3114 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), 3115 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 3116 "per thread proc mmap processing timeout in ms"), 3117 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only", 3118 trace__parse_cgroups), 3119 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, 3120 "ms to wait before starting measurement after program " 3121 "start"), 3122 OPT_END() 3123 }; 3124 bool __maybe_unused max_stack_user_set = true; 3125 bool mmap_pages_user_set = true; 3126 const char * const trace_subcommands[] = { "record", NULL }; 3127 int err; 3128 char bf[BUFSIZ]; 3129 3130 signal(SIGSEGV, sighandler_dump_stack); 3131 signal(SIGFPE, sighandler_dump_stack); 3132 3133 trace.evlist = perf_evlist__new(); 3134 trace.sctbl = syscalltbl__new(); 3135 3136 if (trace.evlist == NULL || trace.sctbl == NULL) { 3137 pr_err("Not enough memory to run!\n"); 3138 err = -ENOMEM; 3139 goto out; 3140 } 3141 3142 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 3143 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3144 3145 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { 3146 usage_with_options_msg(trace_usage, trace_options, 3147 "cgroup monitoring only available in system-wide mode"); 3148 } 3149 3150 err = bpf__setup_stdout(trace.evlist); 3151 if (err) { 3152 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); 3153 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); 3154 goto out; 3155 } 3156 3157 err = -1; 3158 3159 if (trace.trace_pgfaults) { 3160 trace.opts.sample_address = true; 3161 trace.opts.sample_time = true; 3162 } 3163 3164 if (trace.opts.mmap_pages == UINT_MAX) 3165 mmap_pages_user_set = false; 3166 3167 if (trace.max_stack == UINT_MAX) { 3168 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack(); 3169 max_stack_user_set = false; 3170 } 3171 3172 #ifdef HAVE_DWARF_UNWIND_SUPPORT 3173 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) { 3174 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); 3175 } 3176 #endif 3177 3178 if (callchain_param.enabled) { 3179 if (!mmap_pages_user_set && geteuid() == 0) 3180 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; 3181 3182 symbol_conf.use_callchain = true; 3183 } 3184 3185 if (trace.evlist->nr_entries > 0) 3186 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 3187 3188 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 3189 return trace__record(&trace, argc-1, &argv[1]); 3190 3191 /* summary_only implies summary option, but don't overwrite summary if set */ 3192 if (trace.summary_only) 3193 trace.summary = trace.summary_only; 3194 3195 if (!trace.trace_syscalls && !trace.trace_pgfaults && 3196 trace.evlist->nr_entries == 0 /* Was --events used? */) { 3197 trace.trace_syscalls = true; 3198 } 3199 3200 if (output_name != NULL) { 3201 err = trace__open_output(&trace, output_name); 3202 if (err < 0) { 3203 perror("failed to create output file"); 3204 goto out; 3205 } 3206 } 3207 3208 trace.open_id = syscalltbl__id(trace.sctbl, "open"); 3209 3210 err = target__validate(&trace.opts.target); 3211 if (err) { 3212 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3213 fprintf(trace.output, "%s", bf); 3214 goto out_close; 3215 } 3216 3217 err = target__parse_uid(&trace.opts.target); 3218 if (err) { 3219 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3220 fprintf(trace.output, "%s", bf); 3221 goto out_close; 3222 } 3223 3224 if (!argc && target__none(&trace.opts.target)) 3225 trace.opts.target.system_wide = true; 3226 3227 if (input_name) 3228 err = trace__replay(&trace); 3229 else 3230 err = trace__run(&trace, argc, argv); 3231 3232 out_close: 3233 if (output_name != NULL) 3234 fclose(trace.output); 3235 out: 3236 return err; 3237 } 3238