xref: /openbmc/linux/tools/perf/builtin-trace.c (revision 293d5b43)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40 
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <linux/random.h>
47 #include <linux/stringify.h>
48 #include <linux/time64.h>
49 
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC		02000000
52 #endif
53 
/*
 * Global state of a 'perf trace' session: tool callbacks, the syscall
 * table, event lists, filters and every user-selectable option.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;	/* highest syscall id in ->table — TODO confirm */
		struct syscall  *table;
		struct {
			/* sys_enter/sys_exit evsels, set up via perf_evsel__syscall_newtp() */
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;	/* thread of the last processed sample */
	u64			base_time;
	FILE			*output;	/* where formatted events are printed */
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall names given on the command line */
	struct {
		size_t		nr;
		int		*entries;	/* syscall ids resolved from ev_qualifier */
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;	/* pids to exclude from tracing — TODO confirm */
	}			filter_pids;
	double			duration_filter;	/* in ms, see fprintf_duration() scale */
	double			runtime_ms;
	struct {
		/* tool self-stats: how often each getname source fired */
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* ev_qualifier is an exclusion list */
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;	/* TRACE_PFMAJ/TRACE_PFMIN bits — TODO confirm */
	int			open_id;	/* cached id of the "open" syscall — TODO confirm */
};
107 
/*
 * Accessor for one tracepoint payload field: the byte offset into the
 * sample's raw data plus a typed reader (integer or pointer — only one
 * is ever valid, hence the union).
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
115 
/*
 * Generate tp_field__u{8,16,32,64}(): read a native-endian unsigned
 * integer of the given width from the sample's raw data at the field's
 * offset.  memcpy() avoids unaligned-access problems on strict arches.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
128 
/*
 * Generate tp_field__swapped_u{16,32,64}(): same as TP_UINT_FIELD() but
 * byte-swap the value, for samples recorded on a host of the opposite
 * endianness (see evsel->needs_swap).  No 8-bit variant: single bytes
 * have no endianness.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
140 
141 static int tp_field__init_uint(struct tp_field *field,
142 			       struct format_field *format_field,
143 			       bool needs_swap)
144 {
145 	field->offset = format_field->offset;
146 
147 	switch (format_field->size) {
148 	case 1:
149 		field->integer = tp_field__u8;
150 		break;
151 	case 2:
152 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 		break;
154 	case 4:
155 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 		break;
157 	case 8:
158 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 		break;
160 	default:
161 		return -1;
162 	}
163 
164 	return 0;
165 }
166 
/* Return a pointer into the sample's raw payload at the field's offset. */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}
171 
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174 	field->offset = format_field->offset;
175 	field->pointer = tp_field__ptr;
176 	return 0;
177 }
178 
/*
 * Resolved fields of the raw_syscalls:{sys_enter,sys_exit} tracepoints:
 * the syscall id plus either the argument array (enter) or the return
 * value (exit) — never both, hence the union.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
185 
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 					  struct tp_field *field,
188 					  const char *name)
189 {
190 	struct format_field *format_field = perf_evsel__field(evsel, name);
191 
192 	if (format_field == NULL)
193 		return -1;
194 
195 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197 
/*
 * Convenience wrapper: bind the syscall_tp member called 'name' in
 * evsel->priv to the tracepoint field of the same name (stringified).
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201 
/*
 * Look up tracepoint field @name on @evsel and bind a pointer accessor
 * to @field.  Returns 0 on success, -1 when the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
213 
/*
 * Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field():
 * bind the syscall_tp member 'name' in evsel->priv to the tracepoint
 * field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217 
/*
 * Free the evsel's private syscall_tp state, then the evsel itself.
 * Order matters: priv must be released before perf_evsel__delete()
 * frees the evsel that holds the pointer.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
223 
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226 	evsel->priv = malloc(sizeof(struct syscall_tp));
227 	if (evsel->priv != NULL) {
228 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 			goto out_delete;
230 
231 		evsel->handler = handler;
232 		return 0;
233 	}
234 
235 	return -ENOMEM;
236 
237 out_delete:
238 	zfree(&evsel->priv);
239 	return -ENOENT;
240 }
241 
242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
243 {
244 	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
245 
246 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
247 	if (IS_ERR(evsel))
248 		evsel = perf_evsel__newtp("syscalls", direction);
249 
250 	if (IS_ERR(evsel))
251 		return NULL;
252 
253 	if (perf_evsel__init_syscall_tp(evsel, handler))
254 		goto out_delete;
255 
256 	return evsel;
257 
258 out_delete:
259 	perf_evsel__delete_priv(evsel);
260 	return NULL;
261 }
262 
/* Read the integer syscall_tp field 'name' (id/args/ret) from a sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Read the pointer syscall_tp field 'name' from a sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
270 
/*
 * Context handed to the argument beautifiers (the SCA_* callbacks):
 * the raw argument value, the thread/trace it belongs to, an optional
 * per-syscall parameter (e.g. a strarray), the argument index and a
 * mask — presumably of arguments already consumed by the printer;
 * TODO confirm against the formatting loop.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};
279 
/*
 * Integer-to-name mapping used by the SCA_STRARRAY/SCA_STRHEXARRAY
 * beautifiers: value 'v' maps to entries[v - offset].
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define 'strarray__<array>' over an existing string array, offset 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, but the first entry maps to value 'off' (e.g. tioctls/TCGETS). */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
296 
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 						const char *intfmt,
299 					        struct syscall_arg *arg)
300 {
301 	struct strarray *sa = arg->parm;
302 	int idx = arg->val - sa->offset;
303 
304 	if (idx < 0 || idx >= sa->nr_entries)
305 		return scnprintf(bf, size, intfmt, arg->val);
306 
307 	return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309 
/* Default strarray beautifier: unknown values fall back to decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
317 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* Strarray beautifier whose fallback is hex, used for ioctl cmds. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
331 
/* Forward declaration: implemented after the fd path-cache helpers. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd

/* Fallback for libcs that do not expose the *at() dirfd sentinel. */
#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif
340 
341 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
342 					   struct syscall_arg *arg)
343 {
344 	int fd = arg->val;
345 
346 	if (fd == AT_FDCWD)
347 		return scnprintf(bf, size, "CWD");
348 
349 	return syscall_arg__scnprintf_fd(bf, size, arg);
350 }
351 
352 #define SCA_FDAT syscall_arg__scnprintf_fd_at
353 
/* Forward declaration: implemented after the fd path-cache helpers. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

/* Print the raw argument value in hex (addresses, opaque handles). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
366 
367 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
368 					 struct syscall_arg *arg)
369 {
370 	return scnprintf(bf, size, "%d", arg->val);
371 }
372 
373 #define SCA_INT syscall_arg__scnprintf_int
374 
/*
 * String tables feeding the STRARRAY()/SCA_STRARRAY beautifiers below.
 * Each entry's index (plus the strarray offset, where used) must match
 * the kernel's numeric value for that constant.
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* EPOLL_CTL_ADD is 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* SEEK_DATA/SEEK_HOLE only exist on newer systems, hence the ifdefs. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
440 
/*
 * Beautify the access(2) mode argument: F_OK (0) prints as "F", other
 * bits are decoded as R/W/X, with any leftover unknown bits in hex.
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F");
/* Append the letter for each R/W/X bit set and clear it from mode. */
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
467 
/* Forward declaration: needs the thread filename tracking machinery. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename

/*
 * Beautify the pipe2(2) flags argument: decode O_CLOEXEC/O_NONBLOCK,
 * print any leftover unknown bits in hex, '|'-separated.
 */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
/* First terminal ioctl number; the tioctls strarray is offset by it. */
#define TCGETS		0x5401

/*
 * Terminal ioctl command names, indexed from TCGETS.  The designated
 * initializers ([0x27], [0x50], [0x60]) skip gaps in the cmd space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */

/* Fallbacks for libcs that predate getrandom(2). */
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif
529 
/*
 * Beautify the getrandom(2) flags argument: decode GRND_RANDOM and
 * GRND_NONBLOCK, print leftover unknown bits in hex, '|'-separated.
 */
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags

/* Wire argument 'arg' of a syscall_fmts[] entry to a strarray table. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
556 
557 #include "trace/beauty/eventfd.c"
558 #include "trace/beauty/flock.c"
559 #include "trace/beauty/futex_op.c"
560 #include "trace/beauty/mmap.c"
561 #include "trace/beauty/mode_t.c"
562 #include "trace/beauty/msg_flags.c"
563 #include "trace/beauty/open_flags.c"
564 #include "trace/beauty/perf_event_open.c"
565 #include "trace/beauty/pid.c"
566 #include "trace/beauty/sched_policy.c"
567 #include "trace/beauty/seccomp.c"
568 #include "trace/beauty/signum.c"
569 #include "trace/beauty/socket_type.c"
570 #include "trace/beauty/waitid_options.c"
571 
572 static struct syscall_fmt {
573 	const char *name;
574 	const char *alias;
575 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
576 	void	   *arg_parm[6];
577 	bool	   errmsg;
578 	bool	   errpid;
579 	bool	   timeout;
580 	bool	   hexret;
581 } syscall_fmts[] = {
582 	{ .name	    = "access",	    .errmsg = true,
583 	  .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
584 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
585 	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
586 	{ .name	    = "brk",	    .hexret = true,
587 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
588 	{ .name	    = "chdir",	    .errmsg = true, },
589 	{ .name	    = "chmod",	    .errmsg = true, },
590 	{ .name	    = "chroot",	    .errmsg = true, },
591 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
592 	{ .name	    = "clone",	    .errpid = true, },
593 	{ .name	    = "close",	    .errmsg = true,
594 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
595 	{ .name	    = "connect",    .errmsg = true, },
596 	{ .name	    = "creat",	    .errmsg = true, },
597 	{ .name	    = "dup",	    .errmsg = true, },
598 	{ .name	    = "dup2",	    .errmsg = true, },
599 	{ .name	    = "dup3",	    .errmsg = true, },
600 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
601 	{ .name	    = "eventfd2",   .errmsg = true,
602 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
603 	{ .name	    = "faccessat",  .errmsg = true, },
604 	{ .name	    = "fadvise64",  .errmsg = true, },
605 	{ .name	    = "fallocate",  .errmsg = true, },
606 	{ .name	    = "fchdir",	    .errmsg = true, },
607 	{ .name	    = "fchmod",	    .errmsg = true, },
608 	{ .name	    = "fchmodat",   .errmsg = true,
609 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
610 	{ .name	    = "fchown",	    .errmsg = true, },
611 	{ .name	    = "fchownat",   .errmsg = true,
612 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
613 	{ .name	    = "fcntl",	    .errmsg = true,
614 	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
615 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
616 	{ .name	    = "fdatasync",  .errmsg = true, },
617 	{ .name	    = "flock",	    .errmsg = true,
618 	  .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
619 	{ .name	    = "fsetxattr",  .errmsg = true, },
620 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
621 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
622 	{ .name	    = "fstatfs",    .errmsg = true, },
623 	{ .name	    = "fsync",    .errmsg = true, },
624 	{ .name	    = "ftruncate", .errmsg = true, },
625 	{ .name	    = "futex",	    .errmsg = true,
626 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
627 	{ .name	    = "futimesat", .errmsg = true,
628 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
629 	{ .name	    = "getdents",   .errmsg = true, },
630 	{ .name	    = "getdents64", .errmsg = true, },
631 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
632 	{ .name	    = "getpid",	    .errpid = true, },
633 	{ .name	    = "getpgid",    .errpid = true, },
634 	{ .name	    = "getppid",    .errpid = true, },
635 	{ .name	    = "getrandom",  .errmsg = true,
636 	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
637 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
638 	{ .name	    = "getxattr",   .errmsg = true, },
639 	{ .name	    = "inotify_add_watch",	    .errmsg = true, },
640 	{ .name	    = "ioctl",	    .errmsg = true,
641 	  .arg_scnprintf = {
642 #if defined(__i386__) || defined(__x86_64__)
643 /*
644  * FIXME: Make this available to all arches.
645  */
646 			     [1] = SCA_STRHEXARRAY, /* cmd */
647 			     [2] = SCA_HEX, /* arg */ },
648 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
649 #else
650 			     [2] = SCA_HEX, /* arg */ }, },
651 #endif
652 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
653 	{ .name	    = "kill",	    .errmsg = true,
654 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
655 	{ .name	    = "lchown",    .errmsg = true, },
656 	{ .name	    = "lgetxattr",  .errmsg = true, },
657 	{ .name	    = "linkat",	    .errmsg = true,
658 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
659 	{ .name	    = "listxattr",  .errmsg = true, },
660 	{ .name	    = "llistxattr", .errmsg = true, },
661 	{ .name	    = "lremovexattr",  .errmsg = true, },
662 	{ .name	    = "lseek",	    .errmsg = true,
663 	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
664 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
665 	{ .name	    = "lsetxattr",  .errmsg = true, },
666 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
667 	{ .name	    = "lsxattr",    .errmsg = true, },
668 	{ .name     = "madvise",    .errmsg = true,
669 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
670 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
671 	{ .name	    = "mkdir",    .errmsg = true, },
672 	{ .name	    = "mkdirat",    .errmsg = true,
673 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
674 	{ .name	    = "mknod",      .errmsg = true, },
675 	{ .name	    = "mknodat",    .errmsg = true,
676 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
677 	{ .name	    = "mlock",	    .errmsg = true,
678 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
679 	{ .name	    = "mlockall",   .errmsg = true,
680 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
681 	{ .name	    = "mmap",	    .hexret = true,
682 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
683 			     [2] = SCA_MMAP_PROT, /* prot */
684 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
685 	{ .name	    = "mprotect",   .errmsg = true,
686 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
687 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
688 	{ .name	    = "mq_unlink", .errmsg = true,
689 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
690 	{ .name	    = "mremap",	    .hexret = true,
691 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
692 			     [3] = SCA_MREMAP_FLAGS, /* flags */
693 			     [4] = SCA_HEX, /* new_addr */ }, },
694 	{ .name	    = "munlock",    .errmsg = true,
695 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
696 	{ .name	    = "munmap",	    .errmsg = true,
697 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
698 	{ .name	    = "name_to_handle_at", .errmsg = true,
699 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 	{ .name	    = "newfstatat", .errmsg = true,
701 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
702 	{ .name	    = "open",	    .errmsg = true,
703 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
704 	{ .name	    = "open_by_handle_at", .errmsg = true,
705 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
706 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
707 	{ .name	    = "openat",	    .errmsg = true,
708 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
709 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
710 	{ .name	    = "perf_event_open", .errmsg = true,
711 	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
712 			     [3] = SCA_FD,  /* group_fd */
713 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
714 	{ .name	    = "pipe2",	    .errmsg = true,
715 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
716 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
717 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
718 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
719 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread", },
720 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
721 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
722 	{ .name	    = "pwritev",    .errmsg = true, },
723 	{ .name	    = "read",	    .errmsg = true, },
724 	{ .name	    = "readlink",   .errmsg = true, },
725 	{ .name	    = "readlinkat", .errmsg = true,
726 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
727 	{ .name	    = "readv",	    .errmsg = true, },
728 	{ .name	    = "recvfrom",   .errmsg = true,
729 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
730 	{ .name	    = "recvmmsg",   .errmsg = true,
731 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
732 	{ .name	    = "recvmsg",    .errmsg = true,
733 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
734 	{ .name	    = "removexattr", .errmsg = true, },
735 	{ .name	    = "renameat",   .errmsg = true,
736 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
737 	{ .name	    = "rmdir",    .errmsg = true, },
738 	{ .name	    = "rt_sigaction", .errmsg = true,
739 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
740 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
741 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
742 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
743 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
744 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
745 	{ .name	    = "sched_setscheduler",   .errmsg = true,
746 	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
747 	{ .name	    = "seccomp", .errmsg = true,
748 	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
749 			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
750 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
751 	{ .name	    = "sendmmsg",    .errmsg = true,
752 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
753 	{ .name	    = "sendmsg",    .errmsg = true,
754 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
755 	{ .name	    = "sendto",	    .errmsg = true,
756 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
757 	{ .name	    = "set_tid_address", .errpid = true, },
758 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
759 	{ .name	    = "setpgid",    .errmsg = true, },
760 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
761 	{ .name	    = "setxattr",   .errmsg = true, },
762 	{ .name	    = "shutdown",   .errmsg = true, },
763 	{ .name	    = "socket",	    .errmsg = true,
764 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
765 			     [1] = SCA_SK_TYPE, /* type */ },
766 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
767 	{ .name	    = "socketpair", .errmsg = true,
768 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
769 			     [1] = SCA_SK_TYPE, /* type */ },
770 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
771 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
772 	{ .name	    = "statfs",	    .errmsg = true, },
773 	{ .name	    = "swapoff",    .errmsg = true,
774 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
775 	{ .name	    = "swapon",	    .errmsg = true,
776 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
777 	{ .name	    = "symlinkat",  .errmsg = true,
778 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
779 	{ .name	    = "tgkill",	    .errmsg = true,
780 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
781 	{ .name	    = "tkill",	    .errmsg = true,
782 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
783 	{ .name	    = "truncate",   .errmsg = true, },
784 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
785 	{ .name	    = "unlinkat",   .errmsg = true,
786 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
787 	{ .name	    = "utime",  .errmsg = true, },
788 	{ .name	    = "utimensat",  .errmsg = true,
789 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
790 	{ .name	    = "utimes",  .errmsg = true, },
791 	{ .name	    = "vmsplice",  .errmsg = true, },
792 	{ .name	    = "wait4",	    .errpid = true,
793 	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
794 	{ .name	    = "waitid",	    .errpid = true,
795 	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
796 	{ .name	    = "write",	    .errmsg = true, },
797 	{ .name	    = "writev",	    .errmsg = true, },
798 };
799 
/* bsearch() comparator for syscall_fmts[]: the key is the name string. */
static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}
805 
806 static struct syscall_fmt *syscall_fmt__find(const char *name)
807 {
808 	const int nmemb = ARRAY_SIZE(syscall_fmts);
809 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
810 }
811 
/*
 * Per-syscall descriptor built from the tracepoint format plus the
 * optional pretty-printing hooks copied from syscall_fmts[].
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;	/* linked list of argument fields — TODO confirm */
	const char	    *name;
	bool		    is_exit;	/* syscall never returns (e.g. exit_group) — TODO confirm */
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
822 
823 static size_t fprintf_duration(unsigned long t, FILE *fp)
824 {
825 	double duration = (double)t / NSEC_PER_MSEC;
826 	size_t printed = fprintf(fp, "(");
827 
828 	if (duration >= 1.0)
829 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
830 	else if (duration >= 0.01)
831 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
832 	else
833 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
834 	return printed + fprintf(fp, "): ");
835 }
836 
837 /**
838  * filename.ptr: The filename char pointer that will be vfs_getname'd
839  * filename.entry_str_pos: Where to insert the string translated from
840  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
841  */
842 struct thread_trace {
843 	u64		  entry_time;
844 	u64		  exit_time;
845 	bool		  entry_pending;
846 	unsigned long	  nr_events;
847 	unsigned long	  pfmaj, pfmin;
848 	char		  *entry_str;
849 	double		  runtime_ms;
850         struct {
851 		unsigned long ptr;
852 		short int     entry_str_pos;
853 		bool	      pending_open;
854 		unsigned int  namelen;
855 		char	      *name;
856 	} filename;
857 	struct {
858 		int	  max;
859 		char	  **table;
860 	} paths;
861 
862 	struct intlist *syscall_stats;
863 };
864 
865 static struct thread_trace *thread_trace__new(void)
866 {
867 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
868 
869 	if (ttrace)
870 		ttrace->paths.max = -1;
871 
872 	ttrace->syscall_stats = intlist__new(NULL);
873 
874 	return ttrace;
875 }
876 
877 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
878 {
879 	struct thread_trace *ttrace;
880 
881 	if (thread == NULL)
882 		goto fail;
883 
884 	if (thread__priv(thread) == NULL)
885 		thread__set_priv(thread, thread_trace__new());
886 
887 	if (thread__priv(thread) == NULL)
888 		goto fail;
889 
890 	ttrace = thread__priv(thread);
891 	++ttrace->nr_events;
892 
893 	return ttrace;
894 fail:
895 	color_fprintf(fp, PERF_COLOR_RED,
896 		      "WARNING: not enough memory, dropping samples!\n");
897 	return NULL;
898 }
899 
/* Page-fault tracing selection flags: major and minor faults respectively. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread buffer used to format a sys_enter line. */
static const size_t trace__entry_str_size = 2048;
904 
905 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
906 {
907 	struct thread_trace *ttrace = thread__priv(thread);
908 
909 	if (fd > ttrace->paths.max) {
910 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
911 
912 		if (npath == NULL)
913 			return -1;
914 
915 		if (ttrace->paths.max != -1) {
916 			memset(npath + ttrace->paths.max + 1, 0,
917 			       (fd - ttrace->paths.max) * sizeof(char *));
918 		} else {
919 			memset(npath, 0, (fd + 1) * sizeof(char *));
920 		}
921 
922 		ttrace->paths.table = npath;
923 		ttrace->paths.max   = fd;
924 	}
925 
926 	ttrace->paths.table[fd] = strdup(pathname);
927 
928 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
929 }
930 
931 static int thread__read_fd_path(struct thread *thread, int fd)
932 {
933 	char linkname[PATH_MAX], pathname[PATH_MAX];
934 	struct stat st;
935 	int ret;
936 
937 	if (thread->pid_ == thread->tid) {
938 		scnprintf(linkname, sizeof(linkname),
939 			  "/proc/%d/fd/%d", thread->pid_, fd);
940 	} else {
941 		scnprintf(linkname, sizeof(linkname),
942 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
943 	}
944 
945 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
946 		return -1;
947 
948 	ret = readlink(linkname, pathname, sizeof(pathname));
949 
950 	if (ret < 0 || ret > st.st_size)
951 		return -1;
952 
953 	pathname[ret] = '\0';
954 	return trace__set_fd_pathname(thread, fd, pathname);
955 }
956 
957 static const char *thread__fd_path(struct thread *thread, int fd,
958 				   struct trace *trace)
959 {
960 	struct thread_trace *ttrace = thread__priv(thread);
961 
962 	if (ttrace == NULL)
963 		return NULL;
964 
965 	if (fd < 0)
966 		return NULL;
967 
968 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
969 		if (!trace->live)
970 			return NULL;
971 		++trace->stats.proc_getname;
972 		if (thread__read_fd_path(thread, fd))
973 			return NULL;
974 	}
975 
976 	return ttrace->paths.table[fd];
977 }
978 
979 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
980 					struct syscall_arg *arg)
981 {
982 	int fd = arg->val;
983 	size_t printed = scnprintf(bf, size, "%d", fd);
984 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
985 
986 	if (path)
987 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
988 
989 	return printed;
990 }
991 
992 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
993 					      struct syscall_arg *arg)
994 {
995 	int fd = arg->val;
996 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
997 	struct thread_trace *ttrace = thread__priv(arg->thread);
998 
999 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1000 		zfree(&ttrace->paths.table[fd]);
1001 
1002 	return printed;
1003 }
1004 
/*
 * Record, in the thread's state, the user-space pointer that vfs_getname
 * will later resolve and the offset inside entry_str where the resolved
 * path must be spliced in (see trace__vfs_getname).
 */
static void thread__set_filename_pos(struct thread *thread, const char *bf,
				     unsigned long ptr)
{
	struct thread_trace *ttrace = thread__priv(thread);

	ttrace->filename.ptr = ptr;
	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
}
1013 
1014 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1015 					      struct syscall_arg *arg)
1016 {
1017 	unsigned long ptr = arg->val;
1018 
1019 	if (!arg->trace->vfs_getname)
1020 		return scnprintf(bf, size, "%#x", ptr);
1021 
1022 	thread__set_filename_pos(arg->thread, bf, ptr);
1023 	return 0;
1024 }
1025 
1026 static bool trace__filter_duration(struct trace *trace, double t)
1027 {
1028 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1029 }
1030 
1031 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1032 {
1033 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1034 
1035 	return fprintf(fp, "%10.3f ", ts);
1036 }
1037 
/*
 * Written asynchronously from the signal handler and polled by the main
 * loop, so they must be volatile: without it the compiler may legally
 * cache the value and never observe the handler's store.
 */
static volatile bool done = false;
static volatile bool interrupted = false;

static void sig_handler(int sig)
{
	done = true;
	/* Remember whether it was ^C (SIGINT) that stopped us */
	interrupted = sig == SIGINT;
}
1046 
1047 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1048 					u64 duration, u64 tstamp, FILE *fp)
1049 {
1050 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1051 	printed += fprintf_duration(duration, fp);
1052 
1053 	if (trace->multiple_threads) {
1054 		if (trace->show_comm)
1055 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1056 		printed += fprintf(fp, "%d ", thread->tid);
1057 	}
1058 
1059 	return printed;
1060 }
1061 
1062 static int trace__process_event(struct trace *trace, struct machine *machine,
1063 				union perf_event *event, struct perf_sample *sample)
1064 {
1065 	int ret = 0;
1066 
1067 	switch (event->header.type) {
1068 	case PERF_RECORD_LOST:
1069 		color_fprintf(trace->output, PERF_COLOR_RED,
1070 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1071 		ret = machine__process_lost_event(machine, event, sample);
1072 		break;
1073 	default:
1074 		ret = machine__process_event(machine, event, sample);
1075 		break;
1076 	}
1077 
1078 	return ret;
1079 }
1080 
1081 static int trace__tool_process(struct perf_tool *tool,
1082 			       union perf_event *event,
1083 			       struct perf_sample *sample,
1084 			       struct machine *machine)
1085 {
1086 	struct trace *trace = container_of(tool, struct trace, tool);
1087 	return trace__process_event(trace, machine, event, sample);
1088 }
1089 
1090 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1091 {
1092 	struct machine *machine = vmachine;
1093 
1094 	if (machine->kptr_restrict_warned)
1095 		return NULL;
1096 
1097 	if (symbol_conf.kptr_restrict) {
1098 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1099 			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
1100 			   "Kernel samples will not be resolved.\n");
1101 		machine->kptr_restrict_warned = true;
1102 		return NULL;
1103 	}
1104 
1105 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1106 }
1107 
/*
 * Set up symbol resolution and the host machine object, then synthesize
 * events (mmap, comm, ...) for the already-running threads being traced so
 * their maps are known before samples start arriving.  Returns 0 or a
 * negative error.
 */
static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init(NULL);

	if (err)
		return err;

	trace->host = machine__new_host();
	if (trace->host == NULL)
		return -ENOMEM;

	/* NOTE(review): trace->host is not torn down on the error paths
	 * below — presumably released with the trace object; confirm. */
	if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
		return -errno;

	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
					    evlist->threads, trace__tool_process, false,
					    trace->opts.proc_map_timeout);
	if (err)
		symbol__exit();

	return err;
}
1130 
/*
 * Pick a pretty-printer for each argument of syscall 'sc', in tracepoint
 * field order: an explicit formatter from the syscall_fmts table wins,
 * then heuristics on the field's type/name (filenames, pointers, pids,
 * modes, fds).  Arguments left without a formatter are printed as "%ld".
 * Returns 0, or -1 on allocation failure.
 */
static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0, len;

	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
	if (sc->arg_scnprintf == NULL)
		return -1;

	if (sc->fmt)
		sc->arg_parm = sc->fmt->arg_parm;

	for (field = sc->args; field; field = field->next) {
		/* A hand-written formatter in syscall_fmts takes precedence */
		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
		else if (strcmp(field->type, "const char *") == 0 &&
			 (strcmp(field->name, "filename") == 0 ||
			  strcmp(field->name, "path") == 0 ||
			  strcmp(field->name, "pathname") == 0))
			sc->arg_scnprintf[idx] = SCA_FILENAME;
		else if (field->flags & FIELD_IS_POINTER)
			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
		else if (strcmp(field->type, "pid_t") == 0)
			sc->arg_scnprintf[idx] = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			sc->arg_scnprintf[idx] = SCA_MODE_T;
		else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 (len = strlen(field->name)) >= 2 &&
			 strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			sc->arg_scnprintf[idx] = SCA_FD;
		}
		++idx;
	}

	return 0;
}
1176 
/*
 * Lazily fill trace->syscalls.table[id]: resolve the syscall's name, find
 * its pretty-printing descriptor and its sys_enter_<name> tracepoint
 * format, growing the table as needed.  Returns 0 on success, -1 on any
 * failure (unknown id, OOM, missing tracepoint format).
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		/* Zero only the newly appended slots (or all, first time) */
		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls' tracepoints only exist under an alias name */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * We need to check and discard the first variable '__syscall_nr'
	 * or 'nr' that mean the syscall number. It is needless here.
	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	/* exit/exit_group never return, so there will be no sys_exit event */
	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1235 
/*
 * Translate the -e/--expr syscall-name list into syscall ids, collecting
 * them in trace->ev_qualifier_ids.  All invalid names are reported in one
 * message; on any error the ids array is freed and -EINVAL returned.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc);

		if (id < 0) {
			/* First bad name starts the error message, the rest
			 * are appended comma-separated. */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1281 
1282 /*
1283  * args is to be interpreted as a series of longs but we need to handle
1284  * 8-byte unaligned accesses. args points to raw_data within the event
1285  * and raw_data is guaranteed to be 8-byte unaligned because it is
1286  * preceded by raw_size which is a u32. So we need to copy args to a temp
1287  * variable to read it. Most notably this avoids extended load instructions
1288  * on unaligned addresses
1289  */
1290 
/*
 * Format syscall arguments into 'bf' as "name: value, ...", using the
 * per-argument formatters chosen by syscall__set_arg_fmts().  When the
 * tracepoint format couldn't be read, fall back to dumping six raw longs.
 * Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* Formatters may consume several args via arg.mask */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1363 
1364 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1365 				  union perf_event *event,
1366 				  struct perf_sample *sample);
1367 
/*
 * Return the (lazily loaded) descriptor for syscall 'id', or NULL when the
 * id is invalid or its information can't be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: trace__read_syscall_info() may still have left a hole */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1410 
1411 static void thread__update_stats(struct thread_trace *ttrace,
1412 				 int id, struct perf_sample *sample)
1413 {
1414 	struct int_node *inode;
1415 	struct stats *stats;
1416 	u64 duration = 0;
1417 
1418 	inode = intlist__findnew(ttrace->syscall_stats, id);
1419 	if (inode == NULL)
1420 		return;
1421 
1422 	stats = inode->priv;
1423 	if (stats == NULL) {
1424 		stats = malloc(sizeof(struct stats));
1425 		if (stats == NULL)
1426 			return;
1427 		init_stats(stats);
1428 		inode->priv = stats;
1429 	}
1430 
1431 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1432 		duration = sample->time - ttrace->entry_time;
1433 
1434 	update_stats(stats, duration);
1435 }
1436 
1437 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1438 {
1439 	struct thread_trace *ttrace;
1440 	u64 duration;
1441 	size_t printed;
1442 
1443 	if (trace->current == NULL)
1444 		return 0;
1445 
1446 	ttrace = thread__priv(trace->current);
1447 
1448 	if (!ttrace->entry_pending)
1449 		return 0;
1450 
1451 	duration = sample->time - ttrace->entry_time;
1452 
1453 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1454 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1455 	ttrace->entry_pending = false;
1456 
1457 	return printed;
1458 }
1459 
/*
 * raw_syscalls:sys_enter handler: format "name(arg: val, ...)" into the
 * thread's entry_str.  For ordinary syscalls the line is left pending so
 * trace__sys_exit can complete it with the return value; exit/exit_group
 * never return, so their line is printed immediately.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the per-thread buffer the line is built into */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush a pending enter line from a previously current thread */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1518 
1519 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1520 				    struct perf_sample *sample,
1521 				    struct callchain_cursor *cursor)
1522 {
1523 	struct addr_location al;
1524 
1525 	if (machine__resolve(trace->host, &al, sample) < 0 ||
1526 	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1527 		return -1;
1528 
1529 	return 0;
1530 }
1531 
1532 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1533 {
1534 	/* TODO: user-configurable print_opts */
1535 	const unsigned int print_opts = EVSEL__PRINT_SYM |
1536 				        EVSEL__PRINT_DSO |
1537 				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
1538 
1539 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1540 }
1541 
/*
 * raw_syscalls:sys_exit handler: complete the pending enter line (or print
 * a "continued" marker if it was flushed) with the syscall's return value,
 * formatted per the syscall_fmts entry: errno name, "Timeout", hex, pid
 * with comm, or plain signed decimal.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* open() returned an fd: cache the pathname vfs_getname captured */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Stack shallower than --min-stack: skip the event */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* The enter line was already flushed by another event */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* e.g. fork/clone: the return value is a child pid */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1641 
/*
 * probe:vfs_getname handler: stash the resolved pathname for the pending
 * open (see trace__sys_exit) and, if a filename argument slot was marked
 * by thread__set_filename_pos(), splice the name into the thread's
 * entry_str at that position, truncating the front of the name if needed
 * to fit the buffer.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out;

	filename_len = strlen(filename);

	/* Grow the per-thread name buffer if this path is the longest yet */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
				goto out;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No filename arg slot marked in entry_str: nothing to splice */
	if (!ttrace->filename.ptr)
		goto out;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out;

	/* Keep the tail of the path; the leading part is the least useful */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out:
	return 0;
}
1698 
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU time (ms) both per
 * thread and globally for the summary.  If per-thread state can't be
 * obtained, dump the raw tracepoint fields instead.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
	thread__put(thread);
	return 0;

out_dump:
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
	       evsel->name,
	       perf_evsel__strval(evsel, sample, "comm"),
	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
	       runtime,
	       perf_evsel__intval(evsel, sample, "vruntime"));
	/* NOTE(review): thread may be NULL here — presumably thread__put()
	 * tolerates NULL; confirm. */
	thread__put(thread);
	return 0;
}
1728 
1729 static void bpf_output__printer(enum binary_printer_ops op,
1730 				unsigned int val, void *extra)
1731 {
1732 	FILE *output = extra;
1733 	unsigned char ch = (unsigned char)val;
1734 
1735 	switch (op) {
1736 	case BINARY_PRINT_CHAR_DATA:
1737 		fprintf(output, "%c", isprint(ch) ? ch : '.');
1738 		break;
1739 	case BINARY_PRINT_DATA_BEGIN:
1740 	case BINARY_PRINT_LINE_BEGIN:
1741 	case BINARY_PRINT_ADDR:
1742 	case BINARY_PRINT_NUM_DATA:
1743 	case BINARY_PRINT_NUM_PAD:
1744 	case BINARY_PRINT_SEP:
1745 	case BINARY_PRINT_CHAR_PAD:
1746 	case BINARY_PRINT_LINE_END:
1747 	case BINARY_PRINT_DATA_END:
1748 	default:
1749 		break;
1750 	}
1751 }
1752 
1753 static void bpf_output__fprintf(struct trace *trace,
1754 				struct perf_sample *sample)
1755 {
1756 	print_binary(sample->raw_data, sample->raw_size, 8,
1757 		     bpf_output__printer, trace->output);
1758 }
1759 
/*
 * Generic handler for --event tracepoints and BPF output events: print a
 * timestamped line using either the BPF raw-data dumper or libtraceevent's
 * formatter, plus the callchain when requested.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Stack shallower than --min-stack: skip the event */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with the syscall lines' duration field */
	if (trace->trace_syscalls)
		fprintf(trace->output, "(         ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1800 
1801 static void print_location(FILE *f, struct perf_sample *sample,
1802 			   struct addr_location *al,
1803 			   bool print_dso, bool print_sym)
1804 {
1805 
1806 	if ((verbose || print_dso) && al->map)
1807 		fprintf(f, "%s@", al->map->dso->long_name);
1808 
1809 	if ((verbose || print_sym) && al->sym)
1810 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1811 			al->addr - al->sym->start);
1812 	else if (al->map)
1813 		fprintf(f, "0x%" PRIx64, al->addr);
1814 	else
1815 		fprintf(f, "0x%" PRIx64, sample->addr);
1816 }
1817 
/*
 * Page fault event handler: count the fault (major/minor) in the thread's
 * stats and, unless --summary-only, print "majfault/minfault [ip] =>
 * target (map-type/level)", resolving the faulting address first against
 * data maps, then executable maps.
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Stack shallower than --min-stack: skip the event */
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction lives */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* The faulted-on address: try data maps first, then executable */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';	/* executable map */
		else
			map_type = '?';	/* unmapped */
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1893 
1894 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1895 {
1896 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1897 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1898 		return false;
1899 
1900 	if (trace->pid_list || trace->tid_list)
1901 		return true;
1902 
1903 	return false;
1904 }
1905 
1906 static void trace__set_base_time(struct trace *trace,
1907 				 struct perf_evsel *evsel,
1908 				 struct perf_sample *sample)
1909 {
1910 	/*
1911 	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1912 	 * and don't use sample->time unconditionally, we may end up having
1913 	 * some other event in the future without PERF_SAMPLE_TIME for good
1914 	 * reason, i.e. we may not be interested in its timestamps, just in
1915 	 * it taking place, picking some piece of information when it
1916 	 * appears in our event stream (vfs_getname comes to mind).
1917 	 */
1918 	if (trace->base_time == 0 && !trace->full_time &&
1919 	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1920 		trace->base_time = sample->time;
1921 }
1922 
1923 static int trace__process_sample(struct perf_tool *tool,
1924 				 union perf_event *event,
1925 				 struct perf_sample *sample,
1926 				 struct perf_evsel *evsel,
1927 				 struct machine *machine __maybe_unused)
1928 {
1929 	struct trace *trace = container_of(tool, struct trace, tool);
1930 	int err = 0;
1931 
1932 	tracepoint_handler handler = evsel->handler;
1933 
1934 	if (skip_sample(trace, sample))
1935 		return 0;
1936 
1937 	trace__set_base_time(trace, evsel, sample);
1938 
1939 	if (handler) {
1940 		++trace->nr_events;
1941 		handler(trace, evsel, event, sample);
1942 	}
1943 
1944 	return err;
1945 }
1946 
1947 static int parse_target_str(struct trace *trace)
1948 {
1949 	if (trace->opts.target.pid) {
1950 		trace->pid_list = intlist__new(trace->opts.target.pid);
1951 		if (trace->pid_list == NULL) {
1952 			pr_err("Error parsing process id string\n");
1953 			return -EINVAL;
1954 		}
1955 	}
1956 
1957 	if (trace->opts.target.tid) {
1958 		trace->tid_list = intlist__new(trace->opts.target.tid);
1959 		if (trace->tid_list == NULL) {
1960 			pr_err("Error parsing thread id string\n");
1961 			return -EINVAL;
1962 		}
1963 	}
1964 
1965 	return 0;
1966 }
1967 
1968 static int trace__record(struct trace *trace, int argc, const char **argv)
1969 {
1970 	unsigned int rec_argc, i, j;
1971 	const char **rec_argv;
1972 	const char * const record_args[] = {
1973 		"record",
1974 		"-R",
1975 		"-m", "1024",
1976 		"-c", "1",
1977 	};
1978 
1979 	const char * const sc_args[] = { "-e", };
1980 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1981 	const char * const majpf_args[] = { "-e", "major-faults" };
1982 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1983 	const char * const minpf_args[] = { "-e", "minor-faults" };
1984 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1985 
1986 	/* +1 is for the event string below */
1987 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1988 		majpf_args_nr + minpf_args_nr + argc;
1989 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1990 
1991 	if (rec_argv == NULL)
1992 		return -ENOMEM;
1993 
1994 	j = 0;
1995 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1996 		rec_argv[j++] = record_args[i];
1997 
1998 	if (trace->trace_syscalls) {
1999 		for (i = 0; i < sc_args_nr; i++)
2000 			rec_argv[j++] = sc_args[i];
2001 
2002 		/* event string may be different for older kernels - e.g., RHEL6 */
2003 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2004 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2005 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2006 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2007 		else {
2008 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2009 			return -1;
2010 		}
2011 	}
2012 
2013 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2014 		for (i = 0; i < majpf_args_nr; i++)
2015 			rec_argv[j++] = majpf_args[i];
2016 
2017 	if (trace->trace_pgfaults & TRACE_PFMIN)
2018 		for (i = 0; i < minpf_args_nr; i++)
2019 			rec_argv[j++] = minpf_args[i];
2020 
2021 	for (i = 0; i < (unsigned int)argc; i++)
2022 		rec_argv[j++] = argv[i];
2023 
2024 	return cmd_record(j, rec_argv, NULL);
2025 }
2026 
2027 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2028 
2029 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2030 {
2031 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2032 
2033 	if (IS_ERR(evsel))
2034 		return false;
2035 
2036 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2037 		perf_evsel__delete(evsel);
2038 		return false;
2039 	}
2040 
2041 	evsel->handler = trace__vfs_getname;
2042 	perf_evlist__add(evlist, evsel);
2043 	return true;
2044 }
2045 
2046 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2047 {
2048 	struct perf_evsel *evsel;
2049 	struct perf_event_attr attr = {
2050 		.type = PERF_TYPE_SOFTWARE,
2051 		.mmap_data = 1,
2052 	};
2053 
2054 	attr.config = config;
2055 	attr.sample_period = 1;
2056 
2057 	event_attr_init(&attr);
2058 
2059 	evsel = perf_evsel__new(&attr);
2060 	if (evsel)
2061 		evsel->handler = trace__pgfault;
2062 
2063 	return evsel;
2064 }
2065 
2066 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2067 {
2068 	const u32 type = event->header.type;
2069 	struct perf_evsel *evsel;
2070 
2071 	if (type != PERF_RECORD_SAMPLE) {
2072 		trace__process_event(trace, trace->host, event, sample);
2073 		return;
2074 	}
2075 
2076 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2077 	if (evsel == NULL) {
2078 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2079 		return;
2080 	}
2081 
2082 	trace__set_base_time(trace, evsel, sample);
2083 
2084 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2085 	    sample->raw_data == NULL) {
2086 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2087 		       perf_evsel__name(evsel), sample->tid,
2088 		       sample->cpu, sample->raw_size);
2089 	} else {
2090 		tracepoint_handler handler = evsel->handler;
2091 		handler(trace, evsel, event, sample);
2092 	}
2093 }
2094 
/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint evsels, wire up
 * their handlers and the sc_tp field accessors ("args" pointer on enter,
 * "ret" integer on exit), and add both to the evlist. On success the evsels
 * are also cached in trace->syscalls.events for later filter setup.
 *
 * Returns 0 on success, -1 on any failure; partially created evsels are
 * released via the goto ladder below (exit before enter, matching creation
 * order in reverse).
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2140 
/*
 * Translate the -e syscall name list (already resolved to syscall ids in
 * trace->ev_qualifier_ids) into a tracepoint filter expression on the "id"
 * field, e.g. "id in (0, 1, 2)" or "id not in (...)" for a '!' qualifier,
 * and AND it onto both the sys_enter and sys_exit evsels.
 *
 * Returns 0 on success; on failure returns -1 with errno set (ENOMEM when
 * the filter string could not be built).
 */
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	/* append_filter() returns 0 on success, so only touch sys_exit
	 * after sys_enter's filter was installed successfully. */
	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}
2161 
/*
 * Live tracing main loop: set up the evsels (syscall tracepoints,
 * vfs_getname, pagefaults, sched_stat_runtime), configure callchains and
 * filters, optionally fork the workload, then poll the ring buffers and
 * dispatch events until interrupted or the workload finishes.
 *
 * Returns 0 on a clean run, a negative value on error; all exits funnel
 * through out_delete_evlist so the evlist is always released.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;	/* extra argv means: fork a workload */
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, NULL);

	if (callchain_param.enabled) {
		bool use_identifier = false;

		if (trace->syscalls.events.sys_exit) {
			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
						     &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_maj) {
			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_min) {
			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (use_identifier) {
		       /*
			* Now we have evsels with different sample_ids, use
			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			* from a fixed position in each ring buffer record.
			*
			* As of this the changeset introducing this comment, this
			* isn't strictly needed, as the fields that can come before
			* PERF_SAMPLE_ID are all used, but we'll probably disable
			* some of those for things like copying the payload of
			* pointer syscall arguments, and for vfs_getname we don't
			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			*/
			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			perf_evlist__reset_sample_bit(evlist, ID);
		}
	}

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
/*
 * Main event loop: drain every mmap'ed ring buffer, then poll for more.
 * 'draining' means we stopped the events and are consuming what is left.
 */
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	/* no new events this pass: block in poll() unless we are draining */
	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error labels below live in a bare block after the function's normal
 * return so they can share the errbuf declaration; each formats a message
 * and jumps to out_error -> out_delete_evlist for common cleanup.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2420 
/*
 * Replay a previously recorded perf.data file (perf trace -i): install the
 * tool callbacks, hook the syscall enter/exit and pagefault evsels found in
 * the file's evlist to the same handlers used in live mode, then process the
 * session, printing a summary at the end when requested.
 *
 * Returns 0 on success, negative on error.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* route any recorded pagefault software events to the live handler */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2516 
/* Print the per-thread summary banner; returns the number of bytes written. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2525 
/*
 * Re-sort helper for the per-thread syscall stats intlist: each resort entry
 * carries the syscall id, its struct stats and the total time in msecs, and
 * entries are ordered by descending total time (a->msecs > b->msecs).
 * The body below fills one resort entry from an intlist node.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats 	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* total time = nr calls * mean, converted from ns to msec */
	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2539 
/*
 * Print one thread's per-syscall statistics table (calls, total/min/avg/max
 * time in msec and relative stddev), sorted by total time spent, to fp.
 * Returns the number of bytes written (0 if the resort list could not be
 * built).
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* stats hold nanoseconds; convert to msec for display */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* relative standard deviation, percent of the average */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2582 
2583 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2584 {
2585 	size_t printed = 0;
2586 	struct thread_trace *ttrace = thread__priv(thread);
2587 	double ratio;
2588 
2589 	if (ttrace == NULL)
2590 		return 0;
2591 
2592 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2593 
2594 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2595 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2596 	printed += fprintf(fp, "%.1f%%", ratio);
2597 	if (ttrace->pfmaj)
2598 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2599 	if (ttrace->pfmin)
2600 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2601 	if (trace->sched)
2602 		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2603 	else if (fputc('\n', fp) != EOF)
2604 		++printed;
2605 
2606 	printed += thread__dump_stats(ttrace, trace, fp);
2607 
2608 	return printed;
2609 }
2610 
2611 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2612 {
2613 	return ttrace ? ttrace->nr_events : 0;
2614 }
2615 
/*
 * Re-sort helper ordering threads by descending event count for the summary
 * (the '<' comparison yields most-active-first in the resort traversal).
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2622 
/*
 * Print the end-of-run summary: one section per traced thread, most active
 * first. Returns bytes written; on resort failure returns 0 even though the
 * header was already printed (NOTE(review): header bytes are not counted on
 * that path).
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	resort_rb__for_each_entry(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2641 
2642 static int trace__set_duration(const struct option *opt, const char *str,
2643 			       int unset __maybe_unused)
2644 {
2645 	struct trace *trace = opt->value;
2646 
2647 	trace->duration_filter = atof(str);
2648 	return 0;
2649 }
2650 
2651 static int trace__set_filter_pids(const struct option *opt, const char *str,
2652 				  int unset __maybe_unused)
2653 {
2654 	int ret = -1;
2655 	size_t i;
2656 	struct trace *trace = opt->value;
2657 	/*
2658 	 * FIXME: introduce a intarray class, plain parse csv and create a
2659 	 * { int nr, int entries[] } struct...
2660 	 */
2661 	struct intlist *list = intlist__new(str);
2662 
2663 	if (list == NULL)
2664 		return -1;
2665 
2666 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2667 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2668 
2669 	if (trace->filter_pids.entries == NULL)
2670 		goto out;
2671 
2672 	trace->filter_pids.entries[0] = getpid();
2673 
2674 	for (i = 1; i < trace->filter_pids.nr; ++i)
2675 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2676 
2677 	intlist__delete(list);
2678 	ret = 0;
2679 out:
2680 	return ret;
2681 }
2682 
2683 static int trace__open_output(struct trace *trace, const char *filename)
2684 {
2685 	struct stat st;
2686 
2687 	if (!stat(filename, &st) && st.st_size) {
2688 		char oldname[PATH_MAX];
2689 
2690 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2691 		unlink(oldname);
2692 		rename(filename, oldname);
2693 	}
2694 
2695 	trace->output = fopen(filename, "w");
2696 
2697 	return trace->output == NULL ? -errno : 0;
2698 }
2699 
2700 static int parse_pagefaults(const struct option *opt, const char *str,
2701 			    int unset __maybe_unused)
2702 {
2703 	int *trace_pgfaults = opt->value;
2704 
2705 	if (strcmp(str, "all") == 0)
2706 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2707 	else if (strcmp(str, "maj") == 0)
2708 		*trace_pgfaults |= TRACE_PFMAJ;
2709 	else if (strcmp(str, "min") == 0)
2710 		*trace_pgfaults |= TRACE_PFMIN;
2711 	else
2712 		return -1;
2713 
2714 	return 0;
2715 }
2716 
/*
 * Install the same sample handler on every evsel in the list, used for the
 * events the user added via --event on the command line.
 */
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = handler;
}
2724 
2725 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2726 {
2727 	const char *trace_usage[] = {
2728 		"perf trace [<options>] [<command>]",
2729 		"perf trace [<options>] -- <command> [<options>]",
2730 		"perf trace record [<options>] [<command>]",
2731 		"perf trace record [<options>] -- <command> [<options>]",
2732 		NULL
2733 	};
2734 	struct trace trace = {
2735 		.syscalls = {
2736 			. max = -1,
2737 		},
2738 		.opts = {
2739 			.target = {
2740 				.uid	   = UINT_MAX,
2741 				.uses_mmap = true,
2742 			},
2743 			.user_freq     = UINT_MAX,
2744 			.user_interval = ULLONG_MAX,
2745 			.no_buffering  = true,
2746 			.mmap_pages    = UINT_MAX,
2747 			.proc_map_timeout  = 500,
2748 		},
2749 		.output = stderr,
2750 		.show_comm = true,
2751 		.trace_syscalls = true,
2752 		.kernel_syscallchains = false,
2753 		.max_stack = UINT_MAX,
2754 	};
2755 	const char *output_name = NULL;
2756 	const char *ev_qualifier_str = NULL;
2757 	const struct option trace_options[] = {
2758 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
2759 		     "event selector. use 'perf list' to list available events",
2760 		     parse_events_option),
2761 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2762 		    "show the thread COMM next to its id"),
2763 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2764 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2765 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2766 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2767 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2768 		    "trace events on existing process id"),
2769 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2770 		    "trace events on existing thread id"),
2771 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2772 		     "pids to filter (by the kernel)", trace__set_filter_pids),
2773 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2774 		    "system-wide collection from all CPUs"),
2775 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2776 		    "list of cpus to monitor"),
2777 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2778 		    "child tasks do not inherit counters"),
2779 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2780 		     "number of mmap data pages",
2781 		     perf_evlist__parse_mmap_pages),
2782 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2783 		   "user to profile"),
2784 	OPT_CALLBACK(0, "duration", &trace, "float",
2785 		     "show only events with duration > N.M ms",
2786 		     trace__set_duration),
2787 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2788 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2789 	OPT_BOOLEAN('T', "time", &trace.full_time,
2790 		    "Show full timestamp, not time relative to first start"),
2791 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2792 		    "Show only syscall summary with statistics"),
2793 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2794 		    "Show all syscalls and summary with statistics"),
2795 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2796 		     "Trace pagefaults", parse_pagefaults, "maj"),
2797 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2798 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2799 	OPT_CALLBACK(0, "call-graph", &trace.opts,
2800 		     "record_mode[,record_size]", record_callchain_help,
2801 		     &record_parse_callchain_opt),
2802 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2803 		    "Show the kernel callchains on the syscall exit path"),
2804 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2805 		     "Set the minimum stack depth when parsing the callchain, "
2806 		     "anything below the specified depth will be ignored."),
2807 	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2808 		     "Set the maximum stack depth when parsing the callchain, "
2809 		     "anything beyond the specified depth will be ignored. "
2810 		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2811 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2812 			"per thread proc mmap processing timeout in ms"),
2813 	OPT_END()
2814 	};
2815 	bool __maybe_unused max_stack_user_set = true;
2816 	bool mmap_pages_user_set = true;
2817 	const char * const trace_subcommands[] = { "record", NULL };
2818 	int err;
2819 	char bf[BUFSIZ];
2820 
2821 	signal(SIGSEGV, sighandler_dump_stack);
2822 	signal(SIGFPE, sighandler_dump_stack);
2823 
2824 	trace.evlist = perf_evlist__new();
2825 	trace.sctbl = syscalltbl__new();
2826 
2827 	if (trace.evlist == NULL || trace.sctbl == NULL) {
2828 		pr_err("Not enough memory to run!\n");
2829 		err = -ENOMEM;
2830 		goto out;
2831 	}
2832 
2833 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2834 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2835 
2836 	err = bpf__setup_stdout(trace.evlist);
2837 	if (err) {
2838 		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2839 		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2840 		goto out;
2841 	}
2842 
2843 	err = -1;
2844 
2845 	if (trace.trace_pgfaults) {
2846 		trace.opts.sample_address = true;
2847 		trace.opts.sample_time = true;
2848 	}
2849 
2850 	if (trace.opts.mmap_pages == UINT_MAX)
2851 		mmap_pages_user_set = false;
2852 
2853 	if (trace.max_stack == UINT_MAX) {
2854 		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2855 		max_stack_user_set = false;
2856 	}
2857 
2858 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2859 	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2860 		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2861 #endif
2862 
2863 	if (callchain_param.enabled) {
2864 		if (!mmap_pages_user_set && geteuid() == 0)
2865 			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2866 
2867 		symbol_conf.use_callchain = true;
2868 	}
2869 
2870 	if (trace.evlist->nr_entries > 0)
2871 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2872 
2873 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2874 		return trace__record(&trace, argc-1, &argv[1]);
2875 
2876 	/* summary_only implies summary option, but don't overwrite summary if set */
2877 	if (trace.summary_only)
2878 		trace.summary = trace.summary_only;
2879 
2880 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2881 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
2882 		pr_err("Please specify something to trace.\n");
2883 		return -1;
2884 	}
2885 
2886 	if (!trace.trace_syscalls && ev_qualifier_str) {
2887 		pr_err("The -e option can't be used with --no-syscalls.\n");
2888 		goto out;
2889 	}
2890 
2891 	if (output_name != NULL) {
2892 		err = trace__open_output(&trace, output_name);
2893 		if (err < 0) {
2894 			perror("failed to create output file");
2895 			goto out;
2896 		}
2897 	}
2898 
2899 	trace.open_id = syscalltbl__id(trace.sctbl, "open");
2900 
2901 	if (ev_qualifier_str != NULL) {
2902 		const char *s = ev_qualifier_str;
2903 		struct strlist_config slist_config = {
2904 			.dirname = system_path(STRACE_GROUPS_DIR),
2905 		};
2906 
2907 		trace.not_ev_qualifier = *s == '!';
2908 		if (trace.not_ev_qualifier)
2909 			++s;
2910 		trace.ev_qualifier = strlist__new(s, &slist_config);
2911 		if (trace.ev_qualifier == NULL) {
2912 			fputs("Not enough memory to parse event qualifier",
2913 			      trace.output);
2914 			err = -ENOMEM;
2915 			goto out_close;
2916 		}
2917 
2918 		err = trace__validate_ev_qualifier(&trace);
2919 		if (err)
2920 			goto out_close;
2921 	}
2922 
2923 	err = target__validate(&trace.opts.target);
2924 	if (err) {
2925 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2926 		fprintf(trace.output, "%s", bf);
2927 		goto out_close;
2928 	}
2929 
2930 	err = target__parse_uid(&trace.opts.target);
2931 	if (err) {
2932 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2933 		fprintf(trace.output, "%s", bf);
2934 		goto out_close;
2935 	}
2936 
2937 	if (!argc && target__none(&trace.opts.target))
2938 		trace.opts.target.system_wide = true;
2939 
2940 	if (input_name)
2941 		err = trace__replay(&trace);
2942 	else
2943 		err = trace__run(&trace, argc, argv);
2944 
2945 out_close:
2946 	if (output_name != NULL)
2947 		fclose(trace.output);
2948 out:
2949 	return err;
2950 }
2951