xref: /openbmc/linux/tools/perf/builtin-trace.c (revision 8e694cd2)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40 
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
48 #include <linux/stringify.h>
49 
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC		02000000
52 #endif
53 
54 struct trace {
55 	struct perf_tool	tool;
56 	struct syscalltbl	*sctbl;
57 	struct {
58 		int		max;
59 		struct syscall  *table;
60 		struct {
61 			struct perf_evsel *sys_enter,
62 					  *sys_exit;
63 		}		events;
64 	} syscalls;
65 	struct record_opts	opts;
66 	struct perf_evlist	*evlist;
67 	struct machine		*host;
68 	struct thread		*current;
69 	u64			base_time;
70 	FILE			*output;
71 	unsigned long		nr_events;
72 	struct strlist		*ev_qualifier;
73 	struct {
74 		size_t		nr;
75 		int		*entries;
76 	}			ev_qualifier_ids;
77 	struct intlist		*tid_list;
78 	struct intlist		*pid_list;
79 	struct {
80 		size_t		nr;
81 		pid_t		*entries;
82 	}			filter_pids;
83 	double			duration_filter;
84 	double			runtime_ms;
85 	struct {
86 		u64		vfs_getname,
87 				proc_getname;
88 	} stats;
89 	unsigned int		max_stack;
90 	unsigned int		min_stack;
91 	bool			not_ev_qualifier;
92 	bool			live;
93 	bool			full_time;
94 	bool			sched;
95 	bool			multiple_threads;
96 	bool			summary;
97 	bool			summary_only;
98 	bool			show_comm;
99 	bool			show_tool_stats;
100 	bool			trace_syscalls;
101 	bool			kernel_syscallchains;
102 	bool			force;
103 	bool			vfs_getname;
104 	int			trace_pgfaults;
105 	int			open_id;
106 };
107 
108 struct tp_field {
109 	int offset;
110 	union {
111 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113 	};
114 };
115 
116 #define TP_UINT_FIELD(bits) \
117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118 { \
119 	u##bits value; \
120 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 	return value;  \
122 }
123 
124 TP_UINT_FIELD(8);
125 TP_UINT_FIELD(16);
126 TP_UINT_FIELD(32);
127 TP_UINT_FIELD(64);
128 
129 #define TP_UINT_FIELD__SWAPPED(bits) \
130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131 { \
132 	u##bits value; \
133 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
134 	return bswap_##bits(value);\
135 }
136 
137 TP_UINT_FIELD__SWAPPED(16);
138 TP_UINT_FIELD__SWAPPED(32);
139 TP_UINT_FIELD__SWAPPED(64);
140 
141 static int tp_field__init_uint(struct tp_field *field,
142 			       struct format_field *format_field,
143 			       bool needs_swap)
144 {
145 	field->offset = format_field->offset;
146 
147 	switch (format_field->size) {
148 	case 1:
149 		field->integer = tp_field__u8;
150 		break;
151 	case 2:
152 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 		break;
154 	case 4:
155 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 		break;
157 	case 8:
158 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 		break;
160 	default:
161 		return -1;
162 	}
163 
164 	return 0;
165 }
166 
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169 	return sample->raw_data + field->offset;
170 }
171 
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174 	field->offset = format_field->offset;
175 	field->pointer = tp_field__ptr;
176 	return 0;
177 }
178 
179 struct syscall_tp {
180 	struct tp_field id;
181 	union {
182 		struct tp_field args, ret;
183 	};
184 };
185 
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 					  struct tp_field *field,
188 					  const char *name)
189 {
190 	struct format_field *format_field = perf_evsel__field(evsel, name);
191 
192 	if (format_field == NULL)
193 		return -1;
194 
195 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197 
198 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
199 	({ struct syscall_tp *sc = evsel->priv;\
200 	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201 
202 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
203 					 struct tp_field *field,
204 					 const char *name)
205 {
206 	struct format_field *format_field = perf_evsel__field(evsel, name);
207 
208 	if (format_field == NULL)
209 		return -1;
210 
211 	return tp_field__init_ptr(field, format_field);
212 }
213 
214 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
215 	({ struct syscall_tp *sc = evsel->priv;\
216 	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217 
218 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219 {
220 	zfree(&evsel->priv);
221 	perf_evsel__delete(evsel);
222 }
223 
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226 	evsel->priv = malloc(sizeof(struct syscall_tp));
227 	if (evsel->priv != NULL) {
228 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 			goto out_delete;
230 
231 		evsel->handler = handler;
232 		return 0;
233 	}
234 
235 	return -ENOMEM;
236 
237 out_delete:
238 	zfree(&evsel->priv);
239 	return -ENOENT;
240 }
241 
242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
243 {
244 	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
245 
246 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
247 	if (IS_ERR(evsel))
248 		evsel = perf_evsel__newtp("syscalls", direction);
249 
250 	if (IS_ERR(evsel))
251 		return NULL;
252 
253 	if (perf_evsel__init_syscall_tp(evsel, handler))
254 		goto out_delete;
255 
256 	return evsel;
257 
258 out_delete:
259 	perf_evsel__delete_priv(evsel);
260 	return NULL;
261 }
262 
263 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
264 	({ struct syscall_tp *fields = evsel->priv; \
265 	   fields->name.integer(&fields->name, sample); })
266 
267 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
268 	({ struct syscall_tp *fields = evsel->priv; \
269 	   fields->name.pointer(&fields->name, sample); })
270 
271 struct syscall_arg {
272 	unsigned long val;
273 	struct thread *thread;
274 	struct trace  *trace;
275 	void	      *parm;
276 	u8	      idx;
277 	u8	      mask;
278 };
279 
280 struct strarray {
281 	int	    offset;
282 	int	    nr_entries;
283 	const char **entries;
284 };
285 
286 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
287 	.nr_entries = ARRAY_SIZE(array), \
288 	.entries = array, \
289 }
290 
291 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
292 	.offset	    = off, \
293 	.nr_entries = ARRAY_SIZE(array), \
294 	.entries = array, \
295 }
296 
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 						const char *intfmt,
299 					        struct syscall_arg *arg)
300 {
301 	struct strarray *sa = arg->parm;
302 	int idx = arg->val - sa->offset;
303 
304 	if (idx < 0 || idx >= sa->nr_entries)
305 		return scnprintf(bf, size, intfmt, arg->val);
306 
307 	return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309 
310 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
311 					      struct syscall_arg *arg)
312 {
313 	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
314 }
315 
316 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
317 
318 #if defined(__i386__) || defined(__x86_64__)
319 /*
320  * FIXME: Make this available to all arches as soon as the ioctl beautifier
321  * 	  gets rewritten to support all arches.
322  */
323 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
324 						 struct syscall_arg *arg)
325 {
326 	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
327 }
328 
329 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
330 #endif /* defined(__i386__) || defined(__x86_64__) */
331 
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 					struct syscall_arg *arg);
334 
335 #define SCA_FD syscall_arg__scnprintf_fd
336 
337 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
338 					   struct syscall_arg *arg)
339 {
340 	int fd = arg->val;
341 
342 	if (fd == AT_FDCWD)
343 		return scnprintf(bf, size, "CWD");
344 
345 	return syscall_arg__scnprintf_fd(bf, size, arg);
346 }
347 
348 #define SCA_FDAT syscall_arg__scnprintf_fd_at
349 
350 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
351 					      struct syscall_arg *arg);
352 
353 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354 
355 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
356 					 struct syscall_arg *arg)
357 {
358 	return scnprintf(bf, size, "%#lx", arg->val);
359 }
360 
361 #define SCA_HEX syscall_arg__scnprintf_hex
362 
363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364 					 struct syscall_arg *arg)
365 {
366 	return scnprintf(bf, size, "%d", arg->val);
367 }
368 
369 #define SCA_INT syscall_arg__scnprintf_int
370 
371 static const char *bpf_cmd[] = {
372 	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
373 	"MAP_GET_NEXT_KEY", "PROG_LOAD",
374 };
375 static DEFINE_STRARRAY(bpf_cmd);
376 
377 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
378 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
379 
380 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
381 static DEFINE_STRARRAY(itimers);
382 
383 static const char *keyctl_options[] = {
384 	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
385 	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
386 	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
387 	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
388 	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
389 };
390 static DEFINE_STRARRAY(keyctl_options);
391 
392 static const char *whences[] = { "SET", "CUR", "END",
393 #ifdef SEEK_DATA
394 "DATA",
395 #endif
396 #ifdef SEEK_HOLE
397 "HOLE",
398 #endif
399 };
400 static DEFINE_STRARRAY(whences);
401 
402 static const char *fcntl_cmds[] = {
403 	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
404 	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
405 	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
406 	"F_GETOWNER_UIDS",
407 };
408 static DEFINE_STRARRAY(fcntl_cmds);
409 
410 static const char *rlimit_resources[] = {
411 	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
412 	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
413 	"RTTIME",
414 };
415 static DEFINE_STRARRAY(rlimit_resources);
416 
417 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
418 static DEFINE_STRARRAY(sighow);
419 
420 static const char *clockid[] = {
421 	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
422 	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
423 	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
424 };
425 static DEFINE_STRARRAY(clockid);
426 
427 static const char *socket_families[] = {
428 	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
429 	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
430 	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
431 	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
432 	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
433 	"ALG", "NFC", "VSOCK",
434 };
435 static DEFINE_STRARRAY(socket_families);
436 
437 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
438 						 struct syscall_arg *arg)
439 {
440 	size_t printed = 0;
441 	int mode = arg->val;
442 
443 	if (mode == F_OK) /* 0 */
444 		return scnprintf(bf, size, "F");
445 #define	P_MODE(n) \
446 	if (mode & n##_OK) { \
447 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
448 		mode &= ~n##_OK; \
449 	}
450 
451 	P_MODE(R);
452 	P_MODE(W);
453 	P_MODE(X);
454 #undef P_MODE
455 
456 	if (mode)
457 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
458 
459 	return printed;
460 }
461 
462 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
463 
464 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
465 					      struct syscall_arg *arg);
466 
467 #define SCA_FILENAME syscall_arg__scnprintf_filename
468 
469 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
470 						struct syscall_arg *arg)
471 {
472 	int printed = 0, flags = arg->val;
473 
474 #define	P_FLAG(n) \
475 	if (flags & O_##n) { \
476 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
477 		flags &= ~O_##n; \
478 	}
479 
480 	P_FLAG(CLOEXEC);
481 	P_FLAG(NONBLOCK);
482 #undef P_FLAG
483 
484 	if (flags)
485 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
486 
487 	return printed;
488 }
489 
490 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491 
492 #if defined(__i386__) || defined(__x86_64__)
493 /*
494  * FIXME: Make this available to all arches.
495  */
496 #define TCGETS		0x5401
497 
498 static const char *tioctls[] = {
499 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
500 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
501 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
502 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
503 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
504 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
505 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
506 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
507 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
508 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
509 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
510 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
511 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
512 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
513 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
514 };
515 
516 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
517 #endif /* defined(__i386__) || defined(__x86_64__) */
518 
519 #ifndef GRND_NONBLOCK
520 #define GRND_NONBLOCK	0x0001
521 #endif
522 #ifndef GRND_RANDOM
523 #define GRND_RANDOM	0x0002
524 #endif
525 
526 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
527 						   struct syscall_arg *arg)
528 {
529 	int printed = 0, flags = arg->val;
530 
531 #define	P_FLAG(n) \
532 	if (flags & GRND_##n) { \
533 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
534 		flags &= ~GRND_##n; \
535 	}
536 
537 	P_FLAG(RANDOM);
538 	P_FLAG(NONBLOCK);
539 #undef P_FLAG
540 
541 	if (flags)
542 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
543 
544 	return printed;
545 }
546 
547 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
548 
549 #define STRARRAY(arg, name, array) \
550 	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
551 	  .arg_parm	 = { [arg] = &strarray__##array, }
552 
553 #include "trace/beauty/eventfd.c"
554 #include "trace/beauty/flock.c"
555 #include "trace/beauty/futex_op.c"
556 #include "trace/beauty/mmap.c"
557 #include "trace/beauty/mode_t.c"
558 #include "trace/beauty/msg_flags.c"
559 #include "trace/beauty/open_flags.c"
560 #include "trace/beauty/perf_event_open.c"
561 #include "trace/beauty/pid.c"
562 #include "trace/beauty/sched_policy.c"
563 #include "trace/beauty/seccomp.c"
564 #include "trace/beauty/signum.c"
565 #include "trace/beauty/socket_type.c"
566 #include "trace/beauty/waitid_options.c"
567 
568 static struct syscall_fmt {
569 	const char *name;
570 	const char *alias;
571 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
572 	void	   *arg_parm[6];
573 	bool	   errmsg;
574 	bool	   errpid;
575 	bool	   timeout;
576 	bool	   hexret;
577 } syscall_fmts[] = {
578 	{ .name	    = "access",	    .errmsg = true,
579 	  .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
580 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
581 	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
582 	{ .name	    = "brk",	    .hexret = true,
583 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
584 	{ .name	    = "chdir",	    .errmsg = true, },
585 	{ .name	    = "chmod",	    .errmsg = true, },
586 	{ .name	    = "chroot",	    .errmsg = true, },
587 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
588 	{ .name	    = "clone",	    .errpid = true, },
589 	{ .name	    = "close",	    .errmsg = true,
590 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
591 	{ .name	    = "connect",    .errmsg = true, },
592 	{ .name	    = "creat",	    .errmsg = true, },
593 	{ .name	    = "dup",	    .errmsg = true, },
594 	{ .name	    = "dup2",	    .errmsg = true, },
595 	{ .name	    = "dup3",	    .errmsg = true, },
596 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
597 	{ .name	    = "eventfd2",   .errmsg = true,
598 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
599 	{ .name	    = "faccessat",  .errmsg = true, },
600 	{ .name	    = "fadvise64",  .errmsg = true, },
601 	{ .name	    = "fallocate",  .errmsg = true, },
602 	{ .name	    = "fchdir",	    .errmsg = true, },
603 	{ .name	    = "fchmod",	    .errmsg = true, },
604 	{ .name	    = "fchmodat",   .errmsg = true,
605 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
606 	{ .name	    = "fchown",	    .errmsg = true, },
607 	{ .name	    = "fchownat",   .errmsg = true,
608 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
609 	{ .name	    = "fcntl",	    .errmsg = true,
610 	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
611 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
612 	{ .name	    = "fdatasync",  .errmsg = true, },
613 	{ .name	    = "flock",	    .errmsg = true,
614 	  .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
615 	{ .name	    = "fsetxattr",  .errmsg = true, },
616 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
617 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
618 	{ .name	    = "fstatfs",    .errmsg = true, },
619 	{ .name	    = "fsync",    .errmsg = true, },
620 	{ .name	    = "ftruncate", .errmsg = true, },
621 	{ .name	    = "futex",	    .errmsg = true,
622 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
623 	{ .name	    = "futimesat", .errmsg = true,
624 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
625 	{ .name	    = "getdents",   .errmsg = true, },
626 	{ .name	    = "getdents64", .errmsg = true, },
627 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
628 	{ .name	    = "getpid",	    .errpid = true, },
629 	{ .name	    = "getpgid",    .errpid = true, },
630 	{ .name	    = "getppid",    .errpid = true, },
631 	{ .name	    = "getrandom",  .errmsg = true,
632 	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
633 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
634 	{ .name	    = "getxattr",   .errmsg = true, },
635 	{ .name	    = "inotify_add_watch",	    .errmsg = true, },
636 	{ .name	    = "ioctl",	    .errmsg = true,
637 	  .arg_scnprintf = {
638 #if defined(__i386__) || defined(__x86_64__)
639 /*
640  * FIXME: Make this available to all arches.
641  */
642 			     [1] = SCA_STRHEXARRAY, /* cmd */
643 			     [2] = SCA_HEX, /* arg */ },
644 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
645 #else
646 			     [2] = SCA_HEX, /* arg */ }, },
647 #endif
648 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
649 	{ .name	    = "kill",	    .errmsg = true,
650 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
651 	{ .name	    = "lchown",    .errmsg = true, },
652 	{ .name	    = "lgetxattr",  .errmsg = true, },
653 	{ .name	    = "linkat",	    .errmsg = true,
654 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
655 	{ .name	    = "listxattr",  .errmsg = true, },
656 	{ .name	    = "llistxattr", .errmsg = true, },
657 	{ .name	    = "lremovexattr",  .errmsg = true, },
658 	{ .name	    = "lseek",	    .errmsg = true,
659 	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
660 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
661 	{ .name	    = "lsetxattr",  .errmsg = true, },
662 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
663 	{ .name	    = "lsxattr",    .errmsg = true, },
664 	{ .name     = "madvise",    .errmsg = true,
665 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
666 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
667 	{ .name	    = "mkdir",    .errmsg = true, },
668 	{ .name	    = "mkdirat",    .errmsg = true,
669 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
670 	{ .name	    = "mknod",      .errmsg = true, },
671 	{ .name	    = "mknodat",    .errmsg = true,
672 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
673 	{ .name	    = "mlock",	    .errmsg = true,
674 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
675 	{ .name	    = "mlockall",   .errmsg = true,
676 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
677 	{ .name	    = "mmap",	    .hexret = true,
678 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
679 			     [2] = SCA_MMAP_PROT, /* prot */
680 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
681 	{ .name	    = "mprotect",   .errmsg = true,
682 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
683 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
684 	{ .name	    = "mq_unlink", .errmsg = true,
685 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
686 	{ .name	    = "mremap",	    .hexret = true,
687 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
688 			     [3] = SCA_MREMAP_FLAGS, /* flags */
689 			     [4] = SCA_HEX, /* new_addr */ }, },
690 	{ .name	    = "munlock",    .errmsg = true,
691 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
692 	{ .name	    = "munmap",	    .errmsg = true,
693 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
694 	{ .name	    = "name_to_handle_at", .errmsg = true,
695 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
696 	{ .name	    = "newfstatat", .errmsg = true,
697 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
698 	{ .name	    = "open",	    .errmsg = true,
699 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
700 	{ .name	    = "open_by_handle_at", .errmsg = true,
701 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
702 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
703 	{ .name	    = "openat",	    .errmsg = true,
704 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
705 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
706 	{ .name	    = "perf_event_open", .errmsg = true,
707 	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
708 			     [3] = SCA_FD,  /* group_fd */
709 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
710 	{ .name	    = "pipe2",	    .errmsg = true,
711 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
712 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
713 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
714 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
715 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread", },
716 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
717 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
718 	{ .name	    = "pwritev",    .errmsg = true, },
719 	{ .name	    = "read",	    .errmsg = true, },
720 	{ .name	    = "readlink",   .errmsg = true, },
721 	{ .name	    = "readlinkat", .errmsg = true,
722 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
723 	{ .name	    = "readv",	    .errmsg = true, },
724 	{ .name	    = "recvfrom",   .errmsg = true,
725 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
726 	{ .name	    = "recvmmsg",   .errmsg = true,
727 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
728 	{ .name	    = "recvmsg",    .errmsg = true,
729 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
730 	{ .name	    = "removexattr", .errmsg = true, },
731 	{ .name	    = "renameat",   .errmsg = true,
732 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
733 	{ .name	    = "rmdir",    .errmsg = true, },
734 	{ .name	    = "rt_sigaction", .errmsg = true,
735 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
736 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
737 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
738 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
739 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
740 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
741 	{ .name	    = "sched_setscheduler",   .errmsg = true,
742 	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
743 	{ .name	    = "seccomp", .errmsg = true,
744 	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
745 			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
746 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
747 	{ .name	    = "sendmmsg",    .errmsg = true,
748 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
749 	{ .name	    = "sendmsg",    .errmsg = true,
750 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
751 	{ .name	    = "sendto",	    .errmsg = true,
752 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
753 	{ .name	    = "set_tid_address", .errpid = true, },
754 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
755 	{ .name	    = "setpgid",    .errmsg = true, },
756 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
757 	{ .name	    = "setxattr",   .errmsg = true, },
758 	{ .name	    = "shutdown",   .errmsg = true, },
759 	{ .name	    = "socket",	    .errmsg = true,
760 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
761 			     [1] = SCA_SK_TYPE, /* type */ },
762 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
763 	{ .name	    = "socketpair", .errmsg = true,
764 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
765 			     [1] = SCA_SK_TYPE, /* type */ },
766 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
767 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
768 	{ .name	    = "statfs",	    .errmsg = true, },
769 	{ .name	    = "swapoff",    .errmsg = true,
770 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
771 	{ .name	    = "swapon",	    .errmsg = true,
772 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
773 	{ .name	    = "symlinkat",  .errmsg = true,
774 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
775 	{ .name	    = "tgkill",	    .errmsg = true,
776 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
777 	{ .name	    = "tkill",	    .errmsg = true,
778 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
779 	{ .name	    = "truncate",   .errmsg = true, },
780 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
781 	{ .name	    = "unlinkat",   .errmsg = true,
782 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
783 	{ .name	    = "utime",  .errmsg = true, },
784 	{ .name	    = "utimensat",  .errmsg = true,
785 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
786 	{ .name	    = "utimes",  .errmsg = true, },
787 	{ .name	    = "vmsplice",  .errmsg = true, },
788 	{ .name	    = "wait4",	    .errpid = true,
789 	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
790 	{ .name	    = "waitid",	    .errpid = true,
791 	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
792 	{ .name	    = "write",	    .errmsg = true, },
793 	{ .name	    = "writev",	    .errmsg = true, },
794 };
795 
796 static int syscall_fmt__cmp(const void *name, const void *fmtp)
797 {
798 	const struct syscall_fmt *fmt = fmtp;
799 	return strcmp(name, fmt->name);
800 }
801 
802 static struct syscall_fmt *syscall_fmt__find(const char *name)
803 {
804 	const int nmemb = ARRAY_SIZE(syscall_fmts);
805 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
806 }
807 
808 struct syscall {
809 	struct event_format *tp_format;
810 	int		    nr_args;
811 	struct format_field *args;
812 	const char	    *name;
813 	bool		    is_exit;
814 	struct syscall_fmt  *fmt;
815 	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
816 	void		    **arg_parm;
817 };
818 
819 static size_t fprintf_duration(unsigned long t, FILE *fp)
820 {
821 	double duration = (double)t / NSEC_PER_MSEC;
822 	size_t printed = fprintf(fp, "(");
823 
824 	if (duration >= 1.0)
825 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
826 	else if (duration >= 0.01)
827 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
828 	else
829 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
830 	return printed + fprintf(fp, "): ");
831 }
832 
833 /**
834  * filename.ptr: The filename char pointer that will be vfs_getname'd
835  * filename.entry_str_pos: Where to insert the string translated from
836  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
837  */
838 struct thread_trace {
839 	u64		  entry_time;
840 	u64		  exit_time;
841 	bool		  entry_pending;
842 	unsigned long	  nr_events;
843 	unsigned long	  pfmaj, pfmin;
844 	char		  *entry_str;
845 	double		  runtime_ms;
846         struct {
847 		unsigned long ptr;
848 		short int     entry_str_pos;
849 		bool	      pending_open;
850 		unsigned int  namelen;
851 		char	      *name;
852 	} filename;
853 	struct {
854 		int	  max;
855 		char	  **table;
856 	} paths;
857 
858 	struct intlist *syscall_stats;
859 };
860 
861 static struct thread_trace *thread_trace__new(void)
862 {
863 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
864 
865 	if (ttrace)
866 		ttrace->paths.max = -1;
867 
868 	ttrace->syscall_stats = intlist__new(NULL);
869 
870 	return ttrace;
871 }
872 
873 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
874 {
875 	struct thread_trace *ttrace;
876 
877 	if (thread == NULL)
878 		goto fail;
879 
880 	if (thread__priv(thread) == NULL)
881 		thread__set_priv(thread, thread_trace__new());
882 
883 	if (thread__priv(thread) == NULL)
884 		goto fail;
885 
886 	ttrace = thread__priv(thread);
887 	++ttrace->nr_events;
888 
889 	return ttrace;
890 fail:
891 	color_fprintf(fp, PERF_COLOR_RED,
892 		      "WARNING: not enough memory, dropping samples!\n");
893 	return NULL;
894 }
895 
896 #define TRACE_PFMAJ		(1 << 0)
897 #define TRACE_PFMIN		(1 << 1)
898 
899 static const size_t trace__entry_str_size = 2048;
900 
901 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
902 {
903 	struct thread_trace *ttrace = thread__priv(thread);
904 
905 	if (fd > ttrace->paths.max) {
906 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
907 
908 		if (npath == NULL)
909 			return -1;
910 
911 		if (ttrace->paths.max != -1) {
912 			memset(npath + ttrace->paths.max + 1, 0,
913 			       (fd - ttrace->paths.max) * sizeof(char *));
914 		} else {
915 			memset(npath, 0, (fd + 1) * sizeof(char *));
916 		}
917 
918 		ttrace->paths.table = npath;
919 		ttrace->paths.max   = fd;
920 	}
921 
922 	ttrace->paths.table[fd] = strdup(pathname);
923 
924 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
925 }
926 
927 static int thread__read_fd_path(struct thread *thread, int fd)
928 {
929 	char linkname[PATH_MAX], pathname[PATH_MAX];
930 	struct stat st;
931 	int ret;
932 
933 	if (thread->pid_ == thread->tid) {
934 		scnprintf(linkname, sizeof(linkname),
935 			  "/proc/%d/fd/%d", thread->pid_, fd);
936 	} else {
937 		scnprintf(linkname, sizeof(linkname),
938 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
939 	}
940 
941 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
942 		return -1;
943 
944 	ret = readlink(linkname, pathname, sizeof(pathname));
945 
946 	if (ret < 0 || ret > st.st_size)
947 		return -1;
948 
949 	pathname[ret] = '\0';
950 	return trace__set_fd_pathname(thread, fd, pathname);
951 }
952 
953 static const char *thread__fd_path(struct thread *thread, int fd,
954 				   struct trace *trace)
955 {
956 	struct thread_trace *ttrace = thread__priv(thread);
957 
958 	if (ttrace == NULL)
959 		return NULL;
960 
961 	if (fd < 0)
962 		return NULL;
963 
964 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
965 		if (!trace->live)
966 			return NULL;
967 		++trace->stats.proc_getname;
968 		if (thread__read_fd_path(thread, fd))
969 			return NULL;
970 	}
971 
972 	return ttrace->paths.table[fd];
973 }
974 
975 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
976 					struct syscall_arg *arg)
977 {
978 	int fd = arg->val;
979 	size_t printed = scnprintf(bf, size, "%d", fd);
980 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
981 
982 	if (path)
983 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
984 
985 	return printed;
986 }
987 
988 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
989 					      struct syscall_arg *arg)
990 {
991 	int fd = arg->val;
992 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
993 	struct thread_trace *ttrace = thread__priv(arg->thread);
994 
995 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
996 		zfree(&ttrace->paths.table[fd]);
997 
998 	return printed;
999 }
1000 
1001 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1002 				     unsigned long ptr)
1003 {
1004 	struct thread_trace *ttrace = thread__priv(thread);
1005 
1006 	ttrace->filename.ptr = ptr;
1007 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1008 }
1009 
1010 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1011 					      struct syscall_arg *arg)
1012 {
1013 	unsigned long ptr = arg->val;
1014 
1015 	if (!arg->trace->vfs_getname)
1016 		return scnprintf(bf, size, "%#x", ptr);
1017 
1018 	thread__set_filename_pos(arg->thread, bf, ptr);
1019 	return 0;
1020 }
1021 
1022 static bool trace__filter_duration(struct trace *trace, double t)
1023 {
1024 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1025 }
1026 
1027 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1028 {
1029 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1030 
1031 	return fprintf(fp, "%10.3f ", ts);
1032 }
1033 
1034 static bool done = false;
1035 static bool interrupted = false;
1036 
1037 static void sig_handler(int sig)
1038 {
1039 	done = true;
1040 	interrupted = sig == SIGINT;
1041 }
1042 
1043 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1044 					u64 duration, u64 tstamp, FILE *fp)
1045 {
1046 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1047 	printed += fprintf_duration(duration, fp);
1048 
1049 	if (trace->multiple_threads) {
1050 		if (trace->show_comm)
1051 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1052 		printed += fprintf(fp, "%d ", thread->tid);
1053 	}
1054 
1055 	return printed;
1056 }
1057 
1058 static int trace__process_event(struct trace *trace, struct machine *machine,
1059 				union perf_event *event, struct perf_sample *sample)
1060 {
1061 	int ret = 0;
1062 
1063 	switch (event->header.type) {
1064 	case PERF_RECORD_LOST:
1065 		color_fprintf(trace->output, PERF_COLOR_RED,
1066 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1067 		ret = machine__process_lost_event(machine, event, sample);
1068 		break;
1069 	default:
1070 		ret = machine__process_event(machine, event, sample);
1071 		break;
1072 	}
1073 
1074 	return ret;
1075 }
1076 
1077 static int trace__tool_process(struct perf_tool *tool,
1078 			       union perf_event *event,
1079 			       struct perf_sample *sample,
1080 			       struct machine *machine)
1081 {
1082 	struct trace *trace = container_of(tool, struct trace, tool);
1083 	return trace__process_event(trace, machine, event, sample);
1084 }
1085 
1086 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1087 {
1088 	struct machine *machine = vmachine;
1089 
1090 	if (machine->kptr_restrict_warned)
1091 		return NULL;
1092 
1093 	if (symbol_conf.kptr_restrict) {
1094 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1095 			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
1096 			   "Kernel samples will not be resolved.\n");
1097 		machine->kptr_restrict_warned = true;
1098 		return NULL;
1099 	}
1100 
1101 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1102 }
1103 
1104 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1105 {
1106 	int err = symbol__init(NULL);
1107 
1108 	if (err)
1109 		return err;
1110 
1111 	trace->host = machine__new_host();
1112 	if (trace->host == NULL)
1113 		return -ENOMEM;
1114 
1115 	if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1116 		return -errno;
1117 
1118 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1119 					    evlist->threads, trace__tool_process, false,
1120 					    trace->opts.proc_map_timeout);
1121 	if (err)
1122 		symbol__exit();
1123 
1124 	return err;
1125 }
1126 
1127 static int syscall__set_arg_fmts(struct syscall *sc)
1128 {
1129 	struct format_field *field;
1130 	int idx = 0, len;
1131 
1132 	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1133 	if (sc->arg_scnprintf == NULL)
1134 		return -1;
1135 
1136 	if (sc->fmt)
1137 		sc->arg_parm = sc->fmt->arg_parm;
1138 
1139 	for (field = sc->args; field; field = field->next) {
1140 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1141 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1142 		else if (strcmp(field->type, "const char *") == 0 &&
1143 			 (strcmp(field->name, "filename") == 0 ||
1144 			  strcmp(field->name, "path") == 0 ||
1145 			  strcmp(field->name, "pathname") == 0))
1146 			sc->arg_scnprintf[idx] = SCA_FILENAME;
1147 		else if (field->flags & FIELD_IS_POINTER)
1148 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1149 		else if (strcmp(field->type, "pid_t") == 0)
1150 			sc->arg_scnprintf[idx] = SCA_PID;
1151 		else if (strcmp(field->type, "umode_t") == 0)
1152 			sc->arg_scnprintf[idx] = SCA_MODE_T;
1153 		else if ((strcmp(field->type, "int") == 0 ||
1154 			  strcmp(field->type, "unsigned int") == 0 ||
1155 			  strcmp(field->type, "long") == 0) &&
1156 			 (len = strlen(field->name)) >= 2 &&
1157 			 strcmp(field->name + len - 2, "fd") == 0) {
1158 			/*
1159 			 * /sys/kernel/tracing/events/syscalls/sys_enter*
1160 			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1161 			 * 65 int
1162 			 * 23 unsigned int
1163 			 * 7 unsigned long
1164 			 */
1165 			sc->arg_scnprintf[idx] = SCA_FD;
1166 		}
1167 		++idx;
1168 	}
1169 
1170 	return 0;
1171 }
1172 
1173 static int trace__read_syscall_info(struct trace *trace, int id)
1174 {
1175 	char tp_name[128];
1176 	struct syscall *sc;
1177 	const char *name = syscalltbl__name(trace->sctbl, id);
1178 
1179 	if (name == NULL)
1180 		return -1;
1181 
1182 	if (id > trace->syscalls.max) {
1183 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1184 
1185 		if (nsyscalls == NULL)
1186 			return -1;
1187 
1188 		if (trace->syscalls.max != -1) {
1189 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1190 			       (id - trace->syscalls.max) * sizeof(*sc));
1191 		} else {
1192 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1193 		}
1194 
1195 		trace->syscalls.table = nsyscalls;
1196 		trace->syscalls.max   = id;
1197 	}
1198 
1199 	sc = trace->syscalls.table + id;
1200 	sc->name = name;
1201 
1202 	sc->fmt  = syscall_fmt__find(sc->name);
1203 
1204 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1205 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1206 
1207 	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1208 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1209 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1210 	}
1211 
1212 	if (IS_ERR(sc->tp_format))
1213 		return -1;
1214 
1215 	sc->args = sc->tp_format->format.fields;
1216 	sc->nr_args = sc->tp_format->format.nr_fields;
1217 	/*
1218 	 * We need to check and discard the first variable '__syscall_nr'
1219 	 * or 'nr' that mean the syscall number. It is needless here.
1220 	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1221 	 */
1222 	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1223 		sc->args = sc->args->next;
1224 		--sc->nr_args;
1225 	}
1226 
1227 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1228 
1229 	return syscall__set_arg_fmts(sc);
1230 }
1231 
1232 static int trace__validate_ev_qualifier(struct trace *trace)
1233 {
1234 	int err = 0, i;
1235 	struct str_node *pos;
1236 
1237 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1238 	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1239 						 sizeof(trace->ev_qualifier_ids.entries[0]));
1240 
1241 	if (trace->ev_qualifier_ids.entries == NULL) {
1242 		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1243 		       trace->output);
1244 		err = -EINVAL;
1245 		goto out;
1246 	}
1247 
1248 	i = 0;
1249 
1250 	strlist__for_each(pos, trace->ev_qualifier) {
1251 		const char *sc = pos->s;
1252 		int id = syscalltbl__id(trace->sctbl, sc);
1253 
1254 		if (id < 0) {
1255 			if (err == 0) {
1256 				fputs("Error:\tInvalid syscall ", trace->output);
1257 				err = -EINVAL;
1258 			} else {
1259 				fputs(", ", trace->output);
1260 			}
1261 
1262 			fputs(sc, trace->output);
1263 		}
1264 
1265 		trace->ev_qualifier_ids.entries[i++] = id;
1266 	}
1267 
1268 	if (err < 0) {
1269 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1270 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1271 		zfree(&trace->ev_qualifier_ids.entries);
1272 		trace->ev_qualifier_ids.nr = 0;
1273 	}
1274 out:
1275 	return err;
1276 }
1277 
1278 /*
1279  * args is to be interpreted as a series of longs but we need to handle
1280  * 8-byte unaligned accesses. args points to raw_data within the event
1281  * and raw_data is guaranteed to be 8-byte unaligned because it is
1282  * preceded by raw_size which is a u32. So we need to copy args to a temp
1283  * variable to read it. Most notably this avoids extended load instructions
1284  * on unaligned addresses
1285  */
1286 
1287 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1288 				      unsigned char *args, struct trace *trace,
1289 				      struct thread *thread)
1290 {
1291 	size_t printed = 0;
1292 	unsigned char *p;
1293 	unsigned long val;
1294 
1295 	if (sc->args != NULL) {
1296 		struct format_field *field;
1297 		u8 bit = 1;
1298 		struct syscall_arg arg = {
1299 			.idx	= 0,
1300 			.mask	= 0,
1301 			.trace  = trace,
1302 			.thread = thread,
1303 		};
1304 
1305 		for (field = sc->args; field;
1306 		     field = field->next, ++arg.idx, bit <<= 1) {
1307 			if (arg.mask & bit)
1308 				continue;
1309 
1310 			/* special care for unaligned accesses */
1311 			p = args + sizeof(unsigned long) * arg.idx;
1312 			memcpy(&val, p, sizeof(val));
1313 
1314 			/*
1315  			 * Suppress this argument if its value is zero and
1316  			 * and we don't have a string associated in an
1317  			 * strarray for it.
1318  			 */
1319 			if (val == 0 &&
1320 			    !(sc->arg_scnprintf &&
1321 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1322 			      sc->arg_parm[arg.idx]))
1323 				continue;
1324 
1325 			printed += scnprintf(bf + printed, size - printed,
1326 					     "%s%s: ", printed ? ", " : "", field->name);
1327 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1328 				arg.val = val;
1329 				if (sc->arg_parm)
1330 					arg.parm = sc->arg_parm[arg.idx];
1331 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1332 								      size - printed, &arg);
1333 			} else {
1334 				printed += scnprintf(bf + printed, size - printed,
1335 						     "%ld", val);
1336 			}
1337 		}
1338 	} else if (IS_ERR(sc->tp_format)) {
1339 		/*
1340 		 * If we managed to read the tracepoint /format file, then we
1341 		 * may end up not having any args, like with gettid(), so only
1342 		 * print the raw args when we didn't manage to read it.
1343 		 */
1344 		int i = 0;
1345 
1346 		while (i < 6) {
1347 			/* special care for unaligned accesses */
1348 			p = args + sizeof(unsigned long) * i;
1349 			memcpy(&val, p, sizeof(val));
1350 			printed += scnprintf(bf + printed, size - printed,
1351 					     "%sarg%d: %ld",
1352 					     printed ? ", " : "", i, val);
1353 			++i;
1354 		}
1355 	}
1356 
1357 	return printed;
1358 }
1359 
1360 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1361 				  union perf_event *event,
1362 				  struct perf_sample *sample);
1363 
1364 static struct syscall *trace__syscall_info(struct trace *trace,
1365 					   struct perf_evsel *evsel, int id)
1366 {
1367 
1368 	if (id < 0) {
1369 
1370 		/*
1371 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1372 		 * before that, leaving at a higher verbosity level till that is
1373 		 * explained. Reproduced with plain ftrace with:
1374 		 *
1375 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1376 		 * grep "NR -1 " /t/trace_pipe
1377 		 *
1378 		 * After generating some load on the machine.
1379  		 */
1380 		if (verbose > 1) {
1381 			static u64 n;
1382 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1383 				id, perf_evsel__name(evsel), ++n);
1384 		}
1385 		return NULL;
1386 	}
1387 
1388 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1389 	    trace__read_syscall_info(trace, id))
1390 		goto out_cant_read;
1391 
1392 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1393 		goto out_cant_read;
1394 
1395 	return &trace->syscalls.table[id];
1396 
1397 out_cant_read:
1398 	if (verbose) {
1399 		fprintf(trace->output, "Problems reading syscall %d", id);
1400 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1401 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1402 		fputs(" information\n", trace->output);
1403 	}
1404 	return NULL;
1405 }
1406 
1407 static void thread__update_stats(struct thread_trace *ttrace,
1408 				 int id, struct perf_sample *sample)
1409 {
1410 	struct int_node *inode;
1411 	struct stats *stats;
1412 	u64 duration = 0;
1413 
1414 	inode = intlist__findnew(ttrace->syscall_stats, id);
1415 	if (inode == NULL)
1416 		return;
1417 
1418 	stats = inode->priv;
1419 	if (stats == NULL) {
1420 		stats = malloc(sizeof(struct stats));
1421 		if (stats == NULL)
1422 			return;
1423 		init_stats(stats);
1424 		inode->priv = stats;
1425 	}
1426 
1427 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1428 		duration = sample->time - ttrace->entry_time;
1429 
1430 	update_stats(stats, duration);
1431 }
1432 
1433 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1434 {
1435 	struct thread_trace *ttrace;
1436 	u64 duration;
1437 	size_t printed;
1438 
1439 	if (trace->current == NULL)
1440 		return 0;
1441 
1442 	ttrace = thread__priv(trace->current);
1443 
1444 	if (!ttrace->entry_pending)
1445 		return 0;
1446 
1447 	duration = sample->time - ttrace->entry_time;
1448 
1449 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1450 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1451 	ttrace->entry_pending = false;
1452 
1453 	return printed;
1454 }
1455 
1456 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1457 			    union perf_event *event __maybe_unused,
1458 			    struct perf_sample *sample)
1459 {
1460 	char *msg;
1461 	void *args;
1462 	size_t printed = 0;
1463 	struct thread *thread;
1464 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1465 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1466 	struct thread_trace *ttrace;
1467 
1468 	if (sc == NULL)
1469 		return -1;
1470 
1471 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1472 	ttrace = thread__trace(thread, trace->output);
1473 	if (ttrace == NULL)
1474 		goto out_put;
1475 
1476 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1477 
1478 	if (ttrace->entry_str == NULL) {
1479 		ttrace->entry_str = malloc(trace__entry_str_size);
1480 		if (!ttrace->entry_str)
1481 			goto out_put;
1482 	}
1483 
1484 	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1485 		trace__printf_interrupted_entry(trace, sample);
1486 
1487 	ttrace->entry_time = sample->time;
1488 	msg = ttrace->entry_str;
1489 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1490 
1491 	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1492 					   args, trace, thread);
1493 
1494 	if (sc->is_exit) {
1495 		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1496 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1497 			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1498 		}
1499 	} else {
1500 		ttrace->entry_pending = true;
1501 		/* See trace__vfs_getname & trace__sys_exit */
1502 		ttrace->filename.pending_open = false;
1503 	}
1504 
1505 	if (trace->current != thread) {
1506 		thread__put(trace->current);
1507 		trace->current = thread__get(thread);
1508 	}
1509 	err = 0;
1510 out_put:
1511 	thread__put(thread);
1512 	return err;
1513 }
1514 
1515 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1516 				    struct perf_sample *sample,
1517 				    struct callchain_cursor *cursor)
1518 {
1519 	struct addr_location al;
1520 
1521 	if (machine__resolve(trace->host, &al, sample) < 0 ||
1522 	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1523 		return -1;
1524 
1525 	return 0;
1526 }
1527 
1528 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1529 {
1530 	/* TODO: user-configurable print_opts */
1531 	const unsigned int print_opts = EVSEL__PRINT_SYM |
1532 				        EVSEL__PRINT_DSO |
1533 				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
1534 
1535 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1536 }
1537 
1538 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1539 			   union perf_event *event __maybe_unused,
1540 			   struct perf_sample *sample)
1541 {
1542 	long ret;
1543 	u64 duration = 0;
1544 	struct thread *thread;
1545 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1546 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1547 	struct thread_trace *ttrace;
1548 
1549 	if (sc == NULL)
1550 		return -1;
1551 
1552 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1553 	ttrace = thread__trace(thread, trace->output);
1554 	if (ttrace == NULL)
1555 		goto out_put;
1556 
1557 	if (trace->summary)
1558 		thread__update_stats(ttrace, id, sample);
1559 
1560 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1561 
1562 	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1563 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1564 		ttrace->filename.pending_open = false;
1565 		++trace->stats.vfs_getname;
1566 	}
1567 
1568 	ttrace->exit_time = sample->time;
1569 
1570 	if (ttrace->entry_time) {
1571 		duration = sample->time - ttrace->entry_time;
1572 		if (trace__filter_duration(trace, duration))
1573 			goto out;
1574 	} else if (trace->duration_filter)
1575 		goto out;
1576 
1577 	if (sample->callchain) {
1578 		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1579 		if (callchain_ret == 0) {
1580 			if (callchain_cursor.nr < trace->min_stack)
1581 				goto out;
1582 			callchain_ret = 1;
1583 		}
1584 	}
1585 
1586 	if (trace->summary_only)
1587 		goto out;
1588 
1589 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1590 
1591 	if (ttrace->entry_pending) {
1592 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1593 	} else {
1594 		fprintf(trace->output, " ... [");
1595 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1596 		fprintf(trace->output, "]: %s()", sc->name);
1597 	}
1598 
1599 	if (sc->fmt == NULL) {
1600 signed_print:
1601 		fprintf(trace->output, ") = %ld", ret);
1602 	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1603 		char bf[STRERR_BUFSIZE];
1604 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1605 			   *e = audit_errno_to_name(-ret);
1606 
1607 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1608 	} else if (ret == 0 && sc->fmt->timeout)
1609 		fprintf(trace->output, ") = 0 Timeout");
1610 	else if (sc->fmt->hexret)
1611 		fprintf(trace->output, ") = %#lx", ret);
1612 	else if (sc->fmt->errpid) {
1613 		struct thread *child = machine__find_thread(trace->host, ret, ret);
1614 
1615 		if (child != NULL) {
1616 			fprintf(trace->output, ") = %ld", ret);
1617 			if (child->comm_set)
1618 				fprintf(trace->output, " (%s)", thread__comm_str(child));
1619 			thread__put(child);
1620 		}
1621 	} else
1622 		goto signed_print;
1623 
1624 	fputc('\n', trace->output);
1625 
1626 	if (callchain_ret > 0)
1627 		trace__fprintf_callchain(trace, sample);
1628 	else if (callchain_ret < 0)
1629 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1630 out:
1631 	ttrace->entry_pending = false;
1632 	err = 0;
1633 out_put:
1634 	thread__put(thread);
1635 	return err;
1636 }
1637 
1638 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1639 			      union perf_event *event __maybe_unused,
1640 			      struct perf_sample *sample)
1641 {
1642 	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1643 	struct thread_trace *ttrace;
1644 	size_t filename_len, entry_str_len, to_move;
1645 	ssize_t remaining_space;
1646 	char *pos;
1647 	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1648 
1649 	if (!thread)
1650 		goto out;
1651 
1652 	ttrace = thread__priv(thread);
1653 	if (!ttrace)
1654 		goto out;
1655 
1656 	filename_len = strlen(filename);
1657 
1658 	if (ttrace->filename.namelen < filename_len) {
1659 		char *f = realloc(ttrace->filename.name, filename_len + 1);
1660 
1661 		if (f == NULL)
1662 				goto out;
1663 
1664 		ttrace->filename.namelen = filename_len;
1665 		ttrace->filename.name = f;
1666 	}
1667 
1668 	strcpy(ttrace->filename.name, filename);
1669 	ttrace->filename.pending_open = true;
1670 
1671 	if (!ttrace->filename.ptr)
1672 		goto out;
1673 
1674 	entry_str_len = strlen(ttrace->entry_str);
1675 	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1676 	if (remaining_space <= 0)
1677 		goto out;
1678 
1679 	if (filename_len > (size_t)remaining_space) {
1680 		filename += filename_len - remaining_space;
1681 		filename_len = remaining_space;
1682 	}
1683 
1684 	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1685 	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1686 	memmove(pos + filename_len, pos, to_move);
1687 	memcpy(pos, filename, filename_len);
1688 
1689 	ttrace->filename.ptr = 0;
1690 	ttrace->filename.entry_str_pos = 0;
1691 out:
1692 	return 0;
1693 }
1694 
1695 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1696 				     union perf_event *event __maybe_unused,
1697 				     struct perf_sample *sample)
1698 {
1699         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1700 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1701 	struct thread *thread = machine__findnew_thread(trace->host,
1702 							sample->pid,
1703 							sample->tid);
1704 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1705 
1706 	if (ttrace == NULL)
1707 		goto out_dump;
1708 
1709 	ttrace->runtime_ms += runtime_ms;
1710 	trace->runtime_ms += runtime_ms;
1711 	thread__put(thread);
1712 	return 0;
1713 
1714 out_dump:
1715 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1716 	       evsel->name,
1717 	       perf_evsel__strval(evsel, sample, "comm"),
1718 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1719 	       runtime,
1720 	       perf_evsel__intval(evsel, sample, "vruntime"));
1721 	thread__put(thread);
1722 	return 0;
1723 }
1724 
1725 static void bpf_output__printer(enum binary_printer_ops op,
1726 				unsigned int val, void *extra)
1727 {
1728 	FILE *output = extra;
1729 	unsigned char ch = (unsigned char)val;
1730 
1731 	switch (op) {
1732 	case BINARY_PRINT_CHAR_DATA:
1733 		fprintf(output, "%c", isprint(ch) ? ch : '.');
1734 		break;
1735 	case BINARY_PRINT_DATA_BEGIN:
1736 	case BINARY_PRINT_LINE_BEGIN:
1737 	case BINARY_PRINT_ADDR:
1738 	case BINARY_PRINT_NUM_DATA:
1739 	case BINARY_PRINT_NUM_PAD:
1740 	case BINARY_PRINT_SEP:
1741 	case BINARY_PRINT_CHAR_PAD:
1742 	case BINARY_PRINT_LINE_END:
1743 	case BINARY_PRINT_DATA_END:
1744 	default:
1745 		break;
1746 	}
1747 }
1748 
1749 static void bpf_output__fprintf(struct trace *trace,
1750 				struct perf_sample *sample)
1751 {
1752 	print_binary(sample->raw_data, sample->raw_size, 8,
1753 		     bpf_output__printer, trace->output);
1754 }
1755 
1756 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1757 				union perf_event *event __maybe_unused,
1758 				struct perf_sample *sample)
1759 {
1760 	int callchain_ret = 0;
1761 
1762 	if (sample->callchain) {
1763 		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1764 		if (callchain_ret == 0) {
1765 			if (callchain_cursor.nr < trace->min_stack)
1766 				goto out;
1767 			callchain_ret = 1;
1768 		}
1769 	}
1770 
1771 	trace__printf_interrupted_entry(trace, sample);
1772 	trace__fprintf_tstamp(trace, sample->time, trace->output);
1773 
1774 	if (trace->trace_syscalls)
1775 		fprintf(trace->output, "(         ): ");
1776 
1777 	fprintf(trace->output, "%s:", evsel->name);
1778 
1779 	if (perf_evsel__is_bpf_output(evsel)) {
1780 		bpf_output__fprintf(trace, sample);
1781 	} else if (evsel->tp_format) {
1782 		event_format__fprintf(evsel->tp_format, sample->cpu,
1783 				      sample->raw_data, sample->raw_size,
1784 				      trace->output);
1785 	}
1786 
1787 	fprintf(trace->output, ")\n");
1788 
1789 	if (callchain_ret > 0)
1790 		trace__fprintf_callchain(trace, sample);
1791 	else if (callchain_ret < 0)
1792 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1793 out:
1794 	return 0;
1795 }
1796 
1797 static void print_location(FILE *f, struct perf_sample *sample,
1798 			   struct addr_location *al,
1799 			   bool print_dso, bool print_sym)
1800 {
1801 
1802 	if ((verbose || print_dso) && al->map)
1803 		fprintf(f, "%s@", al->map->dso->long_name);
1804 
1805 	if ((verbose || print_sym) && al->sym)
1806 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1807 			al->addr - al->sym->start);
1808 	else if (al->map)
1809 		fprintf(f, "0x%" PRIx64, al->addr);
1810 	else
1811 		fprintf(f, "0x%" PRIx64, sample->addr);
1812 }
1813 
1814 static int trace__pgfault(struct trace *trace,
1815 			  struct perf_evsel *evsel,
1816 			  union perf_event *event __maybe_unused,
1817 			  struct perf_sample *sample)
1818 {
1819 	struct thread *thread;
1820 	struct addr_location al;
1821 	char map_type = 'd';
1822 	struct thread_trace *ttrace;
1823 	int err = -1;
1824 	int callchain_ret = 0;
1825 
1826 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1827 
1828 	if (sample->callchain) {
1829 		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1830 		if (callchain_ret == 0) {
1831 			if (callchain_cursor.nr < trace->min_stack)
1832 				goto out_put;
1833 			callchain_ret = 1;
1834 		}
1835 	}
1836 
1837 	ttrace = thread__trace(thread, trace->output);
1838 	if (ttrace == NULL)
1839 		goto out_put;
1840 
1841 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1842 		ttrace->pfmaj++;
1843 	else
1844 		ttrace->pfmin++;
1845 
1846 	if (trace->summary_only)
1847 		goto out;
1848 
1849 	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1850 			      sample->ip, &al);
1851 
1852 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1853 
1854 	fprintf(trace->output, "%sfault [",
1855 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1856 		"maj" : "min");
1857 
1858 	print_location(trace->output, sample, &al, false, true);
1859 
1860 	fprintf(trace->output, "] => ");
1861 
1862 	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1863 				   sample->addr, &al);
1864 
1865 	if (!al.map) {
1866 		thread__find_addr_location(thread, sample->cpumode,
1867 					   MAP__FUNCTION, sample->addr, &al);
1868 
1869 		if (al.map)
1870 			map_type = 'x';
1871 		else
1872 			map_type = '?';
1873 	}
1874 
1875 	print_location(trace->output, sample, &al, true, false);
1876 
1877 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1878 
1879 	if (callchain_ret > 0)
1880 		trace__fprintf_callchain(trace, sample);
1881 	else if (callchain_ret < 0)
1882 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1883 out:
1884 	err = 0;
1885 out_put:
1886 	thread__put(thread);
1887 	return err;
1888 }
1889 
1890 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1891 {
1892 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1893 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1894 		return false;
1895 
1896 	if (trace->pid_list || trace->tid_list)
1897 		return true;
1898 
1899 	return false;
1900 }
1901 
1902 static void trace__set_base_time(struct trace *trace,
1903 				 struct perf_evsel *evsel,
1904 				 struct perf_sample *sample)
1905 {
1906 	/*
1907 	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1908 	 * and don't use sample->time unconditionally, we may end up having
1909 	 * some other event in the future without PERF_SAMPLE_TIME for good
1910 	 * reason, i.e. we may not be interested in its timestamps, just in
1911 	 * it taking place, picking some piece of information when it
1912 	 * appears in our event stream (vfs_getname comes to mind).
1913 	 */
1914 	if (trace->base_time == 0 && !trace->full_time &&
1915 	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1916 		trace->base_time = sample->time;
1917 }
1918 
1919 static int trace__process_sample(struct perf_tool *tool,
1920 				 union perf_event *event,
1921 				 struct perf_sample *sample,
1922 				 struct perf_evsel *evsel,
1923 				 struct machine *machine __maybe_unused)
1924 {
1925 	struct trace *trace = container_of(tool, struct trace, tool);
1926 	int err = 0;
1927 
1928 	tracepoint_handler handler = evsel->handler;
1929 
1930 	if (skip_sample(trace, sample))
1931 		return 0;
1932 
1933 	trace__set_base_time(trace, evsel, sample);
1934 
1935 	if (handler) {
1936 		++trace->nr_events;
1937 		handler(trace, evsel, event, sample);
1938 	}
1939 
1940 	return err;
1941 }
1942 
1943 static int parse_target_str(struct trace *trace)
1944 {
1945 	if (trace->opts.target.pid) {
1946 		trace->pid_list = intlist__new(trace->opts.target.pid);
1947 		if (trace->pid_list == NULL) {
1948 			pr_err("Error parsing process id string\n");
1949 			return -EINVAL;
1950 		}
1951 	}
1952 
1953 	if (trace->opts.target.tid) {
1954 		trace->tid_list = intlist__new(trace->opts.target.tid);
1955 		if (trace->tid_list == NULL) {
1956 			pr_err("Error parsing thread id string\n");
1957 			return -EINVAL;
1958 		}
1959 	}
1960 
1961 	return 0;
1962 }
1963 
1964 static int trace__record(struct trace *trace, int argc, const char **argv)
1965 {
1966 	unsigned int rec_argc, i, j;
1967 	const char **rec_argv;
1968 	const char * const record_args[] = {
1969 		"record",
1970 		"-R",
1971 		"-m", "1024",
1972 		"-c", "1",
1973 	};
1974 
1975 	const char * const sc_args[] = { "-e", };
1976 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1977 	const char * const majpf_args[] = { "-e", "major-faults" };
1978 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1979 	const char * const minpf_args[] = { "-e", "minor-faults" };
1980 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1981 
1982 	/* +1 is for the event string below */
1983 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1984 		majpf_args_nr + minpf_args_nr + argc;
1985 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1986 
1987 	if (rec_argv == NULL)
1988 		return -ENOMEM;
1989 
1990 	j = 0;
1991 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1992 		rec_argv[j++] = record_args[i];
1993 
1994 	if (trace->trace_syscalls) {
1995 		for (i = 0; i < sc_args_nr; i++)
1996 			rec_argv[j++] = sc_args[i];
1997 
1998 		/* event string may be different for older kernels - e.g., RHEL6 */
1999 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2000 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2001 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2002 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2003 		else {
2004 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2005 			return -1;
2006 		}
2007 	}
2008 
2009 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2010 		for (i = 0; i < majpf_args_nr; i++)
2011 			rec_argv[j++] = majpf_args[i];
2012 
2013 	if (trace->trace_pgfaults & TRACE_PFMIN)
2014 		for (i = 0; i < minpf_args_nr; i++)
2015 			rec_argv[j++] = minpf_args[i];
2016 
2017 	for (i = 0; i < (unsigned int)argc; i++)
2018 		rec_argv[j++] = argv[i];
2019 
2020 	return cmd_record(j, rec_argv, NULL);
2021 }
2022 
2023 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2024 
2025 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2026 {
2027 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2028 
2029 	if (IS_ERR(evsel))
2030 		return false;
2031 
2032 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2033 		perf_evsel__delete(evsel);
2034 		return false;
2035 	}
2036 
2037 	evsel->handler = trace__vfs_getname;
2038 	perf_evlist__add(evlist, evsel);
2039 	return true;
2040 }
2041 
2042 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2043 {
2044 	struct perf_evsel *evsel;
2045 	struct perf_event_attr attr = {
2046 		.type = PERF_TYPE_SOFTWARE,
2047 		.mmap_data = 1,
2048 	};
2049 
2050 	attr.config = config;
2051 	attr.sample_period = 1;
2052 
2053 	event_attr_init(&attr);
2054 
2055 	evsel = perf_evsel__new(&attr);
2056 	if (evsel)
2057 		evsel->handler = trace__pgfault;
2058 
2059 	return evsel;
2060 }
2061 
2062 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2063 {
2064 	const u32 type = event->header.type;
2065 	struct perf_evsel *evsel;
2066 
2067 	if (type != PERF_RECORD_SAMPLE) {
2068 		trace__process_event(trace, trace->host, event, sample);
2069 		return;
2070 	}
2071 
2072 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2073 	if (evsel == NULL) {
2074 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2075 		return;
2076 	}
2077 
2078 	trace__set_base_time(trace, evsel, sample);
2079 
2080 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2081 	    sample->raw_data == NULL) {
2082 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2083 		       perf_evsel__name(evsel), sample->tid,
2084 		       sample->cpu, sample->raw_size);
2085 	} else {
2086 		tracepoint_handler handler = evsel->handler;
2087 		handler(trace, evsel, event, sample);
2088 	}
2089 }
2090 
2091 static int trace__add_syscall_newtp(struct trace *trace)
2092 {
2093 	int ret = -1;
2094 	struct perf_evlist *evlist = trace->evlist;
2095 	struct perf_evsel *sys_enter, *sys_exit;
2096 
2097 	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2098 	if (sys_enter == NULL)
2099 		goto out;
2100 
2101 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2102 		goto out_delete_sys_enter;
2103 
2104 	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2105 	if (sys_exit == NULL)
2106 		goto out_delete_sys_enter;
2107 
2108 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2109 		goto out_delete_sys_exit;
2110 
2111 	perf_evlist__add(evlist, sys_enter);
2112 	perf_evlist__add(evlist, sys_exit);
2113 
2114 	if (callchain_param.enabled && !trace->kernel_syscallchains) {
2115 		/*
2116 		 * We're interested only in the user space callchain
2117 		 * leading to the syscall, allow overriding that for
2118 		 * debugging reasons using --kernel_syscall_callchains
2119 		 */
2120 		sys_exit->attr.exclude_callchain_kernel = 1;
2121 	}
2122 
2123 	trace->syscalls.events.sys_enter = sys_enter;
2124 	trace->syscalls.events.sys_exit  = sys_exit;
2125 
2126 	ret = 0;
2127 out:
2128 	return ret;
2129 
2130 out_delete_sys_exit:
2131 	perf_evsel__delete_priv(sys_exit);
2132 out_delete_sys_enter:
2133 	perf_evsel__delete_priv(sys_enter);
2134 	goto out;
2135 }
2136 
2137 static int trace__set_ev_qualifier_filter(struct trace *trace)
2138 {
2139 	int err = -1;
2140 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2141 						trace->ev_qualifier_ids.nr,
2142 						trace->ev_qualifier_ids.entries);
2143 
2144 	if (filter == NULL)
2145 		goto out_enomem;
2146 
2147 	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2148 		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2149 
2150 	free(filter);
2151 out:
2152 	return err;
2153 out_enomem:
2154 	errno = ENOMEM;
2155 	goto out;
2156 }
2157 
2158 static int trace__run(struct trace *trace, int argc, const char **argv)
2159 {
2160 	struct perf_evlist *evlist = trace->evlist;
2161 	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2162 	int err = -1, i;
2163 	unsigned long before;
2164 	const bool forks = argc > 0;
2165 	bool draining = false;
2166 
2167 	trace->live = true;
2168 
2169 	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2170 		goto out_error_raw_syscalls;
2171 
2172 	if (trace->trace_syscalls)
2173 		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2174 
2175 	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2176 		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2177 		if (pgfault_maj == NULL)
2178 			goto out_error_mem;
2179 		perf_evlist__add(evlist, pgfault_maj);
2180 	}
2181 
2182 	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2183 		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2184 		if (pgfault_min == NULL)
2185 			goto out_error_mem;
2186 		perf_evlist__add(evlist, pgfault_min);
2187 	}
2188 
2189 	if (trace->sched &&
2190 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2191 				   trace__sched_stat_runtime))
2192 		goto out_error_sched_stat_runtime;
2193 
2194 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2195 	if (err < 0) {
2196 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2197 		goto out_delete_evlist;
2198 	}
2199 
2200 	err = trace__symbols_init(trace, evlist);
2201 	if (err < 0) {
2202 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2203 		goto out_delete_evlist;
2204 	}
2205 
2206 	perf_evlist__config(evlist, &trace->opts, NULL);
2207 
2208 	if (callchain_param.enabled) {
2209 		bool use_identifier = false;
2210 
2211 		if (trace->syscalls.events.sys_exit) {
2212 			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2213 						     &trace->opts, &callchain_param);
2214 			use_identifier = true;
2215 		}
2216 
2217 		if (pgfault_maj) {
2218 			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2219 			use_identifier = true;
2220 		}
2221 
2222 		if (pgfault_min) {
2223 			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2224 			use_identifier = true;
2225 		}
2226 
2227 		if (use_identifier) {
2228 		       /*
2229 			* Now we have evsels with different sample_ids, use
2230 			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2231 			* from a fixed position in each ring buffer record.
2232 			*
2233 			* As of this the changeset introducing this comment, this
2234 			* isn't strictly needed, as the fields that can come before
2235 			* PERF_SAMPLE_ID are all used, but we'll probably disable
2236 			* some of those for things like copying the payload of
2237 			* pointer syscall arguments, and for vfs_getname we don't
2238 			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2239 			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2240 			*/
2241 			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2242 			perf_evlist__reset_sample_bit(evlist, ID);
2243 		}
2244 	}
2245 
2246 	signal(SIGCHLD, sig_handler);
2247 	signal(SIGINT, sig_handler);
2248 
2249 	if (forks) {
2250 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2251 						    argv, false, NULL);
2252 		if (err < 0) {
2253 			fprintf(trace->output, "Couldn't run the workload!\n");
2254 			goto out_delete_evlist;
2255 		}
2256 	}
2257 
2258 	err = perf_evlist__open(evlist);
2259 	if (err < 0)
2260 		goto out_error_open;
2261 
2262 	err = bpf__apply_obj_config();
2263 	if (err) {
2264 		char errbuf[BUFSIZ];
2265 
2266 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2267 		pr_err("ERROR: Apply config to BPF failed: %s\n",
2268 			 errbuf);
2269 		goto out_error_open;
2270 	}
2271 
2272 	/*
2273 	 * Better not use !target__has_task() here because we need to cover the
2274 	 * case where no threads were specified in the command line, but a
2275 	 * workload was, and in that case we will fill in the thread_map when
2276 	 * we fork the workload in perf_evlist__prepare_workload.
2277 	 */
2278 	if (trace->filter_pids.nr > 0)
2279 		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2280 	else if (thread_map__pid(evlist->threads, 0) == -1)
2281 		err = perf_evlist__set_filter_pid(evlist, getpid());
2282 
2283 	if (err < 0)
2284 		goto out_error_mem;
2285 
2286 	if (trace->ev_qualifier_ids.nr > 0) {
2287 		err = trace__set_ev_qualifier_filter(trace);
2288 		if (err < 0)
2289 			goto out_errno;
2290 
2291 		pr_debug("event qualifier tracepoint filter: %s\n",
2292 			 trace->syscalls.events.sys_exit->filter);
2293 	}
2294 
2295 	err = perf_evlist__apply_filters(evlist, &evsel);
2296 	if (err < 0)
2297 		goto out_error_apply_filters;
2298 
2299 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2300 	if (err < 0)
2301 		goto out_error_mmap;
2302 
2303 	if (!target__none(&trace->opts.target))
2304 		perf_evlist__enable(evlist);
2305 
2306 	if (forks)
2307 		perf_evlist__start_workload(evlist);
2308 
2309 	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2310 				  evlist->threads->nr > 1 ||
2311 				  perf_evlist__first(evlist)->attr.inherit;
2312 again:
2313 	before = trace->nr_events;
2314 
2315 	for (i = 0; i < evlist->nr_mmaps; i++) {
2316 		union perf_event *event;
2317 
2318 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2319 			struct perf_sample sample;
2320 
2321 			++trace->nr_events;
2322 
2323 			err = perf_evlist__parse_sample(evlist, event, &sample);
2324 			if (err) {
2325 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2326 				goto next_event;
2327 			}
2328 
2329 			trace__handle_event(trace, event, &sample);
2330 next_event:
2331 			perf_evlist__mmap_consume(evlist, i);
2332 
2333 			if (interrupted)
2334 				goto out_disable;
2335 
2336 			if (done && !draining) {
2337 				perf_evlist__disable(evlist);
2338 				draining = true;
2339 			}
2340 		}
2341 	}
2342 
2343 	if (trace->nr_events == before) {
2344 		int timeout = done ? 100 : -1;
2345 
2346 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2347 			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2348 				draining = true;
2349 
2350 			goto again;
2351 		}
2352 	} else {
2353 		goto again;
2354 	}
2355 
2356 out_disable:
2357 	thread__zput(trace->current);
2358 
2359 	perf_evlist__disable(evlist);
2360 
2361 	if (!err) {
2362 		if (trace->summary)
2363 			trace__fprintf_thread_summary(trace, trace->output);
2364 
2365 		if (trace->show_tool_stats) {
2366 			fprintf(trace->output, "Stats:\n "
2367 					       " vfs_getname : %" PRIu64 "\n"
2368 					       " proc_getname: %" PRIu64 "\n",
2369 				trace->stats.vfs_getname,
2370 				trace->stats.proc_getname);
2371 		}
2372 	}
2373 
2374 out_delete_evlist:
2375 	perf_evlist__delete(evlist);
2376 	trace->evlist = NULL;
2377 	trace->live = false;
2378 	return err;
2379 {
2380 	char errbuf[BUFSIZ];
2381 
2382 out_error_sched_stat_runtime:
2383 	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2384 	goto out_error;
2385 
2386 out_error_raw_syscalls:
2387 	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2388 	goto out_error;
2389 
2390 out_error_mmap:
2391 	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2392 	goto out_error;
2393 
2394 out_error_open:
2395 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2396 
2397 out_error:
2398 	fprintf(trace->output, "%s\n", errbuf);
2399 	goto out_delete_evlist;
2400 
2401 out_error_apply_filters:
2402 	fprintf(trace->output,
2403 		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
2404 		evsel->filter, perf_evsel__name(evsel), errno,
2405 		strerror_r(errno, errbuf, sizeof(errbuf)));
2406 	goto out_delete_evlist;
2407 }
2408 out_error_mem:
2409 	fprintf(trace->output, "Not enough memory to run!\n");
2410 	goto out_delete_evlist;
2411 
2412 out_errno:
2413 	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2414 	goto out_delete_evlist;
2415 }
2416 
2417 static int trace__replay(struct trace *trace)
2418 {
2419 	const struct perf_evsel_str_handler handlers[] = {
2420 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2421 	};
2422 	struct perf_data_file file = {
2423 		.path  = input_name,
2424 		.mode  = PERF_DATA_MODE_READ,
2425 		.force = trace->force,
2426 	};
2427 	struct perf_session *session;
2428 	struct perf_evsel *evsel;
2429 	int err = -1;
2430 
2431 	trace->tool.sample	  = trace__process_sample;
2432 	trace->tool.mmap	  = perf_event__process_mmap;
2433 	trace->tool.mmap2	  = perf_event__process_mmap2;
2434 	trace->tool.comm	  = perf_event__process_comm;
2435 	trace->tool.exit	  = perf_event__process_exit;
2436 	trace->tool.fork	  = perf_event__process_fork;
2437 	trace->tool.attr	  = perf_event__process_attr;
2438 	trace->tool.tracing_data = perf_event__process_tracing_data;
2439 	trace->tool.build_id	  = perf_event__process_build_id;
2440 
2441 	trace->tool.ordered_events = true;
2442 	trace->tool.ordering_requires_timestamps = true;
2443 
2444 	/* add tid to output */
2445 	trace->multiple_threads = true;
2446 
2447 	session = perf_session__new(&file, false, &trace->tool);
2448 	if (session == NULL)
2449 		return -1;
2450 
2451 	if (symbol__init(&session->header.env) < 0)
2452 		goto out;
2453 
2454 	trace->host = &session->machines.host;
2455 
2456 	err = perf_session__set_tracepoints_handlers(session, handlers);
2457 	if (err)
2458 		goto out;
2459 
2460 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2461 						     "raw_syscalls:sys_enter");
2462 	/* older kernels have syscalls tp versus raw_syscalls */
2463 	if (evsel == NULL)
2464 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2465 							     "syscalls:sys_enter");
2466 
2467 	if (evsel &&
2468 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2469 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2470 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2471 		goto out;
2472 	}
2473 
2474 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2475 						     "raw_syscalls:sys_exit");
2476 	if (evsel == NULL)
2477 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2478 							     "syscalls:sys_exit");
2479 	if (evsel &&
2480 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2481 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2482 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2483 		goto out;
2484 	}
2485 
2486 	evlist__for_each(session->evlist, evsel) {
2487 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2488 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2489 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2490 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2491 			evsel->handler = trace__pgfault;
2492 	}
2493 
2494 	err = parse_target_str(trace);
2495 	if (err != 0)
2496 		goto out;
2497 
2498 	setup_pager();
2499 
2500 	err = perf_session__process_events(session);
2501 	if (err)
2502 		pr_err("Failed to process events, error %d", err);
2503 
2504 	else if (trace->summary)
2505 		trace__fprintf_thread_summary(trace, trace->output);
2506 
2507 out:
2508 	perf_session__delete(session);
2509 
2510 	return err;
2511 }
2512 
2513 static size_t trace__fprintf_threads_header(FILE *fp)
2514 {
2515 	size_t printed;
2516 
2517 	printed  = fprintf(fp, "\n Summary of events:\n\n");
2518 
2519 	return printed;
2520 }
2521 
2522 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2523 	struct stats 	*stats;
2524 	double		msecs;
2525 	int		syscall;
2526 )
2527 {
2528 	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2529 	struct stats *stats = source->priv;
2530 
2531 	entry->syscall = source->i;
2532 	entry->stats   = stats;
2533 	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2534 }
2535 
2536 static size_t thread__dump_stats(struct thread_trace *ttrace,
2537 				 struct trace *trace, FILE *fp)
2538 {
2539 	size_t printed = 0;
2540 	struct syscall *sc;
2541 	struct rb_node *nd;
2542 	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2543 
2544 	if (syscall_stats == NULL)
2545 		return 0;
2546 
2547 	printed += fprintf(fp, "\n");
2548 
2549 	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2550 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2551 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2552 
2553 	resort_rb__for_each(nd, syscall_stats) {
2554 		struct stats *stats = syscall_stats_entry->stats;
2555 		if (stats) {
2556 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2557 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2558 			double avg = avg_stats(stats);
2559 			double pct;
2560 			u64 n = (u64) stats->n;
2561 
2562 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2563 			avg /= NSEC_PER_MSEC;
2564 
2565 			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2566 			printed += fprintf(fp, "   %-15s", sc->name);
2567 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2568 					   n, syscall_stats_entry->msecs, min, avg);
2569 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2570 		}
2571 	}
2572 
2573 	resort_rb__delete(syscall_stats);
2574 	printed += fprintf(fp, "\n\n");
2575 
2576 	return printed;
2577 }
2578 
2579 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2580 {
2581 	size_t printed = 0;
2582 	struct thread_trace *ttrace = thread__priv(thread);
2583 	double ratio;
2584 
2585 	if (ttrace == NULL)
2586 		return 0;
2587 
2588 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2589 
2590 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2591 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2592 	printed += fprintf(fp, "%.1f%%", ratio);
2593 	if (ttrace->pfmaj)
2594 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2595 	if (ttrace->pfmin)
2596 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2597 	if (trace->sched)
2598 		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2599 	else if (fputc('\n', fp) != EOF)
2600 		++printed;
2601 
2602 	printed += thread__dump_stats(ttrace, trace, fp);
2603 
2604 	return printed;
2605 }
2606 
2607 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2608 {
2609 	return ttrace ? ttrace->nr_events : 0;
2610 }
2611 
2612 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2613 	struct thread *thread;
2614 )
2615 {
2616 	entry->thread = rb_entry(nd, struct thread, rb_node);
2617 }
2618 
2619 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2620 {
2621 	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2622 	size_t printed = trace__fprintf_threads_header(fp);
2623 	struct rb_node *nd;
2624 
2625 	if (threads == NULL) {
2626 		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2627 		return 0;
2628 	}
2629 
2630 	resort_rb__for_each(nd, threads)
2631 		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2632 
2633 	resort_rb__delete(threads);
2634 
2635 	return printed;
2636 }
2637 
2638 static int trace__set_duration(const struct option *opt, const char *str,
2639 			       int unset __maybe_unused)
2640 {
2641 	struct trace *trace = opt->value;
2642 
2643 	trace->duration_filter = atof(str);
2644 	return 0;
2645 }
2646 
2647 static int trace__set_filter_pids(const struct option *opt, const char *str,
2648 				  int unset __maybe_unused)
2649 {
2650 	int ret = -1;
2651 	size_t i;
2652 	struct trace *trace = opt->value;
2653 	/*
2654 	 * FIXME: introduce a intarray class, plain parse csv and create a
2655 	 * { int nr, int entries[] } struct...
2656 	 */
2657 	struct intlist *list = intlist__new(str);
2658 
2659 	if (list == NULL)
2660 		return -1;
2661 
2662 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2663 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2664 
2665 	if (trace->filter_pids.entries == NULL)
2666 		goto out;
2667 
2668 	trace->filter_pids.entries[0] = getpid();
2669 
2670 	for (i = 1; i < trace->filter_pids.nr; ++i)
2671 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2672 
2673 	intlist__delete(list);
2674 	ret = 0;
2675 out:
2676 	return ret;
2677 }
2678 
2679 static int trace__open_output(struct trace *trace, const char *filename)
2680 {
2681 	struct stat st;
2682 
2683 	if (!stat(filename, &st) && st.st_size) {
2684 		char oldname[PATH_MAX];
2685 
2686 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2687 		unlink(oldname);
2688 		rename(filename, oldname);
2689 	}
2690 
2691 	trace->output = fopen(filename, "w");
2692 
2693 	return trace->output == NULL ? -errno : 0;
2694 }
2695 
2696 static int parse_pagefaults(const struct option *opt, const char *str,
2697 			    int unset __maybe_unused)
2698 {
2699 	int *trace_pgfaults = opt->value;
2700 
2701 	if (strcmp(str, "all") == 0)
2702 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2703 	else if (strcmp(str, "maj") == 0)
2704 		*trace_pgfaults |= TRACE_PFMAJ;
2705 	else if (strcmp(str, "min") == 0)
2706 		*trace_pgfaults |= TRACE_PFMIN;
2707 	else
2708 		return -1;
2709 
2710 	return 0;
2711 }
2712 
2713 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2714 {
2715 	struct perf_evsel *evsel;
2716 
2717 	evlist__for_each(evlist, evsel)
2718 		evsel->handler = handler;
2719 }
2720 
2721 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2722 {
2723 	const char *trace_usage[] = {
2724 		"perf trace [<options>] [<command>]",
2725 		"perf trace [<options>] -- <command> [<options>]",
2726 		"perf trace record [<options>] [<command>]",
2727 		"perf trace record [<options>] -- <command> [<options>]",
2728 		NULL
2729 	};
2730 	struct trace trace = {
2731 		.syscalls = {
2732 			. max = -1,
2733 		},
2734 		.opts = {
2735 			.target = {
2736 				.uid	   = UINT_MAX,
2737 				.uses_mmap = true,
2738 			},
2739 			.user_freq     = UINT_MAX,
2740 			.user_interval = ULLONG_MAX,
2741 			.no_buffering  = true,
2742 			.mmap_pages    = UINT_MAX,
2743 			.proc_map_timeout  = 500,
2744 		},
2745 		.output = stderr,
2746 		.show_comm = true,
2747 		.trace_syscalls = true,
2748 		.kernel_syscallchains = false,
2749 		.max_stack = UINT_MAX,
2750 	};
2751 	const char *output_name = NULL;
2752 	const char *ev_qualifier_str = NULL;
2753 	const struct option trace_options[] = {
2754 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
2755 		     "event selector. use 'perf list' to list available events",
2756 		     parse_events_option),
2757 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2758 		    "show the thread COMM next to its id"),
2759 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2760 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2761 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2762 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2763 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2764 		    "trace events on existing process id"),
2765 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2766 		    "trace events on existing thread id"),
2767 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2768 		     "pids to filter (by the kernel)", trace__set_filter_pids),
2769 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2770 		    "system-wide collection from all CPUs"),
2771 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2772 		    "list of cpus to monitor"),
2773 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2774 		    "child tasks do not inherit counters"),
2775 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2776 		     "number of mmap data pages",
2777 		     perf_evlist__parse_mmap_pages),
2778 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2779 		   "user to profile"),
2780 	OPT_CALLBACK(0, "duration", &trace, "float",
2781 		     "show only events with duration > N.M ms",
2782 		     trace__set_duration),
2783 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2784 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2785 	OPT_BOOLEAN('T', "time", &trace.full_time,
2786 		    "Show full timestamp, not time relative to first start"),
2787 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2788 		    "Show only syscall summary with statistics"),
2789 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2790 		    "Show all syscalls and summary with statistics"),
2791 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2792 		     "Trace pagefaults", parse_pagefaults, "maj"),
2793 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2794 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2795 	OPT_CALLBACK(0, "call-graph", &trace.opts,
2796 		     "record_mode[,record_size]", record_callchain_help,
2797 		     &record_parse_callchain_opt),
2798 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2799 		    "Show the kernel callchains on the syscall exit path"),
2800 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2801 		     "Set the minimum stack depth when parsing the callchain, "
2802 		     "anything below the specified depth will be ignored."),
2803 	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2804 		     "Set the maximum stack depth when parsing the callchain, "
2805 		     "anything beyond the specified depth will be ignored. "
2806 		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2807 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2808 			"per thread proc mmap processing timeout in ms"),
2809 	OPT_END()
2810 	};
2811 	bool __maybe_unused max_stack_user_set = true;
2812 	bool mmap_pages_user_set = true;
2813 	const char * const trace_subcommands[] = { "record", NULL };
2814 	int err;
2815 	char bf[BUFSIZ];
2816 
2817 	signal(SIGSEGV, sighandler_dump_stack);
2818 	signal(SIGFPE, sighandler_dump_stack);
2819 
2820 	trace.evlist = perf_evlist__new();
2821 	trace.sctbl = syscalltbl__new();
2822 
2823 	if (trace.evlist == NULL || trace.sctbl == NULL) {
2824 		pr_err("Not enough memory to run!\n");
2825 		err = -ENOMEM;
2826 		goto out;
2827 	}
2828 
2829 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2830 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2831 
2832 	err = bpf__setup_stdout(trace.evlist);
2833 	if (err) {
2834 		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2835 		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2836 		goto out;
2837 	}
2838 
2839 	err = -1;
2840 
2841 	if (trace.trace_pgfaults) {
2842 		trace.opts.sample_address = true;
2843 		trace.opts.sample_time = true;
2844 	}
2845 
2846 	if (trace.opts.mmap_pages == UINT_MAX)
2847 		mmap_pages_user_set = false;
2848 
2849 	if (trace.max_stack == UINT_MAX) {
2850 		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2851 		max_stack_user_set = false;
2852 	}
2853 
2854 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2855 	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2856 		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2857 #endif
2858 
2859 	if (callchain_param.enabled) {
2860 		if (!mmap_pages_user_set && geteuid() == 0)
2861 			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2862 
2863 		symbol_conf.use_callchain = true;
2864 	}
2865 
2866 	if (trace.evlist->nr_entries > 0)
2867 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2868 
2869 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2870 		return trace__record(&trace, argc-1, &argv[1]);
2871 
2872 	/* summary_only implies summary option, but don't overwrite summary if set */
2873 	if (trace.summary_only)
2874 		trace.summary = trace.summary_only;
2875 
2876 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2877 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
2878 		pr_err("Please specify something to trace.\n");
2879 		return -1;
2880 	}
2881 
2882 	if (!trace.trace_syscalls && ev_qualifier_str) {
2883 		pr_err("The -e option can't be used with --no-syscalls.\n");
2884 		goto out;
2885 	}
2886 
2887 	if (output_name != NULL) {
2888 		err = trace__open_output(&trace, output_name);
2889 		if (err < 0) {
2890 			perror("failed to create output file");
2891 			goto out;
2892 		}
2893 	}
2894 
2895 	trace.open_id = syscalltbl__id(trace.sctbl, "open");
2896 
2897 	if (ev_qualifier_str != NULL) {
2898 		const char *s = ev_qualifier_str;
2899 		struct strlist_config slist_config = {
2900 			.dirname = system_path(STRACE_GROUPS_DIR),
2901 		};
2902 
2903 		trace.not_ev_qualifier = *s == '!';
2904 		if (trace.not_ev_qualifier)
2905 			++s;
2906 		trace.ev_qualifier = strlist__new(s, &slist_config);
2907 		if (trace.ev_qualifier == NULL) {
2908 			fputs("Not enough memory to parse event qualifier",
2909 			      trace.output);
2910 			err = -ENOMEM;
2911 			goto out_close;
2912 		}
2913 
2914 		err = trace__validate_ev_qualifier(&trace);
2915 		if (err)
2916 			goto out_close;
2917 	}
2918 
2919 	err = target__validate(&trace.opts.target);
2920 	if (err) {
2921 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2922 		fprintf(trace.output, "%s", bf);
2923 		goto out_close;
2924 	}
2925 
2926 	err = target__parse_uid(&trace.opts.target);
2927 	if (err) {
2928 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2929 		fprintf(trace.output, "%s", bf);
2930 		goto out_close;
2931 	}
2932 
2933 	if (!argc && target__none(&trace.opts.target))
2934 		trace.opts.target.system_wide = true;
2935 
2936 	if (input_name)
2937 		err = trace__replay(&trace);
2938 	else
2939 		err = trace__run(&trace, argc, argv);
2940 
2941 out_close:
2942 	if (output_name != NULL)
2943 		fclose(trace.output);
2944 out:
2945 	return err;
2946 }
2947