xref: /openbmc/linux/tools/perf/builtin-trace.c (revision b96fc2f3)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include "builtin.h"
21 #include "util/color.h"
22 #include "util/debug.h"
23 #include "util/evlist.h"
24 #include "util/exec_cmd.h"
25 #include "util/machine.h"
26 #include "util/session.h"
27 #include "util/thread.h"
28 #include "util/parse-options.h"
29 #include "util/strlist.h"
30 #include "util/intlist.h"
31 #include "util/thread_map.h"
32 #include "util/stat.h"
33 #include "trace-event.h"
34 #include "util/parse-events.h"
35 
36 #include <libaudit.h>
37 #include <stdlib.h>
38 #include <sys/mman.h>
39 #include <linux/futex.h>
40 
/*
 * For older distros: provide the constants below only when the system
 * headers included above did not already define them.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
106 
107 
108 struct tp_field {
109 	int offset;
110 	union {
111 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113 	};
114 };
115 
116 #define TP_UINT_FIELD(bits) \
117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118 { \
119 	u##bits value; \
120 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 	return value;  \
122 }
123 
124 TP_UINT_FIELD(8);
125 TP_UINT_FIELD(16);
126 TP_UINT_FIELD(32);
127 TP_UINT_FIELD(64);
128 
129 #define TP_UINT_FIELD__SWAPPED(bits) \
130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131 { \
132 	u##bits value; \
133 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
134 	return bswap_##bits(value);\
135 }
136 
137 TP_UINT_FIELD__SWAPPED(16);
138 TP_UINT_FIELD__SWAPPED(32);
139 TP_UINT_FIELD__SWAPPED(64);
140 
141 static int tp_field__init_uint(struct tp_field *field,
142 			       struct format_field *format_field,
143 			       bool needs_swap)
144 {
145 	field->offset = format_field->offset;
146 
147 	switch (format_field->size) {
148 	case 1:
149 		field->integer = tp_field__u8;
150 		break;
151 	case 2:
152 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 		break;
154 	case 4:
155 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 		break;
157 	case 8:
158 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 		break;
160 	default:
161 		return -1;
162 	}
163 
164 	return 0;
165 }
166 
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169 	return sample->raw_data + field->offset;
170 }
171 
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174 	field->offset = format_field->offset;
175 	field->pointer = tp_field__ptr;
176 	return 0;
177 }
178 
179 struct syscall_tp {
180 	struct tp_field id;
181 	union {
182 		struct tp_field args, ret;
183 	};
184 };
185 
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 					  struct tp_field *field,
188 					  const char *name)
189 {
190 	struct format_field *format_field = perf_evsel__field(evsel, name);
191 
192 	if (format_field == NULL)
193 		return -1;
194 
195 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197 
/*
 * Initialize the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as an unsigned integer accessor for the tracepoint field
 * of the same name.  Evaluates to the init function's return value.
 *
 * The evsel argument is parenthesized so that any pointer expression can
 * be passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
201 
/*
 * Look up the tracepoint field called @name in @evsel and initialize
 * @field as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
213 
/*
 * Initialize the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as a pointer accessor for the tracepoint field of the same
 * name.  Evaluates to the init function's return value.
 *
 * The evsel argument is parenthesized so that any pointer expression can
 * be passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
217 
218 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219 {
220 	zfree(&evsel->priv);
221 	perf_evsel__delete(evsel);
222 }
223 
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226 	evsel->priv = malloc(sizeof(struct syscall_tp));
227 	if (evsel->priv != NULL) {
228 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 			goto out_delete;
230 
231 		evsel->handler = handler;
232 		return 0;
233 	}
234 
235 	return -ENOMEM;
236 
237 out_delete:
238 	zfree(&evsel->priv);
239 	return -ENOENT;
240 }
241 
/*
 * Create the evsel for the syscall tracepoint named @direction and
 * initialize it with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint system provides
 * the event or if its initialization fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (evsel == NULL)
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
261 
/*
 * Read the 'name' field of a syscall tracepoint sample as an integer,
 * using the accessor stored in the struct syscall_tp at evsel->priv.
 * Macro arguments are parenthesized so arbitrary expressions can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/*
 * Same as perf_evsel__sc_tp_uint() but uses the pointer accessor, yielding
 * a pointer into the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
269 
270 struct syscall_arg {
271 	unsigned long val;
272 	struct thread *thread;
273 	struct trace  *trace;
274 	void	      *parm;
275 	u8	      idx;
276 	u8	      mask;
277 };
278 
/*
 * Table of names for an integer argument.  A value is looked up as
 * entries[value - offset], provided that index is within nr_entries.
 */
struct strarray {
	int	     offset;
	int	     nr_entries;
	const char **entries;
};
284 
/*
 * Define 'strarray__<array>' describing the string table <array>, whose
 * first entry corresponds to the value 'off'.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, for tables whose first entry corresponds to the value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
295 
296 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 						const char *intfmt,
298 					        struct syscall_arg *arg)
299 {
300 	struct strarray *sa = arg->parm;
301 	int idx = arg->val - sa->offset;
302 
303 	if (idx < 0 || idx >= sa->nr_entries)
304 		return scnprintf(bf, size, intfmt, arg->val);
305 
306 	return scnprintf(bf, size, "%s", sa->entries[idx]);
307 }
308 
/*
 * strarray beautifier that prints values without a name in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
316 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 *
 * strarray beautifier that prints values without a name in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
330 
/* File descriptor beautifier; only declared here, defined later in the file. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
335 
336 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
337 					   struct syscall_arg *arg)
338 {
339 	int fd = arg->val;
340 
341 	if (fd == AT_FDCWD)
342 		return scnprintf(bf, size, "CWD");
343 
344 	return syscall_arg__scnprintf_fd(bf, size, arg);
345 }
346 
347 #define SCA_FDAT syscall_arg__scnprintf_fd_at
348 
/* Beautifier for the fd passed to close(); only declared here, defined later in the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
353 
354 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
355 					 struct syscall_arg *arg)
356 {
357 	return scnprintf(bf, size, "%#lx", arg->val);
358 }
359 
360 #define SCA_HEX syscall_arg__scnprintf_hex
361 
362 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
363 					 struct syscall_arg *arg)
364 {
365 	return scnprintf(bf, size, "%d", arg->val);
366 }
367 
368 #define SCA_INT syscall_arg__scnprintf_int
369 
370 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
371 					       struct syscall_arg *arg)
372 {
373 	int printed = 0, prot = arg->val;
374 
375 	if (prot == PROT_NONE)
376 		return scnprintf(bf, size, "NONE");
377 #define	P_MMAP_PROT(n) \
378 	if (prot & PROT_##n) { \
379 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
380 		prot &= ~PROT_##n; \
381 	}
382 
383 	P_MMAP_PROT(EXEC);
384 	P_MMAP_PROT(READ);
385 	P_MMAP_PROT(WRITE);
386 #ifdef PROT_SEM
387 	P_MMAP_PROT(SEM);
388 #endif
389 	P_MMAP_PROT(GROWSDOWN);
390 	P_MMAP_PROT(GROWSUP);
391 #undef P_MMAP_PROT
392 
393 	if (prot)
394 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
395 
396 	return printed;
397 }
398 
399 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
400 
401 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
402 						struct syscall_arg *arg)
403 {
404 	int printed = 0, flags = arg->val;
405 
406 #define	P_MMAP_FLAG(n) \
407 	if (flags & MAP_##n) { \
408 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
409 		flags &= ~MAP_##n; \
410 	}
411 
412 	P_MMAP_FLAG(SHARED);
413 	P_MMAP_FLAG(PRIVATE);
414 #ifdef MAP_32BIT
415 	P_MMAP_FLAG(32BIT);
416 #endif
417 	P_MMAP_FLAG(ANONYMOUS);
418 	P_MMAP_FLAG(DENYWRITE);
419 	P_MMAP_FLAG(EXECUTABLE);
420 	P_MMAP_FLAG(FILE);
421 	P_MMAP_FLAG(FIXED);
422 	P_MMAP_FLAG(GROWSDOWN);
423 #ifdef MAP_HUGETLB
424 	P_MMAP_FLAG(HUGETLB);
425 #endif
426 	P_MMAP_FLAG(LOCKED);
427 	P_MMAP_FLAG(NONBLOCK);
428 	P_MMAP_FLAG(NORESERVE);
429 	P_MMAP_FLAG(POPULATE);
430 	P_MMAP_FLAG(STACK);
431 #ifdef MAP_UNINITIALIZED
432 	P_MMAP_FLAG(UNINITIALIZED);
433 #endif
434 #undef P_MMAP_FLAG
435 
436 	if (flags)
437 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
438 
439 	return printed;
440 }
441 
442 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
443 
444 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
445 						  struct syscall_arg *arg)
446 {
447 	int printed = 0, flags = arg->val;
448 
449 #define P_MREMAP_FLAG(n) \
450 	if (flags & MREMAP_##n) { \
451 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
452 		flags &= ~MREMAP_##n; \
453 	}
454 
455 	P_MREMAP_FLAG(MAYMOVE);
456 #ifdef MREMAP_FIXED
457 	P_MREMAP_FLAG(FIXED);
458 #endif
459 #undef P_MREMAP_FLAG
460 
461 	if (flags)
462 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
463 
464 	return printed;
465 }
466 
467 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
468 
469 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
470 						      struct syscall_arg *arg)
471 {
472 	int behavior = arg->val;
473 
474 	switch (behavior) {
475 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
476 	P_MADV_BHV(NORMAL);
477 	P_MADV_BHV(RANDOM);
478 	P_MADV_BHV(SEQUENTIAL);
479 	P_MADV_BHV(WILLNEED);
480 	P_MADV_BHV(DONTNEED);
481 	P_MADV_BHV(REMOVE);
482 	P_MADV_BHV(DONTFORK);
483 	P_MADV_BHV(DOFORK);
484 	P_MADV_BHV(HWPOISON);
485 #ifdef MADV_SOFT_OFFLINE
486 	P_MADV_BHV(SOFT_OFFLINE);
487 #endif
488 	P_MADV_BHV(MERGEABLE);
489 	P_MADV_BHV(UNMERGEABLE);
490 #ifdef MADV_HUGEPAGE
491 	P_MADV_BHV(HUGEPAGE);
492 #endif
493 #ifdef MADV_NOHUGEPAGE
494 	P_MADV_BHV(NOHUGEPAGE);
495 #endif
496 #ifdef MADV_DONTDUMP
497 	P_MADV_BHV(DONTDUMP);
498 #endif
499 #ifdef MADV_DODUMP
500 	P_MADV_BHV(DODUMP);
501 #endif
502 #undef P_MADV_PHV
503 	default: break;
504 	}
505 
506 	return scnprintf(bf, size, "%#x", behavior);
507 }
508 
509 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
510 
511 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
512 					   struct syscall_arg *arg)
513 {
514 	int printed = 0, op = arg->val;
515 
516 	if (op == 0)
517 		return scnprintf(bf, size, "NONE");
518 #define	P_CMD(cmd) \
519 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
520 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
521 		op &= ~LOCK_##cmd; \
522 	}
523 
524 	P_CMD(SH);
525 	P_CMD(EX);
526 	P_CMD(NB);
527 	P_CMD(UN);
528 	P_CMD(MAND);
529 	P_CMD(RW);
530 	P_CMD(READ);
531 	P_CMD(WRITE);
532 #undef P_OP
533 
534 	if (op)
535 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
536 
537 	return printed;
538 }
539 
540 #define SCA_FLOCK syscall_arg__scnprintf_flock
541 
542 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
543 {
544 	enum syscall_futex_args {
545 		SCF_UADDR   = (1 << 0),
546 		SCF_OP	    = (1 << 1),
547 		SCF_VAL	    = (1 << 2),
548 		SCF_TIMEOUT = (1 << 3),
549 		SCF_UADDR2  = (1 << 4),
550 		SCF_VAL3    = (1 << 5),
551 	};
552 	int op = arg->val;
553 	int cmd = op & FUTEX_CMD_MASK;
554 	size_t printed = 0;
555 
556 	switch (cmd) {
557 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
558 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
559 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
560 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
561 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
562 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
563 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
564 	P_FUTEX_OP(WAKE_OP);							  break;
565 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
566 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
567 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
568 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
569 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
570 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
571 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
572 	}
573 
574 	if (op & FUTEX_PRIVATE_FLAG)
575 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
576 
577 	if (op & FUTEX_CLOCK_REALTIME)
578 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
579 
580 	return printed;
581 }
582 
583 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
584 
585 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
586 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
587 
588 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
589 static DEFINE_STRARRAY(itimers);
590 
591 static const char *keyctl_options[] = {
592 	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
593 	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
594 	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
595 	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
596 	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
597 };
598 static DEFINE_STRARRAY(keyctl_options);
599 
600 static const char *whences[] = { "SET", "CUR", "END",
601 #ifdef SEEK_DATA
602 "DATA",
603 #endif
604 #ifdef SEEK_HOLE
605 "HOLE",
606 #endif
607 };
608 static DEFINE_STRARRAY(whences);
609 
610 static const char *fcntl_cmds[] = {
611 	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
612 	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
613 	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
614 	"F_GETOWNER_UIDS",
615 };
616 static DEFINE_STRARRAY(fcntl_cmds);
617 
618 static const char *rlimit_resources[] = {
619 	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
620 	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
621 	"RTTIME",
622 };
623 static DEFINE_STRARRAY(rlimit_resources);
624 
625 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
626 static DEFINE_STRARRAY(sighow);
627 
628 static const char *clockid[] = {
629 	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
630 	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
631 	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
632 };
633 static DEFINE_STRARRAY(clockid);
634 
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The table must stay dense: "MPLS" (AF_MPLS, 28) sits between "IB" (27)
 * and "CAN" (29).  Without it every family from CAN onwards was reported
 * under the name of its successor.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
643 static DEFINE_STRARRAY(socket_families);
644 
645 #ifndef SOCK_TYPE_MASK
646 #define SOCK_TYPE_MASK 0xf
647 #endif
648 
649 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
650 						      struct syscall_arg *arg)
651 {
652 	size_t printed;
653 	int type = arg->val,
654 	    flags = type & ~SOCK_TYPE_MASK;
655 
656 	type &= SOCK_TYPE_MASK;
657 	/*
658  	 * Can't use a strarray, MIPS may override for ABI reasons.
659  	 */
660 	switch (type) {
661 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
662 	P_SK_TYPE(STREAM);
663 	P_SK_TYPE(DGRAM);
664 	P_SK_TYPE(RAW);
665 	P_SK_TYPE(RDM);
666 	P_SK_TYPE(SEQPACKET);
667 	P_SK_TYPE(DCCP);
668 	P_SK_TYPE(PACKET);
669 #undef P_SK_TYPE
670 	default:
671 		printed = scnprintf(bf, size, "%#x", type);
672 	}
673 
674 #define	P_SK_FLAG(n) \
675 	if (flags & SOCK_##n) { \
676 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
677 		flags &= ~SOCK_##n; \
678 	}
679 
680 	P_SK_FLAG(CLOEXEC);
681 	P_SK_FLAG(NONBLOCK);
682 #undef P_SK_FLAG
683 
684 	if (flags)
685 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
686 
687 	return printed;
688 }
689 
690 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
691 
692 #ifndef MSG_PROBE
693 #define MSG_PROBE	     0x10
694 #endif
695 #ifndef MSG_WAITFORONE
696 #define MSG_WAITFORONE	0x10000
697 #endif
698 #ifndef MSG_SENDPAGE_NOTLAST
699 #define MSG_SENDPAGE_NOTLAST 0x20000
700 #endif
701 #ifndef MSG_FASTOPEN
702 #define MSG_FASTOPEN	     0x20000000
703 #endif
704 
705 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
706 					       struct syscall_arg *arg)
707 {
708 	int printed = 0, flags = arg->val;
709 
710 	if (flags == 0)
711 		return scnprintf(bf, size, "NONE");
712 #define	P_MSG_FLAG(n) \
713 	if (flags & MSG_##n) { \
714 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715 		flags &= ~MSG_##n; \
716 	}
717 
718 	P_MSG_FLAG(OOB);
719 	P_MSG_FLAG(PEEK);
720 	P_MSG_FLAG(DONTROUTE);
721 	P_MSG_FLAG(TRYHARD);
722 	P_MSG_FLAG(CTRUNC);
723 	P_MSG_FLAG(PROBE);
724 	P_MSG_FLAG(TRUNC);
725 	P_MSG_FLAG(DONTWAIT);
726 	P_MSG_FLAG(EOR);
727 	P_MSG_FLAG(WAITALL);
728 	P_MSG_FLAG(FIN);
729 	P_MSG_FLAG(SYN);
730 	P_MSG_FLAG(CONFIRM);
731 	P_MSG_FLAG(RST);
732 	P_MSG_FLAG(ERRQUEUE);
733 	P_MSG_FLAG(NOSIGNAL);
734 	P_MSG_FLAG(MORE);
735 	P_MSG_FLAG(WAITFORONE);
736 	P_MSG_FLAG(SENDPAGE_NOTLAST);
737 	P_MSG_FLAG(FASTOPEN);
738 	P_MSG_FLAG(CMSG_CLOEXEC);
739 #undef P_MSG_FLAG
740 
741 	if (flags)
742 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
743 
744 	return printed;
745 }
746 
747 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
748 
749 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
750 						 struct syscall_arg *arg)
751 {
752 	size_t printed = 0;
753 	int mode = arg->val;
754 
755 	if (mode == F_OK) /* 0 */
756 		return scnprintf(bf, size, "F");
757 #define	P_MODE(n) \
758 	if (mode & n##_OK) { \
759 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
760 		mode &= ~n##_OK; \
761 	}
762 
763 	P_MODE(R);
764 	P_MODE(W);
765 	P_MODE(X);
766 #undef P_MODE
767 
768 	if (mode)
769 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
770 
771 	return printed;
772 }
773 
774 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
775 
/* Filename beautifier; only declared here, defined later in the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
780 
781 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
782 					       struct syscall_arg *arg)
783 {
784 	int printed = 0, flags = arg->val;
785 
786 	if (!(flags & O_CREAT))
787 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
788 
789 	if (flags == 0)
790 		return scnprintf(bf, size, "RDONLY");
791 #define	P_FLAG(n) \
792 	if (flags & O_##n) { \
793 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
794 		flags &= ~O_##n; \
795 	}
796 
797 	P_FLAG(APPEND);
798 	P_FLAG(ASYNC);
799 	P_FLAG(CLOEXEC);
800 	P_FLAG(CREAT);
801 	P_FLAG(DIRECT);
802 	P_FLAG(DIRECTORY);
803 	P_FLAG(EXCL);
804 	P_FLAG(LARGEFILE);
805 	P_FLAG(NOATIME);
806 	P_FLAG(NOCTTY);
807 #ifdef O_NONBLOCK
808 	P_FLAG(NONBLOCK);
809 #elif O_NDELAY
810 	P_FLAG(NDELAY);
811 #endif
812 #ifdef O_PATH
813 	P_FLAG(PATH);
814 #endif
815 	P_FLAG(RDWR);
816 #ifdef O_DSYNC
817 	if ((flags & O_SYNC) == O_SYNC)
818 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
819 	else {
820 		P_FLAG(DSYNC);
821 	}
822 #else
823 	P_FLAG(SYNC);
824 #endif
825 	P_FLAG(TRUNC);
826 	P_FLAG(WRONLY);
827 #undef P_FLAG
828 
829 	if (flags)
830 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
831 
832 	return printed;
833 }
834 
835 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
836 
837 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
838 						struct syscall_arg *arg)
839 {
840 	int printed = 0, flags = arg->val;
841 
842 	if (flags == 0)
843 		return 0;
844 
845 #define	P_FLAG(n) \
846 	if (flags & PERF_FLAG_##n) { \
847 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
848 		flags &= ~PERF_FLAG_##n; \
849 	}
850 
851 	P_FLAG(FD_NO_GROUP);
852 	P_FLAG(FD_OUTPUT);
853 	P_FLAG(PID_CGROUP);
854 	P_FLAG(FD_CLOEXEC);
855 #undef P_FLAG
856 
857 	if (flags)
858 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
859 
860 	return printed;
861 }
862 
863 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
864 
865 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
866 						   struct syscall_arg *arg)
867 {
868 	int printed = 0, flags = arg->val;
869 
870 	if (flags == 0)
871 		return scnprintf(bf, size, "NONE");
872 #define	P_FLAG(n) \
873 	if (flags & EFD_##n) { \
874 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
875 		flags &= ~EFD_##n; \
876 	}
877 
878 	P_FLAG(SEMAPHORE);
879 	P_FLAG(CLOEXEC);
880 	P_FLAG(NONBLOCK);
881 #undef P_FLAG
882 
883 	if (flags)
884 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
885 
886 	return printed;
887 }
888 
889 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
890 
891 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
892 						struct syscall_arg *arg)
893 {
894 	int printed = 0, flags = arg->val;
895 
896 #define	P_FLAG(n) \
897 	if (flags & O_##n) { \
898 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
899 		flags &= ~O_##n; \
900 	}
901 
902 	P_FLAG(CLOEXEC);
903 	P_FLAG(NONBLOCK);
904 #undef P_FLAG
905 
906 	if (flags)
907 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
908 
909 	return printed;
910 }
911 
912 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
913 
914 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
915 {
916 	int sig = arg->val;
917 
918 	switch (sig) {
919 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
920 	P_SIGNUM(HUP);
921 	P_SIGNUM(INT);
922 	P_SIGNUM(QUIT);
923 	P_SIGNUM(ILL);
924 	P_SIGNUM(TRAP);
925 	P_SIGNUM(ABRT);
926 	P_SIGNUM(BUS);
927 	P_SIGNUM(FPE);
928 	P_SIGNUM(KILL);
929 	P_SIGNUM(USR1);
930 	P_SIGNUM(SEGV);
931 	P_SIGNUM(USR2);
932 	P_SIGNUM(PIPE);
933 	P_SIGNUM(ALRM);
934 	P_SIGNUM(TERM);
935 	P_SIGNUM(CHLD);
936 	P_SIGNUM(CONT);
937 	P_SIGNUM(STOP);
938 	P_SIGNUM(TSTP);
939 	P_SIGNUM(TTIN);
940 	P_SIGNUM(TTOU);
941 	P_SIGNUM(URG);
942 	P_SIGNUM(XCPU);
943 	P_SIGNUM(XFSZ);
944 	P_SIGNUM(VTALRM);
945 	P_SIGNUM(PROF);
946 	P_SIGNUM(WINCH);
947 	P_SIGNUM(IO);
948 	P_SIGNUM(PWR);
949 	P_SIGNUM(SYS);
950 #ifdef SIGEMT
951 	P_SIGNUM(EMT);
952 #endif
953 #ifdef SIGSTKFLT
954 	P_SIGNUM(STKFLT);
955 #endif
956 #ifdef SIGSWI
957 	P_SIGNUM(SWI);
958 #endif
959 	default: break;
960 	}
961 
962 	return scnprintf(bf, size, "%#x", sig);
963 }
964 
965 #define SCA_SIGNUM syscall_arg__scnprintf_signum
966 
967 #if defined(__i386__) || defined(__x86_64__)
968 /*
969  * FIXME: Make this available to all arches.
970  */
971 #define TCGETS		0x5401
972 
973 static const char *tioctls[] = {
974 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
975 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
976 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
977 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
978 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
979 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
980 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
981 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
982 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
983 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
984 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
985 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
986 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
987 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
988 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
989 };
990 
991 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
992 #endif /* defined(__i386__) || defined(__x86_64__) */
993 
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number
 * 'idx' is formatted with SCA_STRARRAY using the table 'strarray__<array>'.
 * 'name' is not used in the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(idx, name, array) \
	  .arg_scnprintf = { [idx] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [idx] = &strarray__##array, }
997 
998 static struct syscall_fmt {
999 	const char *name;
1000 	const char *alias;
1001 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1002 	void	   *arg_parm[6];
1003 	bool	   errmsg;
1004 	bool	   timeout;
1005 	bool	   hexret;
1006 } syscall_fmts[] = {
1007 	{ .name	    = "access",	    .errmsg = true,
1008 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1009 			     [1] = SCA_ACCMODE,  /* mode */ }, },
1010 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1011 	{ .name	    = "brk",	    .hexret = true,
1012 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1013 	{ .name	    = "chdir",	    .errmsg = true,
1014 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1015 	{ .name	    = "chmod",	    .errmsg = true,
1016 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1017 	{ .name	    = "chroot",	    .errmsg = true,
1018 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1019 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1020 	{ .name	    = "close",	    .errmsg = true,
1021 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1022 	{ .name	    = "connect",    .errmsg = true, },
1023 	{ .name	    = "creat",	    .errmsg = true,
1024 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1025 	{ .name	    = "dup",	    .errmsg = true,
1026 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1027 	{ .name	    = "dup2",	    .errmsg = true,
1028 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1029 	{ .name	    = "dup3",	    .errmsg = true,
1030 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1032 	{ .name	    = "eventfd2",   .errmsg = true,
1033 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1034 	{ .name	    = "faccessat",  .errmsg = true,
1035 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1036 			     [1] = SCA_FILENAME, /* filename */ }, },
1037 	{ .name	    = "fadvise64",  .errmsg = true,
1038 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 	{ .name	    = "fallocate",  .errmsg = true,
1040 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 	{ .name	    = "fchdir",	    .errmsg = true,
1042 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 	{ .name	    = "fchmod",	    .errmsg = true,
1044 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045 	{ .name	    = "fchmodat",   .errmsg = true,
1046 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1047 			     [1] = SCA_FILENAME, /* filename */ }, },
1048 	{ .name	    = "fchown",	    .errmsg = true,
1049 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1050 	{ .name	    = "fchownat",   .errmsg = true,
1051 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1052 			     [1] = SCA_FILENAME, /* filename */ }, },
1053 	{ .name	    = "fcntl",	    .errmsg = true,
1054 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1055 			     [1] = SCA_STRARRAY, /* cmd */ },
1056 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1057 	{ .name	    = "fdatasync",  .errmsg = true,
1058 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 	{ .name	    = "flock",	    .errmsg = true,
1060 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1061 			     [1] = SCA_FLOCK, /* cmd */ }, },
1062 	{ .name	    = "fsetxattr",  .errmsg = true,
1063 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1065 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1067 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1068 			     [1] = SCA_FILENAME, /* filename */ }, },
1069 	{ .name	    = "fstatfs",    .errmsg = true,
1070 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1071 	{ .name	    = "fsync",    .errmsg = true,
1072 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1073 	{ .name	    = "ftruncate", .errmsg = true,
1074 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1075 	{ .name	    = "futex",	    .errmsg = true,
1076 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1077 	{ .name	    = "futimesat", .errmsg = true,
1078 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1079 			     [1] = SCA_FILENAME, /* filename */ }, },
1080 	{ .name	    = "getdents",   .errmsg = true,
1081 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1082 	{ .name	    = "getdents64", .errmsg = true,
1083 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1084 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1085 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1086 	{ .name	    = "getxattr",    .errmsg = true,
1087 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088 	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1089 	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1090 	{ .name	    = "ioctl",	    .errmsg = true,
1091 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1092 #if defined(__i386__) || defined(__x86_64__)
1093 /*
1094  * FIXME: Make this available to all arches.
1095  */
1096 			     [1] = SCA_STRHEXARRAY, /* cmd */
1097 			     [2] = SCA_HEX, /* arg */ },
1098 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1099 #else
1100 			     [2] = SCA_HEX, /* arg */ }, },
1101 #endif
1102 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1103 	{ .name	    = "kill",	    .errmsg = true,
1104 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1105 	{ .name	    = "lchown",    .errmsg = true,
1106 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1107 	{ .name	    = "lgetxattr",  .errmsg = true,
1108 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1109 	{ .name	    = "linkat",	    .errmsg = true,
1110 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1111 	{ .name	    = "listxattr",  .errmsg = true,
1112 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1113 	{ .name	    = "llistxattr", .errmsg = true,
1114 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1115 	{ .name	    = "lremovexattr",  .errmsg = true,
1116 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1117 	{ .name	    = "lseek",	    .errmsg = true,
1118 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1119 			     [2] = SCA_STRARRAY, /* whence */ },
1120 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1121 	{ .name	    = "lsetxattr",  .errmsg = true,
1122 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1124 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1125 	{ .name	    = "lsxattr",    .errmsg = true,
1126 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127 	{ .name     = "madvise",    .errmsg = true,
1128 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1129 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1130 	{ .name	    = "mkdir",    .errmsg = true,
1131 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1132 	{ .name	    = "mkdirat",    .errmsg = true,
1133 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1134 			     [1] = SCA_FILENAME, /* pathname */ }, },
1135 	{ .name	    = "mknod",      .errmsg = true,
1136 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1137 	{ .name	    = "mknodat",    .errmsg = true,
1138 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1139 			     [1] = SCA_FILENAME, /* filename */ }, },
1140 	{ .name	    = "mlock",	    .errmsg = true,
1141 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1142 	{ .name	    = "mlockall",   .errmsg = true,
1143 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1144 	{ .name	    = "mmap",	    .hexret = true,
1145 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1146 			     [2] = SCA_MMAP_PROT, /* prot */
1147 			     [3] = SCA_MMAP_FLAGS, /* flags */
1148 			     [4] = SCA_FD, 	  /* fd */ }, },
1149 	{ .name	    = "mprotect",   .errmsg = true,
1150 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1151 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1152 	{ .name	    = "mq_unlink", .errmsg = true,
1153 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1154 	{ .name	    = "mremap",	    .hexret = true,
1155 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1156 			     [3] = SCA_MREMAP_FLAGS, /* flags */
1157 			     [4] = SCA_HEX, /* new_addr */ }, },
1158 	{ .name	    = "munlock",    .errmsg = true,
1159 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1160 	{ .name	    = "munmap",	    .errmsg = true,
1161 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1162 	{ .name	    = "name_to_handle_at", .errmsg = true,
1163 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1164 	{ .name	    = "newfstatat", .errmsg = true,
1165 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1166 			     [1] = SCA_FILENAME, /* filename */ }, },
1167 	{ .name	    = "open",	    .errmsg = true,
1168 	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1169 			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1170 	{ .name	    = "open_by_handle_at", .errmsg = true,
1171 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1172 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1173 	{ .name	    = "openat",	    .errmsg = true,
1174 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1175 			     [1] = SCA_FILENAME, /* filename */
1176 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1177 	{ .name	    = "perf_event_open", .errmsg = true,
1178 	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1179 			     [2] = SCA_INT, /* cpu */
1180 			     [3] = SCA_FD,  /* group_fd */
1181 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1182 	{ .name	    = "pipe2",	    .errmsg = true,
1183 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1184 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1185 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1186 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1187 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1188 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1189 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1190 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1191 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1192 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1193 	{ .name	    = "pwritev",    .errmsg = true,
1194 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1195 	{ .name	    = "read",	    .errmsg = true,
1196 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1197 	{ .name	    = "readlink",   .errmsg = true,
1198 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1199 	{ .name	    = "readlinkat", .errmsg = true,
1200 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1201 			     [1] = SCA_FILENAME, /* pathname */ }, },
1202 	{ .name	    = "readv",	    .errmsg = true,
1203 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204 	{ .name	    = "recvfrom",   .errmsg = true,
1205 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1206 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1207 	{ .name	    = "recvmmsg",   .errmsg = true,
1208 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1209 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1210 	{ .name	    = "recvmsg",    .errmsg = true,
1211 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1212 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1213 	{ .name	    = "removexattr", .errmsg = true,
1214 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1215 	{ .name	    = "renameat",   .errmsg = true,
1216 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1217 	{ .name	    = "rmdir",    .errmsg = true,
1218 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1219 	{ .name	    = "rt_sigaction", .errmsg = true,
1220 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1221 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1222 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1223 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1224 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1225 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1226 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1227 	{ .name	    = "sendmmsg",    .errmsg = true,
1228 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1229 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1230 	{ .name	    = "sendmsg",    .errmsg = true,
1231 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1232 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1233 	{ .name	    = "sendto",	    .errmsg = true,
1234 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1235 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1236 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1237 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1238 	{ .name	    = "setxattr",   .errmsg = true,
1239 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1240 	{ .name	    = "shutdown",   .errmsg = true,
1241 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1242 	{ .name	    = "socket",	    .errmsg = true,
1243 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1244 			     [1] = SCA_SK_TYPE, /* type */ },
1245 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1246 	{ .name	    = "socketpair", .errmsg = true,
1247 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1248 			     [1] = SCA_SK_TYPE, /* type */ },
1249 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1250 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1251 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1252 	{ .name	    = "statfs",	    .errmsg = true,
1253 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1254 	{ .name	    = "swapoff",    .errmsg = true,
1255 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1256 	{ .name	    = "swapon",	    .errmsg = true,
1257 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1258 	{ .name	    = "symlinkat",  .errmsg = true,
1259 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1260 	{ .name	    = "tgkill",	    .errmsg = true,
1261 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1262 	{ .name	    = "tkill",	    .errmsg = true,
1263 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1264 	{ .name	    = "truncate",   .errmsg = true,
1265 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1266 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1267 	{ .name	    = "unlinkat",   .errmsg = true,
1268 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1269 			     [1] = SCA_FILENAME, /* pathname */ }, },
1270 	{ .name	    = "utime",  .errmsg = true,
1271 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1272 	{ .name	    = "utimensat",  .errmsg = true,
1273 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1274 			     [1] = SCA_FILENAME, /* filename */ }, },
1275 	{ .name	    = "utimes",  .errmsg = true,
1276 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1277 	{ .name	    = "vmsplice",  .errmsg = true,
1278 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1279 	{ .name	    = "write",	    .errmsg = true,
1280 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1281 	{ .name	    = "writev",	    .errmsg = true,
1282 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1283 };
1284 
1285 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1286 {
1287 	const struct syscall_fmt *fmt = fmtp;
1288 	return strcmp(name, fmt->name);
1289 }
1290 
1291 static struct syscall_fmt *syscall_fmt__find(const char *name)
1292 {
1293 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1294 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1295 }
1296 
/*
 * Per syscall-id information, filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of entries in ->args (the leading "nr" field is not counted) */
	int		    nr_args;
	/* linked list of argument fields taken from ->tp_format */
	struct format_field *args;
	/* syscall name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* matching syscall_fmts[] entry, NULL if there is none */
	struct syscall_fmt  *fmt;
	/* one formatter slot per argument, NULL slots are printed as "%ld" */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument formatter parameter, taken from ->fmt when it exists */
	void		    **arg_parm;
};
1307 
1308 static size_t fprintf_duration(unsigned long t, FILE *fp)
1309 {
1310 	double duration = (double)t / NSEC_PER_MSEC;
1311 	size_t printed = fprintf(fp, "(");
1312 
1313 	if (duration >= 1.0)
1314 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1315 	else if (duration >= 0.01)
1316 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1317 	else
1318 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1319 	return printed + fprintf(fp, "): ");
1320 }
1321 
1322 /**
1323  * filename.ptr: The filename char pointer that will be vfs_getname'd
1324  * filename.entry_str_pos: Where to insert the string translated from
1325  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1326  */
1327 struct thread_trace {
1328 	u64		  entry_time;
1329 	u64		  exit_time;
1330 	bool		  entry_pending;
1331 	unsigned long	  nr_events;
1332 	unsigned long	  pfmaj, pfmin;
1333 	char		  *entry_str;
1334 	double		  runtime_ms;
1335         struct {
1336 		unsigned long ptr;
1337 		short int     entry_str_pos;
1338 		bool	      pending_open;
1339 		unsigned int  namelen;
1340 		char	      *name;
1341 	} filename;
1342 	struct {
1343 		int	  max;
1344 		char	  **table;
1345 	} paths;
1346 
1347 	struct intlist *syscall_stats;
1348 };
1349 
1350 static struct thread_trace *thread_trace__new(void)
1351 {
1352 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1353 
1354 	if (ttrace)
1355 		ttrace->paths.max = -1;
1356 
1357 	ttrace->syscall_stats = intlist__new(NULL);
1358 
1359 	return ttrace;
1360 }
1361 
1362 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1363 {
1364 	struct thread_trace *ttrace;
1365 
1366 	if (thread == NULL)
1367 		goto fail;
1368 
1369 	if (thread__priv(thread) == NULL)
1370 		thread__set_priv(thread, thread_trace__new());
1371 
1372 	if (thread__priv(thread) == NULL)
1373 		goto fail;
1374 
1375 	ttrace = thread__priv(thread);
1376 	++ttrace->nr_events;
1377 
1378 	return ttrace;
1379 fail:
1380 	color_fprintf(fp, PERF_COLOR_RED,
1381 		      "WARNING: not enough memory, dropping samples!\n");
1382 	return NULL;
1383 }
1384 
/* Bit flags, one per kind of page fault. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the buffer where a sys_enter line is formatted. */
static const size_t trace__entry_str_size = 2048;
1389 
1390 struct trace {
1391 	struct perf_tool	tool;
1392 	struct {
1393 		int		machine;
1394 		int		open_id;
1395 	}			audit;
1396 	struct {
1397 		int		max;
1398 		struct syscall  *table;
1399 		struct {
1400 			struct perf_evsel *sys_enter,
1401 					  *sys_exit;
1402 		}		events;
1403 	} syscalls;
1404 	struct record_opts	opts;
1405 	struct perf_evlist	*evlist;
1406 	struct machine		*host;
1407 	struct thread		*current;
1408 	u64			base_time;
1409 	FILE			*output;
1410 	unsigned long		nr_events;
1411 	struct strlist		*ev_qualifier;
1412 	struct {
1413 		size_t		nr;
1414 		int		*entries;
1415 	}			ev_qualifier_ids;
1416 	struct intlist		*tid_list;
1417 	struct intlist		*pid_list;
1418 	struct {
1419 		size_t		nr;
1420 		pid_t		*entries;
1421 	}			filter_pids;
1422 	double			duration_filter;
1423 	double			runtime_ms;
1424 	struct {
1425 		u64		vfs_getname,
1426 				proc_getname;
1427 	} stats;
1428 	bool			not_ev_qualifier;
1429 	bool			live;
1430 	bool			full_time;
1431 	bool			sched;
1432 	bool			multiple_threads;
1433 	bool			summary;
1434 	bool			summary_only;
1435 	bool			show_comm;
1436 	bool			show_tool_stats;
1437 	bool			trace_syscalls;
1438 	bool			force;
1439 	bool			vfs_getname;
1440 	int			trace_pgfaults;
1441 };
1442 
1443 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1444 {
1445 	struct thread_trace *ttrace = thread__priv(thread);
1446 
1447 	if (fd > ttrace->paths.max) {
1448 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1449 
1450 		if (npath == NULL)
1451 			return -1;
1452 
1453 		if (ttrace->paths.max != -1) {
1454 			memset(npath + ttrace->paths.max + 1, 0,
1455 			       (fd - ttrace->paths.max) * sizeof(char *));
1456 		} else {
1457 			memset(npath, 0, (fd + 1) * sizeof(char *));
1458 		}
1459 
1460 		ttrace->paths.table = npath;
1461 		ttrace->paths.max   = fd;
1462 	}
1463 
1464 	ttrace->paths.table[fd] = strdup(pathname);
1465 
1466 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1467 }
1468 
1469 static int thread__read_fd_path(struct thread *thread, int fd)
1470 {
1471 	char linkname[PATH_MAX], pathname[PATH_MAX];
1472 	struct stat st;
1473 	int ret;
1474 
1475 	if (thread->pid_ == thread->tid) {
1476 		scnprintf(linkname, sizeof(linkname),
1477 			  "/proc/%d/fd/%d", thread->pid_, fd);
1478 	} else {
1479 		scnprintf(linkname, sizeof(linkname),
1480 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1481 	}
1482 
1483 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1484 		return -1;
1485 
1486 	ret = readlink(linkname, pathname, sizeof(pathname));
1487 
1488 	if (ret < 0 || ret > st.st_size)
1489 		return -1;
1490 
1491 	pathname[ret] = '\0';
1492 	return trace__set_fd_pathname(thread, fd, pathname);
1493 }
1494 
1495 static const char *thread__fd_path(struct thread *thread, int fd,
1496 				   struct trace *trace)
1497 {
1498 	struct thread_trace *ttrace = thread__priv(thread);
1499 
1500 	if (ttrace == NULL)
1501 		return NULL;
1502 
1503 	if (fd < 0)
1504 		return NULL;
1505 
1506 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1507 		if (!trace->live)
1508 			return NULL;
1509 		++trace->stats.proc_getname;
1510 		if (thread__read_fd_path(thread, fd))
1511 			return NULL;
1512 	}
1513 
1514 	return ttrace->paths.table[fd];
1515 }
1516 
1517 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1518 					struct syscall_arg *arg)
1519 {
1520 	int fd = arg->val;
1521 	size_t printed = scnprintf(bf, size, "%d", fd);
1522 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1523 
1524 	if (path)
1525 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1526 
1527 	return printed;
1528 }
1529 
1530 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1531 					      struct syscall_arg *arg)
1532 {
1533 	int fd = arg->val;
1534 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1535 	struct thread_trace *ttrace = thread__priv(arg->thread);
1536 
1537 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1538 		zfree(&ttrace->paths.table[fd]);
1539 
1540 	return printed;
1541 }
1542 
1543 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1544 				     unsigned long ptr)
1545 {
1546 	struct thread_trace *ttrace = thread__priv(thread);
1547 
1548 	ttrace->filename.ptr = ptr;
1549 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1550 }
1551 
1552 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1553 					      struct syscall_arg *arg)
1554 {
1555 	unsigned long ptr = arg->val;
1556 
1557 	if (!arg->trace->vfs_getname)
1558 		return scnprintf(bf, size, "%#x", ptr);
1559 
1560 	thread__set_filename_pos(arg->thread, bf, ptr);
1561 	return 0;
1562 }
1563 
1564 static bool trace__filter_duration(struct trace *trace, double t)
1565 {
1566 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1567 }
1568 
1569 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1570 {
1571 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1572 
1573 	return fprintf(fp, "%10.3f ", ts);
1574 }
1575 
/*
 * Flags set from the signal handler below. They are written asynchronously,
 * so they use volatile sig_atomic_t, the object type C guarantees can be
 * safely stored to from a signal handler.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination and record whether the signal that
 * caused it was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1584 
1585 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1586 					u64 duration, u64 tstamp, FILE *fp)
1587 {
1588 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1589 	printed += fprintf_duration(duration, fp);
1590 
1591 	if (trace->multiple_threads) {
1592 		if (trace->show_comm)
1593 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1594 		printed += fprintf(fp, "%d ", thread->tid);
1595 	}
1596 
1597 	return printed;
1598 }
1599 
1600 static int trace__process_event(struct trace *trace, struct machine *machine,
1601 				union perf_event *event, struct perf_sample *sample)
1602 {
1603 	int ret = 0;
1604 
1605 	switch (event->header.type) {
1606 	case PERF_RECORD_LOST:
1607 		color_fprintf(trace->output, PERF_COLOR_RED,
1608 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1609 		ret = machine__process_lost_event(machine, event, sample);
1610 	default:
1611 		ret = machine__process_event(machine, event, sample);
1612 		break;
1613 	}
1614 
1615 	return ret;
1616 }
1617 
1618 static int trace__tool_process(struct perf_tool *tool,
1619 			       union perf_event *event,
1620 			       struct perf_sample *sample,
1621 			       struct machine *machine)
1622 {
1623 	struct trace *trace = container_of(tool, struct trace, tool);
1624 	return trace__process_event(trace, machine, event, sample);
1625 }
1626 
1627 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1628 {
1629 	int err = symbol__init(NULL);
1630 
1631 	if (err)
1632 		return err;
1633 
1634 	trace->host = machine__new_host();
1635 	if (trace->host == NULL)
1636 		return -ENOMEM;
1637 
1638 	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1639 		return -errno;
1640 
1641 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1642 					    evlist->threads, trace__tool_process, false,
1643 					    trace->opts.proc_map_timeout);
1644 	if (err)
1645 		symbol__exit();
1646 
1647 	return err;
1648 }
1649 
1650 static int syscall__set_arg_fmts(struct syscall *sc)
1651 {
1652 	struct format_field *field;
1653 	int idx = 0;
1654 
1655 	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1656 	if (sc->arg_scnprintf == NULL)
1657 		return -1;
1658 
1659 	if (sc->fmt)
1660 		sc->arg_parm = sc->fmt->arg_parm;
1661 
1662 	for (field = sc->args; field; field = field->next) {
1663 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1664 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1665 		else if (field->flags & FIELD_IS_POINTER)
1666 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1667 		++idx;
1668 	}
1669 
1670 	return 0;
1671 }
1672 
1673 static int trace__read_syscall_info(struct trace *trace, int id)
1674 {
1675 	char tp_name[128];
1676 	struct syscall *sc;
1677 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1678 
1679 	if (name == NULL)
1680 		return -1;
1681 
1682 	if (id > trace->syscalls.max) {
1683 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1684 
1685 		if (nsyscalls == NULL)
1686 			return -1;
1687 
1688 		if (trace->syscalls.max != -1) {
1689 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1690 			       (id - trace->syscalls.max) * sizeof(*sc));
1691 		} else {
1692 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1693 		}
1694 
1695 		trace->syscalls.table = nsyscalls;
1696 		trace->syscalls.max   = id;
1697 	}
1698 
1699 	sc = trace->syscalls.table + id;
1700 	sc->name = name;
1701 
1702 	sc->fmt  = syscall_fmt__find(sc->name);
1703 
1704 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1705 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1706 
1707 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1708 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1709 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1710 	}
1711 
1712 	if (sc->tp_format == NULL)
1713 		return -1;
1714 
1715 	sc->args = sc->tp_format->format.fields;
1716 	sc->nr_args = sc->tp_format->format.nr_fields;
1717 	/* drop nr field - not relevant here; does not exist on older kernels */
1718 	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1719 		sc->args = sc->args->next;
1720 		--sc->nr_args;
1721 	}
1722 
1723 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1724 
1725 	return syscall__set_arg_fmts(sc);
1726 }
1727 
1728 static int trace__validate_ev_qualifier(struct trace *trace)
1729 {
1730 	int err = 0, i;
1731 	struct str_node *pos;
1732 
1733 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1734 	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1735 						 sizeof(trace->ev_qualifier_ids.entries[0]));
1736 
1737 	if (trace->ev_qualifier_ids.entries == NULL) {
1738 		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1739 		       trace->output);
1740 		err = -EINVAL;
1741 		goto out;
1742 	}
1743 
1744 	i = 0;
1745 
1746 	strlist__for_each(pos, trace->ev_qualifier) {
1747 		const char *sc = pos->s;
1748 		int id = audit_name_to_syscall(sc, trace->audit.machine);
1749 
1750 		if (id < 0) {
1751 			if (err == 0) {
1752 				fputs("Error:\tInvalid syscall ", trace->output);
1753 				err = -EINVAL;
1754 			} else {
1755 				fputs(", ", trace->output);
1756 			}
1757 
1758 			fputs(sc, trace->output);
1759 		}
1760 
1761 		trace->ev_qualifier_ids.entries[i++] = id;
1762 	}
1763 
1764 	if (err < 0) {
1765 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1766 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1767 		zfree(&trace->ev_qualifier_ids.entries);
1768 		trace->ev_qualifier_ids.nr = 0;
1769 	}
1770 out:
1771 	return err;
1772 }
1773 
1774 /*
1775  * args is to be interpreted as a series of longs but we need to handle
1776  * 8-byte unaligned accesses. args points to raw_data within the event
1777  * and raw_data is guaranteed to be 8-byte unaligned because it is
1778  * preceded by raw_size which is a u32. So we need to copy args to a temp
1779  * variable to read it. Most notably this avoids extended load instructions
1780  * on unaligned addresses
1781  */
1782 
1783 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1784 				      unsigned char *args, struct trace *trace,
1785 				      struct thread *thread)
1786 {
1787 	size_t printed = 0;
1788 	unsigned char *p;
1789 	unsigned long val;
1790 
1791 	if (sc->args != NULL) {
1792 		struct format_field *field;
1793 		u8 bit = 1;
1794 		struct syscall_arg arg = {
1795 			.idx	= 0,
1796 			.mask	= 0,
1797 			.trace  = trace,
1798 			.thread = thread,
1799 		};
1800 
1801 		for (field = sc->args; field;
1802 		     field = field->next, ++arg.idx, bit <<= 1) {
1803 			if (arg.mask & bit)
1804 				continue;
1805 
1806 			/* special care for unaligned accesses */
1807 			p = args + sizeof(unsigned long) * arg.idx;
1808 			memcpy(&val, p, sizeof(val));
1809 
1810 			/*
1811  			 * Suppress this argument if its value is zero and
1812  			 * and we don't have a string associated in an
1813  			 * strarray for it.
1814  			 */
1815 			if (val == 0 &&
1816 			    !(sc->arg_scnprintf &&
1817 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1818 			      sc->arg_parm[arg.idx]))
1819 				continue;
1820 
1821 			printed += scnprintf(bf + printed, size - printed,
1822 					     "%s%s: ", printed ? ", " : "", field->name);
1823 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1824 				arg.val = val;
1825 				if (sc->arg_parm)
1826 					arg.parm = sc->arg_parm[arg.idx];
1827 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1828 								      size - printed, &arg);
1829 			} else {
1830 				printed += scnprintf(bf + printed, size - printed,
1831 						     "%ld", val);
1832 			}
1833 		}
1834 	} else {
1835 		int i = 0;
1836 
1837 		while (i < 6) {
1838 			/* special care for unaligned accesses */
1839 			p = args + sizeof(unsigned long) * i;
1840 			memcpy(&val, p, sizeof(val));
1841 			printed += scnprintf(bf + printed, size - printed,
1842 					     "%sarg%d: %ld",
1843 					     printed ? ", " : "", i, val);
1844 			++i;
1845 		}
1846 	}
1847 
1848 	return printed;
1849 }
1850 
/* Signature shared by the tracepoint sample handlers, e.g. trace__sys_enter(). */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1854 
1855 static struct syscall *trace__syscall_info(struct trace *trace,
1856 					   struct perf_evsel *evsel, int id)
1857 {
1858 
1859 	if (id < 0) {
1860 
1861 		/*
1862 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1863 		 * before that, leaving at a higher verbosity level till that is
1864 		 * explained. Reproduced with plain ftrace with:
1865 		 *
1866 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1867 		 * grep "NR -1 " /t/trace_pipe
1868 		 *
1869 		 * After generating some load on the machine.
1870  		 */
1871 		if (verbose > 1) {
1872 			static u64 n;
1873 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1874 				id, perf_evsel__name(evsel), ++n);
1875 		}
1876 		return NULL;
1877 	}
1878 
1879 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1880 	    trace__read_syscall_info(trace, id))
1881 		goto out_cant_read;
1882 
1883 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1884 		goto out_cant_read;
1885 
1886 	return &trace->syscalls.table[id];
1887 
1888 out_cant_read:
1889 	if (verbose) {
1890 		fprintf(trace->output, "Problems reading syscall %d", id);
1891 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1892 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1893 		fputs(" information\n", trace->output);
1894 	}
1895 	return NULL;
1896 }
1897 
1898 static void thread__update_stats(struct thread_trace *ttrace,
1899 				 int id, struct perf_sample *sample)
1900 {
1901 	struct int_node *inode;
1902 	struct stats *stats;
1903 	u64 duration = 0;
1904 
1905 	inode = intlist__findnew(ttrace->syscall_stats, id);
1906 	if (inode == NULL)
1907 		return;
1908 
1909 	stats = inode->priv;
1910 	if (stats == NULL) {
1911 		stats = malloc(sizeof(struct stats));
1912 		if (stats == NULL)
1913 			return;
1914 		init_stats(stats);
1915 		inode->priv = stats;
1916 	}
1917 
1918 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1919 		duration = sample->time - ttrace->entry_time;
1920 
1921 	update_stats(stats, duration);
1922 }
1923 
1924 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1925 {
1926 	struct thread_trace *ttrace;
1927 	u64 duration;
1928 	size_t printed;
1929 
1930 	if (trace->current == NULL)
1931 		return 0;
1932 
1933 	ttrace = thread__priv(trace->current);
1934 
1935 	if (!ttrace->entry_pending)
1936 		return 0;
1937 
1938 	duration = sample->time - ttrace->entry_time;
1939 
1940 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1941 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1942 	ttrace->entry_pending = false;
1943 
1944 	return printed;
1945 }
1946 
1947 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1948 			    union perf_event *event __maybe_unused,
1949 			    struct perf_sample *sample)
1950 {
1951 	char *msg;
1952 	void *args;
1953 	size_t printed = 0;
1954 	struct thread *thread;
1955 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1956 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1957 	struct thread_trace *ttrace;
1958 
1959 	if (sc == NULL)
1960 		return -1;
1961 
1962 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1963 	ttrace = thread__trace(thread, trace->output);
1964 	if (ttrace == NULL)
1965 		goto out_put;
1966 
1967 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1968 
1969 	if (ttrace->entry_str == NULL) {
1970 		ttrace->entry_str = malloc(trace__entry_str_size);
1971 		if (!ttrace->entry_str)
1972 			goto out_put;
1973 	}
1974 
1975 	if (!trace->summary_only)
1976 		trace__printf_interrupted_entry(trace, sample);
1977 
1978 	ttrace->entry_time = sample->time;
1979 	msg = ttrace->entry_str;
1980 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1981 
1982 	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1983 					   args, trace, thread);
1984 
1985 	if (sc->is_exit) {
1986 		if (!trace->duration_filter && !trace->summary_only) {
1987 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1988 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1989 		}
1990 	} else {
1991 		ttrace->entry_pending = true;
1992 		/* See trace__vfs_getname & trace__sys_exit */
1993 		ttrace->filename.pending_open = false;
1994 	}
1995 
1996 	if (trace->current != thread) {
1997 		thread__put(trace->current);
1998 		trace->current = thread__get(thread);
1999 	}
2000 	err = 0;
2001 out_put:
2002 	thread__put(thread);
2003 	return err;
2004 }
2005 
2006 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2007 			   union perf_event *event __maybe_unused,
2008 			   struct perf_sample *sample)
2009 {
2010 	long ret;
2011 	u64 duration = 0;
2012 	struct thread *thread;
2013 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2014 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2015 	struct thread_trace *ttrace;
2016 
2017 	if (sc == NULL)
2018 		return -1;
2019 
2020 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2021 	ttrace = thread__trace(thread, trace->output);
2022 	if (ttrace == NULL)
2023 		goto out_put;
2024 
2025 	if (trace->summary)
2026 		thread__update_stats(ttrace, id, sample);
2027 
2028 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2029 
2030 	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2031 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2032 		ttrace->filename.pending_open = false;
2033 		++trace->stats.vfs_getname;
2034 	}
2035 
2036 	ttrace->exit_time = sample->time;
2037 
2038 	if (ttrace->entry_time) {
2039 		duration = sample->time - ttrace->entry_time;
2040 		if (trace__filter_duration(trace, duration))
2041 			goto out;
2042 	} else if (trace->duration_filter)
2043 		goto out;
2044 
2045 	if (trace->summary_only)
2046 		goto out;
2047 
2048 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2049 
2050 	if (ttrace->entry_pending) {
2051 		fprintf(trace->output, "%-70s", ttrace->entry_str);
2052 	} else {
2053 		fprintf(trace->output, " ... [");
2054 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2055 		fprintf(trace->output, "]: %s()", sc->name);
2056 	}
2057 
2058 	if (sc->fmt == NULL) {
2059 signed_print:
2060 		fprintf(trace->output, ") = %ld", ret);
2061 	} else if (ret < 0 && sc->fmt->errmsg) {
2062 		char bf[STRERR_BUFSIZE];
2063 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2064 			   *e = audit_errno_to_name(-ret);
2065 
2066 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
2067 	} else if (ret == 0 && sc->fmt->timeout)
2068 		fprintf(trace->output, ") = 0 Timeout");
2069 	else if (sc->fmt->hexret)
2070 		fprintf(trace->output, ") = %#lx", ret);
2071 	else
2072 		goto signed_print;
2073 
2074 	fputc('\n', trace->output);
2075 out:
2076 	ttrace->entry_pending = false;
2077 	err = 0;
2078 out_put:
2079 	thread__put(thread);
2080 	return err;
2081 }
2082 
2083 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2084 			      union perf_event *event __maybe_unused,
2085 			      struct perf_sample *sample)
2086 {
2087 	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2088 	struct thread_trace *ttrace;
2089 	size_t filename_len, entry_str_len, to_move;
2090 	ssize_t remaining_space;
2091 	char *pos;
2092 	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2093 
2094 	if (!thread)
2095 		goto out;
2096 
2097 	ttrace = thread__priv(thread);
2098 	if (!ttrace)
2099 		goto out;
2100 
2101 	filename_len = strlen(filename);
2102 
2103 	if (ttrace->filename.namelen < filename_len) {
2104 		char *f = realloc(ttrace->filename.name, filename_len + 1);
2105 
2106 		if (f == NULL)
2107 				goto out;
2108 
2109 		ttrace->filename.namelen = filename_len;
2110 		ttrace->filename.name = f;
2111 	}
2112 
2113 	strcpy(ttrace->filename.name, filename);
2114 	ttrace->filename.pending_open = true;
2115 
2116 	if (!ttrace->filename.ptr)
2117 		goto out;
2118 
2119 	entry_str_len = strlen(ttrace->entry_str);
2120 	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2121 	if (remaining_space <= 0)
2122 		goto out;
2123 
2124 	if (filename_len > (size_t)remaining_space) {
2125 		filename += filename_len - remaining_space;
2126 		filename_len = remaining_space;
2127 	}
2128 
2129 	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2130 	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2131 	memmove(pos + filename_len, pos, to_move);
2132 	memcpy(pos, filename, filename_len);
2133 
2134 	ttrace->filename.ptr = 0;
2135 	ttrace->filename.entry_str_pos = 0;
2136 out:
2137 	return 0;
2138 }
2139 
2140 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2141 				     union perf_event *event __maybe_unused,
2142 				     struct perf_sample *sample)
2143 {
2144         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2145 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2146 	struct thread *thread = machine__findnew_thread(trace->host,
2147 							sample->pid,
2148 							sample->tid);
2149 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2150 
2151 	if (ttrace == NULL)
2152 		goto out_dump;
2153 
2154 	ttrace->runtime_ms += runtime_ms;
2155 	trace->runtime_ms += runtime_ms;
2156 	thread__put(thread);
2157 	return 0;
2158 
2159 out_dump:
2160 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2161 	       evsel->name,
2162 	       perf_evsel__strval(evsel, sample, "comm"),
2163 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2164 	       runtime,
2165 	       perf_evsel__intval(evsel, sample, "vruntime"));
2166 	thread__put(thread);
2167 	return 0;
2168 }
2169 
2170 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2171 				union perf_event *event __maybe_unused,
2172 				struct perf_sample *sample)
2173 {
2174 	trace__printf_interrupted_entry(trace, sample);
2175 	trace__fprintf_tstamp(trace, sample->time, trace->output);
2176 
2177 	if (trace->trace_syscalls)
2178 		fprintf(trace->output, "(         ): ");
2179 
2180 	fprintf(trace->output, "%s:", evsel->name);
2181 
2182 	if (evsel->tp_format) {
2183 		event_format__fprintf(evsel->tp_format, sample->cpu,
2184 				      sample->raw_data, sample->raw_size,
2185 				      trace->output);
2186 	}
2187 
2188 	fprintf(trace->output, ")\n");
2189 	return 0;
2190 }
2191 
2192 static void print_location(FILE *f, struct perf_sample *sample,
2193 			   struct addr_location *al,
2194 			   bool print_dso, bool print_sym)
2195 {
2196 
2197 	if ((verbose || print_dso) && al->map)
2198 		fprintf(f, "%s@", al->map->dso->long_name);
2199 
2200 	if ((verbose || print_sym) && al->sym)
2201 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2202 			al->addr - al->sym->start);
2203 	else if (al->map)
2204 		fprintf(f, "0x%" PRIx64, al->addr);
2205 	else
2206 		fprintf(f, "0x%" PRIx64, sample->addr);
2207 }
2208 
2209 static int trace__pgfault(struct trace *trace,
2210 			  struct perf_evsel *evsel,
2211 			  union perf_event *event,
2212 			  struct perf_sample *sample)
2213 {
2214 	struct thread *thread;
2215 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2216 	struct addr_location al;
2217 	char map_type = 'd';
2218 	struct thread_trace *ttrace;
2219 	int err = -1;
2220 
2221 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2222 	ttrace = thread__trace(thread, trace->output);
2223 	if (ttrace == NULL)
2224 		goto out_put;
2225 
2226 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2227 		ttrace->pfmaj++;
2228 	else
2229 		ttrace->pfmin++;
2230 
2231 	if (trace->summary_only)
2232 		goto out;
2233 
2234 	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2235 			      sample->ip, &al);
2236 
2237 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2238 
2239 	fprintf(trace->output, "%sfault [",
2240 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2241 		"maj" : "min");
2242 
2243 	print_location(trace->output, sample, &al, false, true);
2244 
2245 	fprintf(trace->output, "] => ");
2246 
2247 	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2248 				   sample->addr, &al);
2249 
2250 	if (!al.map) {
2251 		thread__find_addr_location(thread, cpumode,
2252 					   MAP__FUNCTION, sample->addr, &al);
2253 
2254 		if (al.map)
2255 			map_type = 'x';
2256 		else
2257 			map_type = '?';
2258 	}
2259 
2260 	print_location(trace->output, sample, &al, true, false);
2261 
2262 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2263 out:
2264 	err = 0;
2265 out_put:
2266 	thread__put(thread);
2267 	return err;
2268 }
2269 
2270 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2271 {
2272 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2273 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2274 		return false;
2275 
2276 	if (trace->pid_list || trace->tid_list)
2277 		return true;
2278 
2279 	return false;
2280 }
2281 
2282 static int trace__process_sample(struct perf_tool *tool,
2283 				 union perf_event *event,
2284 				 struct perf_sample *sample,
2285 				 struct perf_evsel *evsel,
2286 				 struct machine *machine __maybe_unused)
2287 {
2288 	struct trace *trace = container_of(tool, struct trace, tool);
2289 	int err = 0;
2290 
2291 	tracepoint_handler handler = evsel->handler;
2292 
2293 	if (skip_sample(trace, sample))
2294 		return 0;
2295 
2296 	if (!trace->full_time && trace->base_time == 0)
2297 		trace->base_time = sample->time;
2298 
2299 	if (handler) {
2300 		++trace->nr_events;
2301 		handler(trace, evsel, event, sample);
2302 	}
2303 
2304 	return err;
2305 }
2306 
2307 static int parse_target_str(struct trace *trace)
2308 {
2309 	if (trace->opts.target.pid) {
2310 		trace->pid_list = intlist__new(trace->opts.target.pid);
2311 		if (trace->pid_list == NULL) {
2312 			pr_err("Error parsing process id string\n");
2313 			return -EINVAL;
2314 		}
2315 	}
2316 
2317 	if (trace->opts.target.tid) {
2318 		trace->tid_list = intlist__new(trace->opts.target.tid);
2319 		if (trace->tid_list == NULL) {
2320 			pr_err("Error parsing thread id string\n");
2321 			return -EINVAL;
2322 		}
2323 	}
2324 
2325 	return 0;
2326 }
2327 
2328 static int trace__record(struct trace *trace, int argc, const char **argv)
2329 {
2330 	unsigned int rec_argc, i, j;
2331 	const char **rec_argv;
2332 	const char * const record_args[] = {
2333 		"record",
2334 		"-R",
2335 		"-m", "1024",
2336 		"-c", "1",
2337 	};
2338 
2339 	const char * const sc_args[] = { "-e", };
2340 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2341 	const char * const majpf_args[] = { "-e", "major-faults" };
2342 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2343 	const char * const minpf_args[] = { "-e", "minor-faults" };
2344 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2345 
2346 	/* +1 is for the event string below */
2347 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2348 		majpf_args_nr + minpf_args_nr + argc;
2349 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2350 
2351 	if (rec_argv == NULL)
2352 		return -ENOMEM;
2353 
2354 	j = 0;
2355 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2356 		rec_argv[j++] = record_args[i];
2357 
2358 	if (trace->trace_syscalls) {
2359 		for (i = 0; i < sc_args_nr; i++)
2360 			rec_argv[j++] = sc_args[i];
2361 
2362 		/* event string may be different for older kernels - e.g., RHEL6 */
2363 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2364 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2365 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2366 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2367 		else {
2368 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2369 			return -1;
2370 		}
2371 	}
2372 
2373 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2374 		for (i = 0; i < majpf_args_nr; i++)
2375 			rec_argv[j++] = majpf_args[i];
2376 
2377 	if (trace->trace_pgfaults & TRACE_PFMIN)
2378 		for (i = 0; i < minpf_args_nr; i++)
2379 			rec_argv[j++] = minpf_args[i];
2380 
2381 	for (i = 0; i < (unsigned int)argc; i++)
2382 		rec_argv[j++] = argv[i];
2383 
2384 	return cmd_record(j, rec_argv, NULL);
2385 }
2386 
2387 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2388 
2389 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2390 {
2391 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2392 	if (evsel == NULL)
2393 		return false;
2394 
2395 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2396 		perf_evsel__delete(evsel);
2397 		return false;
2398 	}
2399 
2400 	evsel->handler = trace__vfs_getname;
2401 	perf_evlist__add(evlist, evsel);
2402 	return true;
2403 }
2404 
2405 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2406 				    u64 config)
2407 {
2408 	struct perf_evsel *evsel;
2409 	struct perf_event_attr attr = {
2410 		.type = PERF_TYPE_SOFTWARE,
2411 		.mmap_data = 1,
2412 	};
2413 
2414 	attr.config = config;
2415 	attr.sample_period = 1;
2416 
2417 	event_attr_init(&attr);
2418 
2419 	evsel = perf_evsel__new(&attr);
2420 	if (!evsel)
2421 		return -ENOMEM;
2422 
2423 	evsel->handler = trace__pgfault;
2424 	perf_evlist__add(evlist, evsel);
2425 
2426 	return 0;
2427 }
2428 
2429 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2430 {
2431 	const u32 type = event->header.type;
2432 	struct perf_evsel *evsel;
2433 
2434 	if (!trace->full_time && trace->base_time == 0)
2435 		trace->base_time = sample->time;
2436 
2437 	if (type != PERF_RECORD_SAMPLE) {
2438 		trace__process_event(trace, trace->host, event, sample);
2439 		return;
2440 	}
2441 
2442 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2443 	if (evsel == NULL) {
2444 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2445 		return;
2446 	}
2447 
2448 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2449 	    sample->raw_data == NULL) {
2450 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2451 		       perf_evsel__name(evsel), sample->tid,
2452 		       sample->cpu, sample->raw_size);
2453 	} else {
2454 		tracepoint_handler handler = evsel->handler;
2455 		handler(trace, evsel, event, sample);
2456 	}
2457 }
2458 
2459 static int trace__add_syscall_newtp(struct trace *trace)
2460 {
2461 	int ret = -1;
2462 	struct perf_evlist *evlist = trace->evlist;
2463 	struct perf_evsel *sys_enter, *sys_exit;
2464 
2465 	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2466 	if (sys_enter == NULL)
2467 		goto out;
2468 
2469 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2470 		goto out_delete_sys_enter;
2471 
2472 	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2473 	if (sys_exit == NULL)
2474 		goto out_delete_sys_enter;
2475 
2476 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2477 		goto out_delete_sys_exit;
2478 
2479 	perf_evlist__add(evlist, sys_enter);
2480 	perf_evlist__add(evlist, sys_exit);
2481 
2482 	trace->syscalls.events.sys_enter = sys_enter;
2483 	trace->syscalls.events.sys_exit  = sys_exit;
2484 
2485 	ret = 0;
2486 out:
2487 	return ret;
2488 
2489 out_delete_sys_exit:
2490 	perf_evsel__delete_priv(sys_exit);
2491 out_delete_sys_enter:
2492 	perf_evsel__delete_priv(sys_enter);
2493 	goto out;
2494 }
2495 
2496 static int trace__set_ev_qualifier_filter(struct trace *trace)
2497 {
2498 	int err = -1;
2499 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2500 						trace->ev_qualifier_ids.nr,
2501 						trace->ev_qualifier_ids.entries);
2502 
2503 	if (filter == NULL)
2504 		goto out_enomem;
2505 
2506 	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2507 		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2508 
2509 	free(filter);
2510 out:
2511 	return err;
2512 out_enomem:
2513 	errno = ENOMEM;
2514 	goto out;
2515 }
2516 
2517 static int trace__run(struct trace *trace, int argc, const char **argv)
2518 {
2519 	struct perf_evlist *evlist = trace->evlist;
2520 	struct perf_evsel *evsel;
2521 	int err = -1, i;
2522 	unsigned long before;
2523 	const bool forks = argc > 0;
2524 	bool draining = false;
2525 
2526 	trace->live = true;
2527 
2528 	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2529 		goto out_error_raw_syscalls;
2530 
2531 	if (trace->trace_syscalls)
2532 		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2533 
2534 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2535 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2536 		goto out_error_mem;
2537 	}
2538 
2539 	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2540 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2541 		goto out_error_mem;
2542 
2543 	if (trace->sched &&
2544 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2545 				   trace__sched_stat_runtime))
2546 		goto out_error_sched_stat_runtime;
2547 
2548 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2549 	if (err < 0) {
2550 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2551 		goto out_delete_evlist;
2552 	}
2553 
2554 	err = trace__symbols_init(trace, evlist);
2555 	if (err < 0) {
2556 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2557 		goto out_delete_evlist;
2558 	}
2559 
2560 	perf_evlist__config(evlist, &trace->opts);
2561 
2562 	signal(SIGCHLD, sig_handler);
2563 	signal(SIGINT, sig_handler);
2564 
2565 	if (forks) {
2566 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2567 						    argv, false, NULL);
2568 		if (err < 0) {
2569 			fprintf(trace->output, "Couldn't run the workload!\n");
2570 			goto out_delete_evlist;
2571 		}
2572 	}
2573 
2574 	err = perf_evlist__open(evlist);
2575 	if (err < 0)
2576 		goto out_error_open;
2577 
2578 	/*
2579 	 * Better not use !target__has_task() here because we need to cover the
2580 	 * case where no threads were specified in the command line, but a
2581 	 * workload was, and in that case we will fill in the thread_map when
2582 	 * we fork the workload in perf_evlist__prepare_workload.
2583 	 */
2584 	if (trace->filter_pids.nr > 0)
2585 		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2586 	else if (thread_map__pid(evlist->threads, 0) == -1)
2587 		err = perf_evlist__set_filter_pid(evlist, getpid());
2588 
2589 	if (err < 0)
2590 		goto out_error_mem;
2591 
2592 	if (trace->ev_qualifier_ids.nr > 0) {
2593 		err = trace__set_ev_qualifier_filter(trace);
2594 		if (err < 0)
2595 			goto out_errno;
2596 
2597 		pr_debug("event qualifier tracepoint filter: %s\n",
2598 			 trace->syscalls.events.sys_exit->filter);
2599 	}
2600 
2601 	err = perf_evlist__apply_filters(evlist, &evsel);
2602 	if (err < 0)
2603 		goto out_error_apply_filters;
2604 
2605 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2606 	if (err < 0)
2607 		goto out_error_mmap;
2608 
2609 	if (!target__none(&trace->opts.target))
2610 		perf_evlist__enable(evlist);
2611 
2612 	if (forks)
2613 		perf_evlist__start_workload(evlist);
2614 
2615 	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2616 				  evlist->threads->nr > 1 ||
2617 				  perf_evlist__first(evlist)->attr.inherit;
2618 again:
2619 	before = trace->nr_events;
2620 
2621 	for (i = 0; i < evlist->nr_mmaps; i++) {
2622 		union perf_event *event;
2623 
2624 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2625 			struct perf_sample sample;
2626 
2627 			++trace->nr_events;
2628 
2629 			err = perf_evlist__parse_sample(evlist, event, &sample);
2630 			if (err) {
2631 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2632 				goto next_event;
2633 			}
2634 
2635 			trace__handle_event(trace, event, &sample);
2636 next_event:
2637 			perf_evlist__mmap_consume(evlist, i);
2638 
2639 			if (interrupted)
2640 				goto out_disable;
2641 
2642 			if (done && !draining) {
2643 				perf_evlist__disable(evlist);
2644 				draining = true;
2645 			}
2646 		}
2647 	}
2648 
2649 	if (trace->nr_events == before) {
2650 		int timeout = done ? 100 : -1;
2651 
2652 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2653 			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2654 				draining = true;
2655 
2656 			goto again;
2657 		}
2658 	} else {
2659 		goto again;
2660 	}
2661 
2662 out_disable:
2663 	thread__zput(trace->current);
2664 
2665 	perf_evlist__disable(evlist);
2666 
2667 	if (!err) {
2668 		if (trace->summary)
2669 			trace__fprintf_thread_summary(trace, trace->output);
2670 
2671 		if (trace->show_tool_stats) {
2672 			fprintf(trace->output, "Stats:\n "
2673 					       " vfs_getname : %" PRIu64 "\n"
2674 					       " proc_getname: %" PRIu64 "\n",
2675 				trace->stats.vfs_getname,
2676 				trace->stats.proc_getname);
2677 		}
2678 	}
2679 
2680 out_delete_evlist:
2681 	perf_evlist__delete(evlist);
2682 	trace->evlist = NULL;
2683 	trace->live = false;
2684 	return err;
2685 {
2686 	char errbuf[BUFSIZ];
2687 
2688 out_error_sched_stat_runtime:
2689 	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2690 	goto out_error;
2691 
2692 out_error_raw_syscalls:
2693 	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2694 	goto out_error;
2695 
2696 out_error_mmap:
2697 	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2698 	goto out_error;
2699 
2700 out_error_open:
2701 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2702 
2703 out_error:
2704 	fprintf(trace->output, "%s\n", errbuf);
2705 	goto out_delete_evlist;
2706 
2707 out_error_apply_filters:
2708 	fprintf(trace->output,
2709 		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
2710 		evsel->filter, perf_evsel__name(evsel), errno,
2711 		strerror_r(errno, errbuf, sizeof(errbuf)));
2712 	goto out_delete_evlist;
2713 }
2714 out_error_mem:
2715 	fprintf(trace->output, "Not enough memory to run!\n");
2716 	goto out_delete_evlist;
2717 
2718 out_errno:
2719 	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2720 	goto out_delete_evlist;
2721 }
2722 
2723 static int trace__replay(struct trace *trace)
2724 {
2725 	const struct perf_evsel_str_handler handlers[] = {
2726 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2727 	};
2728 	struct perf_data_file file = {
2729 		.path  = input_name,
2730 		.mode  = PERF_DATA_MODE_READ,
2731 		.force = trace->force,
2732 	};
2733 	struct perf_session *session;
2734 	struct perf_evsel *evsel;
2735 	int err = -1;
2736 
2737 	trace->tool.sample	  = trace__process_sample;
2738 	trace->tool.mmap	  = perf_event__process_mmap;
2739 	trace->tool.mmap2	  = perf_event__process_mmap2;
2740 	trace->tool.comm	  = perf_event__process_comm;
2741 	trace->tool.exit	  = perf_event__process_exit;
2742 	trace->tool.fork	  = perf_event__process_fork;
2743 	trace->tool.attr	  = perf_event__process_attr;
2744 	trace->tool.tracing_data = perf_event__process_tracing_data;
2745 	trace->tool.build_id	  = perf_event__process_build_id;
2746 
2747 	trace->tool.ordered_events = true;
2748 	trace->tool.ordering_requires_timestamps = true;
2749 
2750 	/* add tid to output */
2751 	trace->multiple_threads = true;
2752 
2753 	session = perf_session__new(&file, false, &trace->tool);
2754 	if (session == NULL)
2755 		return -1;
2756 
2757 	if (symbol__init(&session->header.env) < 0)
2758 		goto out;
2759 
2760 	trace->host = &session->machines.host;
2761 
2762 	err = perf_session__set_tracepoints_handlers(session, handlers);
2763 	if (err)
2764 		goto out;
2765 
2766 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2767 						     "raw_syscalls:sys_enter");
2768 	/* older kernels have syscalls tp versus raw_syscalls */
2769 	if (evsel == NULL)
2770 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2771 							     "syscalls:sys_enter");
2772 
2773 	if (evsel &&
2774 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2775 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2776 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2777 		goto out;
2778 	}
2779 
2780 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2781 						     "raw_syscalls:sys_exit");
2782 	if (evsel == NULL)
2783 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2784 							     "syscalls:sys_exit");
2785 	if (evsel &&
2786 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2787 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2788 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2789 		goto out;
2790 	}
2791 
2792 	evlist__for_each(session->evlist, evsel) {
2793 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2794 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2795 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2796 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2797 			evsel->handler = trace__pgfault;
2798 	}
2799 
2800 	err = parse_target_str(trace);
2801 	if (err != 0)
2802 		goto out;
2803 
2804 	setup_pager();
2805 
2806 	err = perf_session__process_events(session);
2807 	if (err)
2808 		pr_err("Failed to process events, error %d", err);
2809 
2810 	else if (trace->summary)
2811 		trace__fprintf_thread_summary(trace, trace->output);
2812 
2813 out:
2814 	perf_session__delete(session);
2815 
2816 	return err;
2817 }
2818 
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns what fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2827 
2828 static size_t thread__dump_stats(struct thread_trace *ttrace,
2829 				 struct trace *trace, FILE *fp)
2830 {
2831 	struct stats *stats;
2832 	size_t printed = 0;
2833 	struct syscall *sc;
2834 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2835 
2836 	if (inode == NULL)
2837 		return 0;
2838 
2839 	printed += fprintf(fp, "\n");
2840 
2841 	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2842 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2843 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2844 
2845 	/* each int_node is a syscall */
2846 	while (inode) {
2847 		stats = inode->priv;
2848 		if (stats) {
2849 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2850 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2851 			double avg = avg_stats(stats);
2852 			double pct;
2853 			u64 n = (u64) stats->n;
2854 
2855 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2856 			avg /= NSEC_PER_MSEC;
2857 
2858 			sc = &trace->syscalls.table[inode->i];
2859 			printed += fprintf(fp, "   %-15s", sc->name);
2860 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2861 					   n, avg * n, min, avg);
2862 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2863 		}
2864 
2865 		inode = intlist__next(inode);
2866 	}
2867 
2868 	printed += fprintf(fp, "\n\n");
2869 
2870 	return printed;
2871 }
2872 
/*
 * Context handed through the opaque 'priv' pointer to the per-thread
 * callback (trace__fprintf_one_thread) when printing the summary.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracing session the threads belong to */
	size_t printed;		/* running count of characters printed */
};
2879 
2880 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2881 {
2882 	struct summary_data *data = priv;
2883 	FILE *fp = data->fp;
2884 	size_t printed = data->printed;
2885 	struct trace *trace = data->trace;
2886 	struct thread_trace *ttrace = thread__priv(thread);
2887 	double ratio;
2888 
2889 	if (ttrace == NULL)
2890 		return 0;
2891 
2892 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2893 
2894 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2895 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2896 	printed += fprintf(fp, "%.1f%%", ratio);
2897 	if (ttrace->pfmaj)
2898 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2899 	if (ttrace->pfmin)
2900 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2901 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2902 	printed += thread__dump_stats(ttrace, trace, fp);
2903 
2904 	data->printed += printed;
2905 
2906 	return 0;
2907 }
2908 
2909 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2910 {
2911 	struct summary_data data = {
2912 		.fp = fp,
2913 		.trace = trace
2914 	};
2915 	data.printed = trace__fprintf_threads_header(fp);
2916 
2917 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2918 
2919 	return data.printed;
2920 }
2921 
2922 static int trace__set_duration(const struct option *opt, const char *str,
2923 			       int unset __maybe_unused)
2924 {
2925 	struct trace *trace = opt->value;
2926 
2927 	trace->duration_filter = atof(str);
2928 	return 0;
2929 }
2930 
2931 static int trace__set_filter_pids(const struct option *opt, const char *str,
2932 				  int unset __maybe_unused)
2933 {
2934 	int ret = -1;
2935 	size_t i;
2936 	struct trace *trace = opt->value;
2937 	/*
2938 	 * FIXME: introduce a intarray class, plain parse csv and create a
2939 	 * { int nr, int entries[] } struct...
2940 	 */
2941 	struct intlist *list = intlist__new(str);
2942 
2943 	if (list == NULL)
2944 		return -1;
2945 
2946 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2947 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2948 
2949 	if (trace->filter_pids.entries == NULL)
2950 		goto out;
2951 
2952 	trace->filter_pids.entries[0] = getpid();
2953 
2954 	for (i = 1; i < trace->filter_pids.nr; ++i)
2955 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2956 
2957 	intlist__delete(list);
2958 	ret = 0;
2959 out:
2960 	return ret;
2961 }
2962 
2963 static int trace__open_output(struct trace *trace, const char *filename)
2964 {
2965 	struct stat st;
2966 
2967 	if (!stat(filename, &st) && st.st_size) {
2968 		char oldname[PATH_MAX];
2969 
2970 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2971 		unlink(oldname);
2972 		rename(filename, oldname);
2973 	}
2974 
2975 	trace->output = fopen(filename, "w");
2976 
2977 	return trace->output == NULL ? -errno : 0;
2978 }
2979 
2980 static int parse_pagefaults(const struct option *opt, const char *str,
2981 			    int unset __maybe_unused)
2982 {
2983 	int *trace_pgfaults = opt->value;
2984 
2985 	if (strcmp(str, "all") == 0)
2986 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2987 	else if (strcmp(str, "maj") == 0)
2988 		*trace_pgfaults |= TRACE_PFMAJ;
2989 	else if (strcmp(str, "min") == 0)
2990 		*trace_pgfaults |= TRACE_PFMIN;
2991 	else
2992 		return -1;
2993 
2994 	return 0;
2995 }
2996 
2997 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2998 {
2999 	struct perf_evsel *evsel;
3000 
3001 	evlist__for_each(evlist, evsel)
3002 		evsel->handler = handler;
3003 }
3004 
3005 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3006 {
3007 	const char *trace_usage[] = {
3008 		"perf trace [<options>] [<command>]",
3009 		"perf trace [<options>] -- <command> [<options>]",
3010 		"perf trace record [<options>] [<command>]",
3011 		"perf trace record [<options>] -- <command> [<options>]",
3012 		NULL
3013 	};
3014 	struct trace trace = {
3015 		.audit = {
3016 			.machine = audit_detect_machine(),
3017 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
3018 		},
3019 		.syscalls = {
3020 			. max = -1,
3021 		},
3022 		.opts = {
3023 			.target = {
3024 				.uid	   = UINT_MAX,
3025 				.uses_mmap = true,
3026 			},
3027 			.user_freq     = UINT_MAX,
3028 			.user_interval = ULLONG_MAX,
3029 			.no_buffering  = true,
3030 			.mmap_pages    = UINT_MAX,
3031 			.proc_map_timeout  = 500,
3032 		},
3033 		.output = stderr,
3034 		.show_comm = true,
3035 		.trace_syscalls = true,
3036 	};
3037 	const char *output_name = NULL;
3038 	const char *ev_qualifier_str = NULL;
3039 	const struct option trace_options[] = {
3040 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
3041 		     "event selector. use 'perf list' to list available events",
3042 		     parse_events_option),
3043 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
3044 		    "show the thread COMM next to its id"),
3045 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3046 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3047 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
3048 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3049 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3050 		    "trace events on existing process id"),
3051 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3052 		    "trace events on existing thread id"),
3053 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3054 		     "pids to filter (by the kernel)", trace__set_filter_pids),
3055 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3056 		    "system-wide collection from all CPUs"),
3057 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3058 		    "list of cpus to monitor"),
3059 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3060 		    "child tasks do not inherit counters"),
3061 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3062 		     "number of mmap data pages",
3063 		     perf_evlist__parse_mmap_pages),
3064 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3065 		   "user to profile"),
3066 	OPT_CALLBACK(0, "duration", &trace, "float",
3067 		     "show only events with duration > N.M ms",
3068 		     trace__set_duration),
3069 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3070 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3071 	OPT_BOOLEAN('T', "time", &trace.full_time,
3072 		    "Show full timestamp, not time relative to first start"),
3073 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
3074 		    "Show only syscall summary with statistics"),
3075 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
3076 		    "Show all syscalls and summary with statistics"),
3077 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3078 		     "Trace pagefaults", parse_pagefaults, "maj"),
3079 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3080 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3081 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3082 			"per thread proc mmap processing timeout in ms"),
3083 	OPT_END()
3084 	};
3085 	const char * const trace_subcommands[] = { "record", NULL };
3086 	int err;
3087 	char bf[BUFSIZ];
3088 
3089 	signal(SIGSEGV, sighandler_dump_stack);
3090 	signal(SIGFPE, sighandler_dump_stack);
3091 
3092 	trace.evlist = perf_evlist__new();
3093 
3094 	if (trace.evlist == NULL) {
3095 		pr_err("Not enough memory to run!\n");
3096 		err = -ENOMEM;
3097 		goto out;
3098 	}
3099 
3100 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3101 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3102 
3103 	if (trace.trace_pgfaults) {
3104 		trace.opts.sample_address = true;
3105 		trace.opts.sample_time = true;
3106 	}
3107 
3108 	if (trace.evlist->nr_entries > 0)
3109 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3110 
3111 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3112 		return trace__record(&trace, argc-1, &argv[1]);
3113 
3114 	/* summary_only implies summary option, but don't overwrite summary if set */
3115 	if (trace.summary_only)
3116 		trace.summary = trace.summary_only;
3117 
3118 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3119 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
3120 		pr_err("Please specify something to trace.\n");
3121 		return -1;
3122 	}
3123 
3124 	if (output_name != NULL) {
3125 		err = trace__open_output(&trace, output_name);
3126 		if (err < 0) {
3127 			perror("failed to create output file");
3128 			goto out;
3129 		}
3130 	}
3131 
3132 	if (ev_qualifier_str != NULL) {
3133 		const char *s = ev_qualifier_str;
3134 		struct strlist_config slist_config = {
3135 			.dirname = system_path(STRACE_GROUPS_DIR),
3136 		};
3137 
3138 		trace.not_ev_qualifier = *s == '!';
3139 		if (trace.not_ev_qualifier)
3140 			++s;
3141 		trace.ev_qualifier = strlist__new(s, &slist_config);
3142 		if (trace.ev_qualifier == NULL) {
3143 			fputs("Not enough memory to parse event qualifier",
3144 			      trace.output);
3145 			err = -ENOMEM;
3146 			goto out_close;
3147 		}
3148 
3149 		err = trace__validate_ev_qualifier(&trace);
3150 		if (err)
3151 			goto out_close;
3152 	}
3153 
3154 	err = target__validate(&trace.opts.target);
3155 	if (err) {
3156 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3157 		fprintf(trace.output, "%s", bf);
3158 		goto out_close;
3159 	}
3160 
3161 	err = target__parse_uid(&trace.opts.target);
3162 	if (err) {
3163 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3164 		fprintf(trace.output, "%s", bf);
3165 		goto out_close;
3166 	}
3167 
3168 	if (!argc && target__none(&trace.opts.target))
3169 		trace.opts.target.system_wide = true;
3170 
3171 	if (input_name)
3172 		err = trace__replay(&trace);
3173 	else
3174 		err = trace__run(&trace, argc, argv);
3175 
3176 out_close:
3177 	if (output_name != NULL)
3178 		fclose(trace.output);
3179 out:
3180 	return err;
3181 }
3182