xref: /openbmc/linux/tools/perf/builtin-trace.c (revision c819e2cf)
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16 
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22 
/*
 * For older distros: fallback values for constants the system headers may
 * not provide.  Each one is defined only when the headers left it undefined.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif
43 
/*
 * Describes how to read one tracepoint field out of a sample's raw payload:
 * 'offset' locates the field and one of the two callbacks, which share
 * storage, reads it.
 */
struct tp_field {
	int offset;	/* added to sample->raw_data by the accessors */
	union {
		/* reads the field as an unsigned integer, widened to u64 */
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		/* returns the address of the field inside the raw payload */
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
51 
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55 	return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57 
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62 
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66 	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 	return bswap_##bits(value);\
68 }
69 
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73 
74 static int tp_field__init_uint(struct tp_field *field,
75 			       struct format_field *format_field,
76 			       bool needs_swap)
77 {
78 	field->offset = format_field->offset;
79 
80 	switch (format_field->size) {
81 	case 1:
82 		field->integer = tp_field__u8;
83 		break;
84 	case 2:
85 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86 		break;
87 	case 4:
88 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89 		break;
90 	case 8:
91 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92 		break;
93 	default:
94 		return -1;
95 	}
96 
97 	return 0;
98 }
99 
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102 	return sample->raw_data + field->offset;
103 }
104 
/*
 * Bind 'field' to the pointer accessor: record the tracepoint field's offset
 * and install tp_field__ptr() as the reader.  Always returns 0.
 */
static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
{
	field->offset = format_field->offset;
	field->pointer = tp_field__ptr;
	return 0;
}
111 
112 struct syscall_tp {
113 	struct tp_field id;
114 	union {
115 		struct tp_field args, ret;
116 	};
117 };
118 
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 					  struct tp_field *field,
121 					  const char *name)
122 {
123 	struct format_field *format_field = perf_evsel__field(evsel, name);
124 
125 	if (format_field == NULL)
126 		return -1;
127 
128 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130 
/*
 * Initialize member 'name' of the struct syscall_tp stored in evsel->priv as
 * an unsigned integer accessor for the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 * The 'evsel' argument is parenthesized so any pointer expression is safe.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
134 
/*
 * Look up tracepoint field 'name' on 'evsel' and initialize 'field' as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
146 
/*
 * Initialize member 'name' of the struct syscall_tp stored in evsel->priv as
 * a pointer accessor for the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 * The 'evsel' argument is parenthesized so any pointer expression is safe.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
150 
/* Release evsel->priv via zfree(), then delete the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
156 
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159 	evsel->priv = malloc(sizeof(struct syscall_tp));
160 	if (evsel->priv != NULL) {
161 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162 			goto out_delete;
163 
164 		evsel->handler = handler;
165 		return 0;
166 	}
167 
168 	return -ENOMEM;
169 
170 out_delete:
171 	zfree(&evsel->priv);
172 	return -ENOENT;
173 }
174 
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and initialize its syscall_tp state with
 * 'handler'.
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created or
 * the initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194 
/*
 * Read member 'name' of the struct syscall_tp in evsel->priv from 'sample'
 * through its integer accessor.  Arguments are parenthesized so any
 * expression may be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/* Same as perf_evsel__sc_tp_uint(), but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
202 
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 					  void *sys_enter_handler,
205 					  void *sys_exit_handler)
206 {
207 	int ret = -1;
208 	struct perf_evsel *sys_enter, *sys_exit;
209 
210 	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 	if (sys_enter == NULL)
212 		goto out;
213 
214 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 		goto out_delete_sys_enter;
216 
217 	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 	if (sys_exit == NULL)
219 		goto out_delete_sys_enter;
220 
221 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 		goto out_delete_sys_exit;
223 
224 	perf_evlist__add(evlist, sys_enter);
225 	perf_evlist__add(evlist, sys_exit);
226 
227 	ret = 0;
228 out:
229 	return ret;
230 
231 out_delete_sys_exit:
232 	perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 	perf_evsel__delete_priv(sys_enter);
235 	goto out;
236 }
237 
238 
/* Context handed to each syscall argument formatter. */
struct syscall_arg {
	unsigned long val;	/* raw argument value to format */
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;	/* formatter-specific data, e.g. a struct strarray */
	u8	      idx;	/* position of this argument in the syscall */
	u8	      mask;	/* bit N set by a formatter to mask argument N */
};
247 
/* Table mapping integer values to names: entries[value - offset]. */
struct strarray {
	int	    offset;	/* subtracted from the value before indexing */
	int	    nr_entries;	/* number of slots in 'entries' */
	const char **entries;
};
253 
/*
 * Define 'strarray__<array>' wrapping the string table 'array', whose first
 * slot corresponds to the value 'off'.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = (off), \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same as DEFINE_STRARRAY_OFFSET() for tables whose first slot is value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
264 
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266 						const char *intfmt,
267 					        struct syscall_arg *arg)
268 {
269 	struct strarray *sa = arg->parm;
270 	int idx = arg->val - sa->offset;
271 
272 	if (idx < 0 || idx >= sa->nr_entries)
273 		return scnprintf(bf, size, intfmt, arg->val);
274 
275 	return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277 
/* strarray formatter that prints values without a name using "%d". */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285 
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints values without a name using "%#x".
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299 
/* Forward declaration: needed by syscall_arg__scnprintf_fd_at() below. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304 
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 					   struct syscall_arg *arg)
307 {
308 	int fd = arg->val;
309 
310 	if (fd == AT_FDCWD)
311 		return scnprintf(bf, size, "CWD");
312 
313 	return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315 
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317 
/* Forward declaration: referenced through SCA_CLOSE_FD before its definition. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322 
/* Format the argument as hexadecimal with a "0x" prefix ("%#lx"). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
330 
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 					       struct syscall_arg *arg)
333 {
334 	int printed = 0, prot = arg->val;
335 
336 	if (prot == PROT_NONE)
337 		return scnprintf(bf, size, "NONE");
338 #define	P_MMAP_PROT(n) \
339 	if (prot & PROT_##n) { \
340 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341 		prot &= ~PROT_##n; \
342 	}
343 
344 	P_MMAP_PROT(EXEC);
345 	P_MMAP_PROT(READ);
346 	P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348 	P_MMAP_PROT(SEM);
349 #endif
350 	P_MMAP_PROT(GROWSDOWN);
351 	P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353 
354 	if (prot)
355 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356 
357 	return printed;
358 }
359 
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361 
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 						struct syscall_arg *arg)
364 {
365 	int printed = 0, flags = arg->val;
366 
367 #define	P_MMAP_FLAG(n) \
368 	if (flags & MAP_##n) { \
369 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 		flags &= ~MAP_##n; \
371 	}
372 
373 	P_MMAP_FLAG(SHARED);
374 	P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376 	P_MMAP_FLAG(32BIT);
377 #endif
378 	P_MMAP_FLAG(ANONYMOUS);
379 	P_MMAP_FLAG(DENYWRITE);
380 	P_MMAP_FLAG(EXECUTABLE);
381 	P_MMAP_FLAG(FILE);
382 	P_MMAP_FLAG(FIXED);
383 	P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385 	P_MMAP_FLAG(HUGETLB);
386 #endif
387 	P_MMAP_FLAG(LOCKED);
388 	P_MMAP_FLAG(NONBLOCK);
389 	P_MMAP_FLAG(NORESERVE);
390 	P_MMAP_FLAG(POPULATE);
391 	P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393 	P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396 
397 	if (flags)
398 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399 
400 	return printed;
401 }
402 
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404 
405 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
406 						  struct syscall_arg *arg)
407 {
408 	int printed = 0, flags = arg->val;
409 
410 #define P_MREMAP_FLAG(n) \
411 	if (flags & MREMAP_##n) { \
412 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
413 		flags &= ~MREMAP_##n; \
414 	}
415 
416 	P_MREMAP_FLAG(MAYMOVE);
417 #ifdef MREMAP_FIXED
418 	P_MREMAP_FLAG(FIXED);
419 #endif
420 #undef P_MREMAP_FLAG
421 
422 	if (flags)
423 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
424 
425 	return printed;
426 }
427 
428 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
429 
430 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
431 						      struct syscall_arg *arg)
432 {
433 	int behavior = arg->val;
434 
435 	switch (behavior) {
436 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
437 	P_MADV_BHV(NORMAL);
438 	P_MADV_BHV(RANDOM);
439 	P_MADV_BHV(SEQUENTIAL);
440 	P_MADV_BHV(WILLNEED);
441 	P_MADV_BHV(DONTNEED);
442 	P_MADV_BHV(REMOVE);
443 	P_MADV_BHV(DONTFORK);
444 	P_MADV_BHV(DOFORK);
445 	P_MADV_BHV(HWPOISON);
446 #ifdef MADV_SOFT_OFFLINE
447 	P_MADV_BHV(SOFT_OFFLINE);
448 #endif
449 	P_MADV_BHV(MERGEABLE);
450 	P_MADV_BHV(UNMERGEABLE);
451 #ifdef MADV_HUGEPAGE
452 	P_MADV_BHV(HUGEPAGE);
453 #endif
454 #ifdef MADV_NOHUGEPAGE
455 	P_MADV_BHV(NOHUGEPAGE);
456 #endif
457 #ifdef MADV_DONTDUMP
458 	P_MADV_BHV(DONTDUMP);
459 #endif
460 #ifdef MADV_DODUMP
461 	P_MADV_BHV(DODUMP);
462 #endif
463 #undef P_MADV_PHV
464 	default: break;
465 	}
466 
467 	return scnprintf(bf, size, "%#x", behavior);
468 }
469 
470 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
471 
472 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
473 					   struct syscall_arg *arg)
474 {
475 	int printed = 0, op = arg->val;
476 
477 	if (op == 0)
478 		return scnprintf(bf, size, "NONE");
479 #define	P_CMD(cmd) \
480 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
481 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
482 		op &= ~LOCK_##cmd; \
483 	}
484 
485 	P_CMD(SH);
486 	P_CMD(EX);
487 	P_CMD(NB);
488 	P_CMD(UN);
489 	P_CMD(MAND);
490 	P_CMD(RW);
491 	P_CMD(READ);
492 	P_CMD(WRITE);
493 #undef P_OP
494 
495 	if (op)
496 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
497 
498 	return printed;
499 }
500 
501 #define SCA_FLOCK syscall_arg__scnprintf_flock
502 
503 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
504 {
505 	enum syscall_futex_args {
506 		SCF_UADDR   = (1 << 0),
507 		SCF_OP	    = (1 << 1),
508 		SCF_VAL	    = (1 << 2),
509 		SCF_TIMEOUT = (1 << 3),
510 		SCF_UADDR2  = (1 << 4),
511 		SCF_VAL3    = (1 << 5),
512 	};
513 	int op = arg->val;
514 	int cmd = op & FUTEX_CMD_MASK;
515 	size_t printed = 0;
516 
517 	switch (cmd) {
518 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
519 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
520 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
521 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
522 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
523 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
524 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
525 	P_FUTEX_OP(WAKE_OP);							  break;
526 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
527 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
528 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
529 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
530 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
531 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
532 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
533 	}
534 
535 	if (op & FUTEX_PRIVATE_FLAG)
536 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
537 
538 	if (op & FUTEX_CLOCK_REALTIME)
539 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
540 
541 	return printed;
542 }
543 
544 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
545 
/*
 * Name tables for the SCA_STRARRAY formatter.  Each table is indexed by the
 * argument value minus the table's offset (0 unless given explicitly).
 */

/* epoll_ctl ops; the first entry corresponds to value 1 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* lseek whence; DATA and HOLE are present only when the headers define them */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
585 
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * Every value from 0 to AF_VSOCK (40) must have a slot so that the names
 * line up with their numbers: 28 is AF_MPLS and AF_CAN is 29.  Without the
 * slot for 28 every family from CAN onwards is reported under the name of
 * its successor.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
595 
596 #ifndef SOCK_TYPE_MASK
597 #define SOCK_TYPE_MASK 0xf
598 #endif
599 
600 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
601 						      struct syscall_arg *arg)
602 {
603 	size_t printed;
604 	int type = arg->val,
605 	    flags = type & ~SOCK_TYPE_MASK;
606 
607 	type &= SOCK_TYPE_MASK;
608 	/*
609  	 * Can't use a strarray, MIPS may override for ABI reasons.
610  	 */
611 	switch (type) {
612 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
613 	P_SK_TYPE(STREAM);
614 	P_SK_TYPE(DGRAM);
615 	P_SK_TYPE(RAW);
616 	P_SK_TYPE(RDM);
617 	P_SK_TYPE(SEQPACKET);
618 	P_SK_TYPE(DCCP);
619 	P_SK_TYPE(PACKET);
620 #undef P_SK_TYPE
621 	default:
622 		printed = scnprintf(bf, size, "%#x", type);
623 	}
624 
625 #define	P_SK_FLAG(n) \
626 	if (flags & SOCK_##n) { \
627 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
628 		flags &= ~SOCK_##n; \
629 	}
630 
631 	P_SK_FLAG(CLOEXEC);
632 	P_SK_FLAG(NONBLOCK);
633 #undef P_SK_FLAG
634 
635 	if (flags)
636 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
637 
638 	return printed;
639 }
640 
641 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
642 
643 #ifndef MSG_PROBE
644 #define MSG_PROBE	     0x10
645 #endif
646 #ifndef MSG_WAITFORONE
647 #define MSG_WAITFORONE	0x10000
648 #endif
649 #ifndef MSG_SENDPAGE_NOTLAST
650 #define MSG_SENDPAGE_NOTLAST 0x20000
651 #endif
652 #ifndef MSG_FASTOPEN
653 #define MSG_FASTOPEN	     0x20000000
654 #endif
655 
656 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
657 					       struct syscall_arg *arg)
658 {
659 	int printed = 0, flags = arg->val;
660 
661 	if (flags == 0)
662 		return scnprintf(bf, size, "NONE");
663 #define	P_MSG_FLAG(n) \
664 	if (flags & MSG_##n) { \
665 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
666 		flags &= ~MSG_##n; \
667 	}
668 
669 	P_MSG_FLAG(OOB);
670 	P_MSG_FLAG(PEEK);
671 	P_MSG_FLAG(DONTROUTE);
672 	P_MSG_FLAG(TRYHARD);
673 	P_MSG_FLAG(CTRUNC);
674 	P_MSG_FLAG(PROBE);
675 	P_MSG_FLAG(TRUNC);
676 	P_MSG_FLAG(DONTWAIT);
677 	P_MSG_FLAG(EOR);
678 	P_MSG_FLAG(WAITALL);
679 	P_MSG_FLAG(FIN);
680 	P_MSG_FLAG(SYN);
681 	P_MSG_FLAG(CONFIRM);
682 	P_MSG_FLAG(RST);
683 	P_MSG_FLAG(ERRQUEUE);
684 	P_MSG_FLAG(NOSIGNAL);
685 	P_MSG_FLAG(MORE);
686 	P_MSG_FLAG(WAITFORONE);
687 	P_MSG_FLAG(SENDPAGE_NOTLAST);
688 	P_MSG_FLAG(FASTOPEN);
689 	P_MSG_FLAG(CMSG_CLOEXEC);
690 #undef P_MSG_FLAG
691 
692 	if (flags)
693 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
694 
695 	return printed;
696 }
697 
698 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
699 
700 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
701 						 struct syscall_arg *arg)
702 {
703 	size_t printed = 0;
704 	int mode = arg->val;
705 
706 	if (mode == F_OK) /* 0 */
707 		return scnprintf(bf, size, "F");
708 #define	P_MODE(n) \
709 	if (mode & n##_OK) { \
710 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
711 		mode &= ~n##_OK; \
712 	}
713 
714 	P_MODE(R);
715 	P_MODE(W);
716 	P_MODE(X);
717 #undef P_MODE
718 
719 	if (mode)
720 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
721 
722 	return printed;
723 }
724 
725 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
726 
727 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
728 					       struct syscall_arg *arg)
729 {
730 	int printed = 0, flags = arg->val;
731 
732 	if (!(flags & O_CREAT))
733 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
734 
735 	if (flags == 0)
736 		return scnprintf(bf, size, "RDONLY");
737 #define	P_FLAG(n) \
738 	if (flags & O_##n) { \
739 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
740 		flags &= ~O_##n; \
741 	}
742 
743 	P_FLAG(APPEND);
744 	P_FLAG(ASYNC);
745 	P_FLAG(CLOEXEC);
746 	P_FLAG(CREAT);
747 	P_FLAG(DIRECT);
748 	P_FLAG(DIRECTORY);
749 	P_FLAG(EXCL);
750 	P_FLAG(LARGEFILE);
751 	P_FLAG(NOATIME);
752 	P_FLAG(NOCTTY);
753 #ifdef O_NONBLOCK
754 	P_FLAG(NONBLOCK);
755 #elif O_NDELAY
756 	P_FLAG(NDELAY);
757 #endif
758 #ifdef O_PATH
759 	P_FLAG(PATH);
760 #endif
761 	P_FLAG(RDWR);
762 #ifdef O_DSYNC
763 	if ((flags & O_SYNC) == O_SYNC)
764 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
765 	else {
766 		P_FLAG(DSYNC);
767 	}
768 #else
769 	P_FLAG(SYNC);
770 #endif
771 	P_FLAG(TRUNC);
772 	P_FLAG(WRONLY);
773 #undef P_FLAG
774 
775 	if (flags)
776 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
777 
778 	return printed;
779 }
780 
781 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
782 
783 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
784 						   struct syscall_arg *arg)
785 {
786 	int printed = 0, flags = arg->val;
787 
788 	if (flags == 0)
789 		return scnprintf(bf, size, "NONE");
790 #define	P_FLAG(n) \
791 	if (flags & EFD_##n) { \
792 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
793 		flags &= ~EFD_##n; \
794 	}
795 
796 	P_FLAG(SEMAPHORE);
797 	P_FLAG(CLOEXEC);
798 	P_FLAG(NONBLOCK);
799 #undef P_FLAG
800 
801 	if (flags)
802 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
803 
804 	return printed;
805 }
806 
807 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
808 
809 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
810 						struct syscall_arg *arg)
811 {
812 	int printed = 0, flags = arg->val;
813 
814 #define	P_FLAG(n) \
815 	if (flags & O_##n) { \
816 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
817 		flags &= ~O_##n; \
818 	}
819 
820 	P_FLAG(CLOEXEC);
821 	P_FLAG(NONBLOCK);
822 #undef P_FLAG
823 
824 	if (flags)
825 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
826 
827 	return printed;
828 }
829 
830 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
831 
832 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
833 {
834 	int sig = arg->val;
835 
836 	switch (sig) {
837 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
838 	P_SIGNUM(HUP);
839 	P_SIGNUM(INT);
840 	P_SIGNUM(QUIT);
841 	P_SIGNUM(ILL);
842 	P_SIGNUM(TRAP);
843 	P_SIGNUM(ABRT);
844 	P_SIGNUM(BUS);
845 	P_SIGNUM(FPE);
846 	P_SIGNUM(KILL);
847 	P_SIGNUM(USR1);
848 	P_SIGNUM(SEGV);
849 	P_SIGNUM(USR2);
850 	P_SIGNUM(PIPE);
851 	P_SIGNUM(ALRM);
852 	P_SIGNUM(TERM);
853 	P_SIGNUM(CHLD);
854 	P_SIGNUM(CONT);
855 	P_SIGNUM(STOP);
856 	P_SIGNUM(TSTP);
857 	P_SIGNUM(TTIN);
858 	P_SIGNUM(TTOU);
859 	P_SIGNUM(URG);
860 	P_SIGNUM(XCPU);
861 	P_SIGNUM(XFSZ);
862 	P_SIGNUM(VTALRM);
863 	P_SIGNUM(PROF);
864 	P_SIGNUM(WINCH);
865 	P_SIGNUM(IO);
866 	P_SIGNUM(PWR);
867 	P_SIGNUM(SYS);
868 #ifdef SIGEMT
869 	P_SIGNUM(EMT);
870 #endif
871 #ifdef SIGSTKFLT
872 	P_SIGNUM(STKFLT);
873 #endif
874 #ifdef SIGSWI
875 	P_SIGNUM(SWI);
876 #endif
877 	default: break;
878 	}
879 
880 	return scnprintf(bf, size, "%#x", sig);
881 }
882 
883 #define SCA_SIGNUM syscall_arg__scnprintf_signum
884 
885 #if defined(__i386__) || defined(__x86_64__)
886 /*
887  * FIXME: Make this available to all arches.
888  */
889 #define TCGETS		0x5401
890 
891 static const char *tioctls[] = {
892 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
893 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
894 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
895 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
896 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
897 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
898 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
899 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
900 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
901 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
902 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
903 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
904 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
905 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
906 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
907 };
908 
909 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
910 #endif /* defined(__i386__) || defined(__x86_64__) */
911 
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument number
 * 'arg' with SCA_STRARRAY using the table strarray__<array>.  'name' is not
 * used by the expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
915 
916 static struct syscall_fmt {
917 	const char *name;
918 	const char *alias;
919 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
920 	void	   *arg_parm[6];
921 	bool	   errmsg;
922 	bool	   timeout;
923 	bool	   hexret;
924 } syscall_fmts[] = {
925 	{ .name	    = "access",	    .errmsg = true,
926 	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
927 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
928 	{ .name	    = "brk",	    .hexret = true,
929 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
930 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
931 	{ .name	    = "close",	    .errmsg = true,
932 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
933 	{ .name	    = "connect",    .errmsg = true, },
934 	{ .name	    = "dup",	    .errmsg = true,
935 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 	{ .name	    = "dup2",	    .errmsg = true,
937 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 	{ .name	    = "dup3",	    .errmsg = true,
939 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
941 	{ .name	    = "eventfd2",   .errmsg = true,
942 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
943 	{ .name	    = "faccessat",  .errmsg = true,
944 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
945 	{ .name	    = "fadvise64",  .errmsg = true,
946 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 	{ .name	    = "fallocate",  .errmsg = true,
948 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
949 	{ .name	    = "fchdir",	    .errmsg = true,
950 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 	{ .name	    = "fchmod",	    .errmsg = true,
952 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 	{ .name	    = "fchmodat",   .errmsg = true,
954 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
955 	{ .name	    = "fchown",	    .errmsg = true,
956 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
957 	{ .name	    = "fchownat",   .errmsg = true,
958 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 	{ .name	    = "fcntl",	    .errmsg = true,
960 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
961 			     [1] = SCA_STRARRAY, /* cmd */ },
962 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
963 	{ .name	    = "fdatasync",  .errmsg = true,
964 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
965 	{ .name	    = "flock",	    .errmsg = true,
966 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 			     [1] = SCA_FLOCK, /* cmd */ }, },
968 	{ .name	    = "fsetxattr",  .errmsg = true,
969 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
970 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
971 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
972 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
973 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
974 	{ .name	    = "fstatfs",    .errmsg = true,
975 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
976 	{ .name	    = "fsync",    .errmsg = true,
977 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
978 	{ .name	    = "ftruncate", .errmsg = true,
979 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
980 	{ .name	    = "futex",	    .errmsg = true,
981 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
982 	{ .name	    = "futimesat", .errmsg = true,
983 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
984 	{ .name	    = "getdents",   .errmsg = true,
985 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986 	{ .name	    = "getdents64", .errmsg = true,
987 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
989 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
990 	{ .name	    = "ioctl",	    .errmsg = true,
991 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
992 #if defined(__i386__) || defined(__x86_64__)
993 /*
994  * FIXME: Make this available to all arches.
995  */
996 			     [1] = SCA_STRHEXARRAY, /* cmd */
997 			     [2] = SCA_HEX, /* arg */ },
998 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
999 #else
1000 			     [2] = SCA_HEX, /* arg */ }, },
1001 #endif
1002 	{ .name	    = "kill",	    .errmsg = true,
1003 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1004 	{ .name	    = "linkat",	    .errmsg = true,
1005 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1006 	{ .name	    = "lseek",	    .errmsg = true,
1007 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1008 			     [2] = SCA_STRARRAY, /* whence */ },
1009 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1010 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
1011 	{ .name     = "madvise",    .errmsg = true,
1012 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1013 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1014 	{ .name	    = "mkdirat",    .errmsg = true,
1015 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1016 	{ .name	    = "mknodat",    .errmsg = true,
1017 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1018 	{ .name	    = "mlock",	    .errmsg = true,
1019 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1020 	{ .name	    = "mlockall",   .errmsg = true,
1021 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1022 	{ .name	    = "mmap",	    .hexret = true,
1023 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1024 			     [2] = SCA_MMAP_PROT, /* prot */
1025 			     [3] = SCA_MMAP_FLAGS, /* flags */
1026 			     [4] = SCA_FD, 	  /* fd */ }, },
1027 	{ .name	    = "mprotect",   .errmsg = true,
1028 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1029 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1030 	{ .name	    = "mremap",	    .hexret = true,
1031 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1032 			     [3] = SCA_MREMAP_FLAGS, /* flags */
1033 			     [4] = SCA_HEX, /* new_addr */ }, },
1034 	{ .name	    = "munlock",    .errmsg = true,
1035 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1036 	{ .name	    = "munmap",	    .errmsg = true,
1037 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1038 	{ .name	    = "name_to_handle_at", .errmsg = true,
1039 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040 	{ .name	    = "newfstatat", .errmsg = true,
1041 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042 	{ .name	    = "open",	    .errmsg = true,
1043 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1044 	{ .name	    = "open_by_handle_at", .errmsg = true,
1045 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1047 	{ .name	    = "openat",	    .errmsg = true,
1048 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050 	{ .name	    = "pipe2",	    .errmsg = true,
1051 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1052 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1053 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1054 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1055 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1057 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1058 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1059 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1060 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 	{ .name	    = "pwritev",    .errmsg = true,
1062 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 	{ .name	    = "read",	    .errmsg = true,
1064 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065 	{ .name	    = "readlinkat", .errmsg = true,
1066 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1067 	{ .name	    = "readv",	    .errmsg = true,
1068 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 	{ .name	    = "recvfrom",   .errmsg = true,
1070 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1071 	{ .name	    = "recvmmsg",   .errmsg = true,
1072 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1073 	{ .name	    = "recvmsg",    .errmsg = true,
1074 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1075 	{ .name	    = "renameat",   .errmsg = true,
1076 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1077 	{ .name	    = "rt_sigaction", .errmsg = true,
1078 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1079 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1080 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1081 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1082 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1083 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1084 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1085 	{ .name	    = "sendmmsg",    .errmsg = true,
1086 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1087 	{ .name	    = "sendmsg",    .errmsg = true,
1088 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1089 	{ .name	    = "sendto",	    .errmsg = true,
1090 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1091 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1092 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1093 	{ .name	    = "shutdown",   .errmsg = true,
1094 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095 	{ .name	    = "socket",	    .errmsg = true,
1096 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1097 			     [1] = SCA_SK_TYPE, /* type */ },
1098 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1099 	{ .name	    = "socketpair", .errmsg = true,
1100 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1101 			     [1] = SCA_SK_TYPE, /* type */ },
1102 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1103 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
1104 	{ .name	    = "symlinkat",  .errmsg = true,
1105 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1106 	{ .name	    = "tgkill",	    .errmsg = true,
1107 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1108 	{ .name	    = "tkill",	    .errmsg = true,
1109 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1110 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1111 	{ .name	    = "unlinkat",   .errmsg = true,
1112 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1113 	{ .name	    = "utimensat",  .errmsg = true,
1114 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1115 	{ .name	    = "write",	    .errmsg = true,
1116 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117 	{ .name	    = "writev",	    .errmsg = true,
1118 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1119 };
1120 
1121 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1122 {
1123 	const struct syscall_fmt *fmt = fmtp;
1124 	return strcmp(name, fmt->name);
1125 }
1126 
1127 static struct syscall_fmt *syscall_fmt__find(const char *name)
1128 {
1129 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1130 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1131 }
1132 
/*
 * Per syscall-id information, stored in trace->syscalls.table and filled in
 * lazily by trace__read_syscall_info().
 */
struct syscall {
	/* Format of the syscalls:sys_enter_<name> (or alias) tracepoint. */
	struct event_format *tp_format;
	/* Name as returned by audit_syscall_to_name(); NULL until read. */
	const char *name;
	/* Excluded by the event qualifier list, samples are ignored. */
	bool filtered;
	/* True for "exit" and "exit_group". */
	bool is_exit;
	/* Matching syscall_fmts entry, or NULL if there is none. */
	struct syscall_fmt *fmt;
	/* Array of per-argument formatters, indexed by argument position. */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per-argument formatter parameters; taken from fmt->arg_parm. */
	void **arg_parm;
};
1142 
1143 static size_t fprintf_duration(unsigned long t, FILE *fp)
1144 {
1145 	double duration = (double)t / NSEC_PER_MSEC;
1146 	size_t printed = fprintf(fp, "(");
1147 
1148 	if (duration >= 1.0)
1149 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150 	else if (duration >= 0.01)
1151 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1152 	else
1153 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154 	return printed + fprintf(fp, "): ");
1155 }
1156 
1157 struct thread_trace {
1158 	u64		  entry_time;
1159 	u64		  exit_time;
1160 	bool		  entry_pending;
1161 	unsigned long	  nr_events;
1162 	unsigned long	  pfmaj, pfmin;
1163 	char		  *entry_str;
1164 	double		  runtime_ms;
1165 	struct {
1166 		int	  max;
1167 		char	  **table;
1168 	} paths;
1169 
1170 	struct intlist *syscall_stats;
1171 };
1172 
1173 static struct thread_trace *thread_trace__new(void)
1174 {
1175 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1176 
1177 	if (ttrace)
1178 		ttrace->paths.max = -1;
1179 
1180 	ttrace->syscall_stats = intlist__new(NULL);
1181 
1182 	return ttrace;
1183 }
1184 
1185 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1186 {
1187 	struct thread_trace *ttrace;
1188 
1189 	if (thread == NULL)
1190 		goto fail;
1191 
1192 	if (thread__priv(thread) == NULL)
1193 		thread__set_priv(thread, thread_trace__new());
1194 
1195 	if (thread__priv(thread) == NULL)
1196 		goto fail;
1197 
1198 	ttrace = thread__priv(thread);
1199 	++ttrace->nr_events;
1200 
1201 	return ttrace;
1202 fail:
1203 	color_fprintf(fp, PERF_COLOR_RED,
1204 		      "WARNING: not enough memory, dropping samples!\n");
1205 	return NULL;
1206 }
1207 
1208 #define TRACE_PFMAJ		(1 << 0)
1209 #define TRACE_PFMIN		(1 << 1)
1210 
1211 struct trace {
1212 	struct perf_tool	tool;
1213 	struct {
1214 		int		machine;
1215 		int		open_id;
1216 	}			audit;
1217 	struct {
1218 		int		max;
1219 		struct syscall  *table;
1220 	} syscalls;
1221 	struct record_opts	opts;
1222 	struct machine		*host;
1223 	u64			base_time;
1224 	FILE			*output;
1225 	unsigned long		nr_events;
1226 	struct strlist		*ev_qualifier;
1227 	const char 		*last_vfs_getname;
1228 	struct intlist		*tid_list;
1229 	struct intlist		*pid_list;
1230 	double			duration_filter;
1231 	double			runtime_ms;
1232 	struct {
1233 		u64		vfs_getname,
1234 				proc_getname;
1235 	} stats;
1236 	bool			not_ev_qualifier;
1237 	bool			live;
1238 	bool			full_time;
1239 	bool			sched;
1240 	bool			multiple_threads;
1241 	bool			summary;
1242 	bool			summary_only;
1243 	bool			show_comm;
1244 	bool			show_tool_stats;
1245 	bool			trace_syscalls;
1246 	int			trace_pgfaults;
1247 };
1248 
1249 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1250 {
1251 	struct thread_trace *ttrace = thread__priv(thread);
1252 
1253 	if (fd > ttrace->paths.max) {
1254 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1255 
1256 		if (npath == NULL)
1257 			return -1;
1258 
1259 		if (ttrace->paths.max != -1) {
1260 			memset(npath + ttrace->paths.max + 1, 0,
1261 			       (fd - ttrace->paths.max) * sizeof(char *));
1262 		} else {
1263 			memset(npath, 0, (fd + 1) * sizeof(char *));
1264 		}
1265 
1266 		ttrace->paths.table = npath;
1267 		ttrace->paths.max   = fd;
1268 	}
1269 
1270 	ttrace->paths.table[fd] = strdup(pathname);
1271 
1272 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1273 }
1274 
1275 static int thread__read_fd_path(struct thread *thread, int fd)
1276 {
1277 	char linkname[PATH_MAX], pathname[PATH_MAX];
1278 	struct stat st;
1279 	int ret;
1280 
1281 	if (thread->pid_ == thread->tid) {
1282 		scnprintf(linkname, sizeof(linkname),
1283 			  "/proc/%d/fd/%d", thread->pid_, fd);
1284 	} else {
1285 		scnprintf(linkname, sizeof(linkname),
1286 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1287 	}
1288 
1289 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1290 		return -1;
1291 
1292 	ret = readlink(linkname, pathname, sizeof(pathname));
1293 
1294 	if (ret < 0 || ret > st.st_size)
1295 		return -1;
1296 
1297 	pathname[ret] = '\0';
1298 	return trace__set_fd_pathname(thread, fd, pathname);
1299 }
1300 
1301 static const char *thread__fd_path(struct thread *thread, int fd,
1302 				   struct trace *trace)
1303 {
1304 	struct thread_trace *ttrace = thread__priv(thread);
1305 
1306 	if (ttrace == NULL)
1307 		return NULL;
1308 
1309 	if (fd < 0)
1310 		return NULL;
1311 
1312 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1313 		if (!trace->live)
1314 			return NULL;
1315 		++trace->stats.proc_getname;
1316 		if (thread__read_fd_path(thread, fd))
1317 			return NULL;
1318 	}
1319 
1320 	return ttrace->paths.table[fd];
1321 }
1322 
1323 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1324 					struct syscall_arg *arg)
1325 {
1326 	int fd = arg->val;
1327 	size_t printed = scnprintf(bf, size, "%d", fd);
1328 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1329 
1330 	if (path)
1331 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1332 
1333 	return printed;
1334 }
1335 
1336 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1337 					      struct syscall_arg *arg)
1338 {
1339 	int fd = arg->val;
1340 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1341 	struct thread_trace *ttrace = thread__priv(arg->thread);
1342 
1343 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1344 		zfree(&ttrace->paths.table[fd]);
1345 
1346 	return printed;
1347 }
1348 
1349 static bool trace__filter_duration(struct trace *trace, double t)
1350 {
1351 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1352 }
1353 
1354 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1355 {
1356 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1357 
1358 	return fprintf(fp, "%10.3f ", ts);
1359 }
1360 
/* Set by sig_handler(): a signal asked us to stop. */
static bool done = false;
/* Set by sig_handler(): the signal that stopped us was SIGINT. */
static bool interrupted = false;

/* Signal handler: record that we were asked to stop, and whether by SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = (sig == SIGINT);
}
1369 
1370 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1371 					u64 duration, u64 tstamp, FILE *fp)
1372 {
1373 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1374 	printed += fprintf_duration(duration, fp);
1375 
1376 	if (trace->multiple_threads) {
1377 		if (trace->show_comm)
1378 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1379 		printed += fprintf(fp, "%d ", thread->tid);
1380 	}
1381 
1382 	return printed;
1383 }
1384 
1385 static int trace__process_event(struct trace *trace, struct machine *machine,
1386 				union perf_event *event, struct perf_sample *sample)
1387 {
1388 	int ret = 0;
1389 
1390 	switch (event->header.type) {
1391 	case PERF_RECORD_LOST:
1392 		color_fprintf(trace->output, PERF_COLOR_RED,
1393 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1394 		ret = machine__process_lost_event(machine, event, sample);
1395 	default:
1396 		ret = machine__process_event(machine, event, sample);
1397 		break;
1398 	}
1399 
1400 	return ret;
1401 }
1402 
1403 static int trace__tool_process(struct perf_tool *tool,
1404 			       union perf_event *event,
1405 			       struct perf_sample *sample,
1406 			       struct machine *machine)
1407 {
1408 	struct trace *trace = container_of(tool, struct trace, tool);
1409 	return trace__process_event(trace, machine, event, sample);
1410 }
1411 
1412 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1413 {
1414 	int err = symbol__init(NULL);
1415 
1416 	if (err)
1417 		return err;
1418 
1419 	trace->host = machine__new_host();
1420 	if (trace->host == NULL)
1421 		return -ENOMEM;
1422 
1423 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1424 					    evlist->threads, trace__tool_process, false);
1425 	if (err)
1426 		symbol__exit();
1427 
1428 	return err;
1429 }
1430 
1431 static int syscall__set_arg_fmts(struct syscall *sc)
1432 {
1433 	struct format_field *field;
1434 	int idx = 0;
1435 
1436 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1437 	if (sc->arg_scnprintf == NULL)
1438 		return -1;
1439 
1440 	if (sc->fmt)
1441 		sc->arg_parm = sc->fmt->arg_parm;
1442 
1443 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1444 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1445 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1446 		else if (field->flags & FIELD_IS_POINTER)
1447 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1448 		++idx;
1449 	}
1450 
1451 	return 0;
1452 }
1453 
1454 static int trace__read_syscall_info(struct trace *trace, int id)
1455 {
1456 	char tp_name[128];
1457 	struct syscall *sc;
1458 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1459 
1460 	if (name == NULL)
1461 		return -1;
1462 
1463 	if (id > trace->syscalls.max) {
1464 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1465 
1466 		if (nsyscalls == NULL)
1467 			return -1;
1468 
1469 		if (trace->syscalls.max != -1) {
1470 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1471 			       (id - trace->syscalls.max) * sizeof(*sc));
1472 		} else {
1473 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1474 		}
1475 
1476 		trace->syscalls.table = nsyscalls;
1477 		trace->syscalls.max   = id;
1478 	}
1479 
1480 	sc = trace->syscalls.table + id;
1481 	sc->name = name;
1482 
1483 	if (trace->ev_qualifier) {
1484 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1485 
1486 		if (!(in ^ trace->not_ev_qualifier)) {
1487 			sc->filtered = true;
1488 			/*
1489 			 * No need to do read tracepoint information since this will be
1490 			 * filtered out.
1491 			 */
1492 			return 0;
1493 		}
1494 	}
1495 
1496 	sc->fmt  = syscall_fmt__find(sc->name);
1497 
1498 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1499 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1500 
1501 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1502 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1503 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1504 	}
1505 
1506 	if (sc->tp_format == NULL)
1507 		return -1;
1508 
1509 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1510 
1511 	return syscall__set_arg_fmts(sc);
1512 }
1513 
1514 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1515 				      unsigned long *args, struct trace *trace,
1516 				      struct thread *thread)
1517 {
1518 	size_t printed = 0;
1519 
1520 	if (sc->tp_format != NULL) {
1521 		struct format_field *field;
1522 		u8 bit = 1;
1523 		struct syscall_arg arg = {
1524 			.idx	= 0,
1525 			.mask	= 0,
1526 			.trace  = trace,
1527 			.thread = thread,
1528 		};
1529 
1530 		for (field = sc->tp_format->format.fields->next; field;
1531 		     field = field->next, ++arg.idx, bit <<= 1) {
1532 			if (arg.mask & bit)
1533 				continue;
1534 			/*
1535  			 * Suppress this argument if its value is zero and
1536  			 * and we don't have a string associated in an
1537  			 * strarray for it.
1538  			 */
1539 			if (args[arg.idx] == 0 &&
1540 			    !(sc->arg_scnprintf &&
1541 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1542 			      sc->arg_parm[arg.idx]))
1543 				continue;
1544 
1545 			printed += scnprintf(bf + printed, size - printed,
1546 					     "%s%s: ", printed ? ", " : "", field->name);
1547 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1548 				arg.val = args[arg.idx];
1549 				if (sc->arg_parm)
1550 					arg.parm = sc->arg_parm[arg.idx];
1551 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1552 								      size - printed, &arg);
1553 			} else {
1554 				printed += scnprintf(bf + printed, size - printed,
1555 						     "%ld", args[arg.idx]);
1556 			}
1557 		}
1558 	} else {
1559 		int i = 0;
1560 
1561 		while (i < 6) {
1562 			printed += scnprintf(bf + printed, size - printed,
1563 					     "%sarg%d: %ld",
1564 					     printed ? ", " : "", i, args[i]);
1565 			++i;
1566 		}
1567 	}
1568 
1569 	return printed;
1570 }
1571 
/*
 * Signature of the per-event handlers stored in evsel->handler and invoked
 * from trace__process_sample().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1575 
1576 static struct syscall *trace__syscall_info(struct trace *trace,
1577 					   struct perf_evsel *evsel, int id)
1578 {
1579 
1580 	if (id < 0) {
1581 
1582 		/*
1583 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1584 		 * before that, leaving at a higher verbosity level till that is
1585 		 * explained. Reproduced with plain ftrace with:
1586 		 *
1587 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1588 		 * grep "NR -1 " /t/trace_pipe
1589 		 *
1590 		 * After generating some load on the machine.
1591  		 */
1592 		if (verbose > 1) {
1593 			static u64 n;
1594 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1595 				id, perf_evsel__name(evsel), ++n);
1596 		}
1597 		return NULL;
1598 	}
1599 
1600 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1601 	    trace__read_syscall_info(trace, id))
1602 		goto out_cant_read;
1603 
1604 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1605 		goto out_cant_read;
1606 
1607 	return &trace->syscalls.table[id];
1608 
1609 out_cant_read:
1610 	if (verbose) {
1611 		fprintf(trace->output, "Problems reading syscall %d", id);
1612 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1613 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1614 		fputs(" information\n", trace->output);
1615 	}
1616 	return NULL;
1617 }
1618 
1619 static void thread__update_stats(struct thread_trace *ttrace,
1620 				 int id, struct perf_sample *sample)
1621 {
1622 	struct int_node *inode;
1623 	struct stats *stats;
1624 	u64 duration = 0;
1625 
1626 	inode = intlist__findnew(ttrace->syscall_stats, id);
1627 	if (inode == NULL)
1628 		return;
1629 
1630 	stats = inode->priv;
1631 	if (stats == NULL) {
1632 		stats = malloc(sizeof(struct stats));
1633 		if (stats == NULL)
1634 			return;
1635 		init_stats(stats);
1636 		inode->priv = stats;
1637 	}
1638 
1639 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1640 		duration = sample->time - ttrace->entry_time;
1641 
1642 	update_stats(stats, duration);
1643 }
1644 
1645 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1646 			    union perf_event *event __maybe_unused,
1647 			    struct perf_sample *sample)
1648 {
1649 	char *msg;
1650 	void *args;
1651 	size_t printed = 0;
1652 	struct thread *thread;
1653 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1654 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1655 	struct thread_trace *ttrace;
1656 
1657 	if (sc == NULL)
1658 		return -1;
1659 
1660 	if (sc->filtered)
1661 		return 0;
1662 
1663 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1664 	ttrace = thread__trace(thread, trace->output);
1665 	if (ttrace == NULL)
1666 		return -1;
1667 
1668 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1669 
1670 	if (ttrace->entry_str == NULL) {
1671 		ttrace->entry_str = malloc(1024);
1672 		if (!ttrace->entry_str)
1673 			return -1;
1674 	}
1675 
1676 	ttrace->entry_time = sample->time;
1677 	msg = ttrace->entry_str;
1678 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1679 
1680 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1681 					   args, trace, thread);
1682 
1683 	if (sc->is_exit) {
1684 		if (!trace->duration_filter && !trace->summary_only) {
1685 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1686 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1687 		}
1688 	} else
1689 		ttrace->entry_pending = true;
1690 
1691 	return 0;
1692 }
1693 
1694 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1695 			   union perf_event *event __maybe_unused,
1696 			   struct perf_sample *sample)
1697 {
1698 	long ret;
1699 	u64 duration = 0;
1700 	struct thread *thread;
1701 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1702 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1703 	struct thread_trace *ttrace;
1704 
1705 	if (sc == NULL)
1706 		return -1;
1707 
1708 	if (sc->filtered)
1709 		return 0;
1710 
1711 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1712 	ttrace = thread__trace(thread, trace->output);
1713 	if (ttrace == NULL)
1714 		return -1;
1715 
1716 	if (trace->summary)
1717 		thread__update_stats(ttrace, id, sample);
1718 
1719 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1720 
1721 	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1722 		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1723 		trace->last_vfs_getname = NULL;
1724 		++trace->stats.vfs_getname;
1725 	}
1726 
1727 	ttrace->exit_time = sample->time;
1728 
1729 	if (ttrace->entry_time) {
1730 		duration = sample->time - ttrace->entry_time;
1731 		if (trace__filter_duration(trace, duration))
1732 			goto out;
1733 	} else if (trace->duration_filter)
1734 		goto out;
1735 
1736 	if (trace->summary_only)
1737 		goto out;
1738 
1739 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1740 
1741 	if (ttrace->entry_pending) {
1742 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1743 	} else {
1744 		fprintf(trace->output, " ... [");
1745 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1746 		fprintf(trace->output, "]: %s()", sc->name);
1747 	}
1748 
1749 	if (sc->fmt == NULL) {
1750 signed_print:
1751 		fprintf(trace->output, ") = %ld", ret);
1752 	} else if (ret < 0 && sc->fmt->errmsg) {
1753 		char bf[STRERR_BUFSIZE];
1754 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1755 			   *e = audit_errno_to_name(-ret);
1756 
1757 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1758 	} else if (ret == 0 && sc->fmt->timeout)
1759 		fprintf(trace->output, ") = 0 Timeout");
1760 	else if (sc->fmt->hexret)
1761 		fprintf(trace->output, ") = %#lx", ret);
1762 	else
1763 		goto signed_print;
1764 
1765 	fputc('\n', trace->output);
1766 out:
1767 	ttrace->entry_pending = false;
1768 
1769 	return 0;
1770 }
1771 
1772 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1773 			      union perf_event *event __maybe_unused,
1774 			      struct perf_sample *sample)
1775 {
1776 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1777 	return 0;
1778 }
1779 
1780 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1781 				     union perf_event *event __maybe_unused,
1782 				     struct perf_sample *sample)
1783 {
1784         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1785 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1786 	struct thread *thread = machine__findnew_thread(trace->host,
1787 							sample->pid,
1788 							sample->tid);
1789 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1790 
1791 	if (ttrace == NULL)
1792 		goto out_dump;
1793 
1794 	ttrace->runtime_ms += runtime_ms;
1795 	trace->runtime_ms += runtime_ms;
1796 	return 0;
1797 
1798 out_dump:
1799 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1800 	       evsel->name,
1801 	       perf_evsel__strval(evsel, sample, "comm"),
1802 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1803 	       runtime,
1804 	       perf_evsel__intval(evsel, sample, "vruntime"));
1805 	return 0;
1806 }
1807 
1808 static void print_location(FILE *f, struct perf_sample *sample,
1809 			   struct addr_location *al,
1810 			   bool print_dso, bool print_sym)
1811 {
1812 
1813 	if ((verbose || print_dso) && al->map)
1814 		fprintf(f, "%s@", al->map->dso->long_name);
1815 
1816 	if ((verbose || print_sym) && al->sym)
1817 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1818 			al->addr - al->sym->start);
1819 	else if (al->map)
1820 		fprintf(f, "0x%" PRIx64, al->addr);
1821 	else
1822 		fprintf(f, "0x%" PRIx64, sample->addr);
1823 }
1824 
1825 static int trace__pgfault(struct trace *trace,
1826 			  struct perf_evsel *evsel,
1827 			  union perf_event *event,
1828 			  struct perf_sample *sample)
1829 {
1830 	struct thread *thread;
1831 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1832 	struct addr_location al;
1833 	char map_type = 'd';
1834 	struct thread_trace *ttrace;
1835 
1836 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1837 	ttrace = thread__trace(thread, trace->output);
1838 	if (ttrace == NULL)
1839 		return -1;
1840 
1841 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1842 		ttrace->pfmaj++;
1843 	else
1844 		ttrace->pfmin++;
1845 
1846 	if (trace->summary_only)
1847 		return 0;
1848 
1849 	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1850 			      sample->ip, &al);
1851 
1852 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1853 
1854 	fprintf(trace->output, "%sfault [",
1855 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1856 		"maj" : "min");
1857 
1858 	print_location(trace->output, sample, &al, false, true);
1859 
1860 	fprintf(trace->output, "] => ");
1861 
1862 	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1863 				   sample->addr, &al);
1864 
1865 	if (!al.map) {
1866 		thread__find_addr_location(thread, cpumode,
1867 					   MAP__FUNCTION, sample->addr, &al);
1868 
1869 		if (al.map)
1870 			map_type = 'x';
1871 		else
1872 			map_type = '?';
1873 	}
1874 
1875 	print_location(trace->output, sample, &al, true, false);
1876 
1877 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1878 
1879 	return 0;
1880 }
1881 
1882 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1883 {
1884 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1885 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1886 		return false;
1887 
1888 	if (trace->pid_list || trace->tid_list)
1889 		return true;
1890 
1891 	return false;
1892 }
1893 
1894 static int trace__process_sample(struct perf_tool *tool,
1895 				 union perf_event *event,
1896 				 struct perf_sample *sample,
1897 				 struct perf_evsel *evsel,
1898 				 struct machine *machine __maybe_unused)
1899 {
1900 	struct trace *trace = container_of(tool, struct trace, tool);
1901 	int err = 0;
1902 
1903 	tracepoint_handler handler = evsel->handler;
1904 
1905 	if (skip_sample(trace, sample))
1906 		return 0;
1907 
1908 	if (!trace->full_time && trace->base_time == 0)
1909 		trace->base_time = sample->time;
1910 
1911 	if (handler) {
1912 		++trace->nr_events;
1913 		handler(trace, evsel, event, sample);
1914 	}
1915 
1916 	return err;
1917 }
1918 
1919 static int parse_target_str(struct trace *trace)
1920 {
1921 	if (trace->opts.target.pid) {
1922 		trace->pid_list = intlist__new(trace->opts.target.pid);
1923 		if (trace->pid_list == NULL) {
1924 			pr_err("Error parsing process id string\n");
1925 			return -EINVAL;
1926 		}
1927 	}
1928 
1929 	if (trace->opts.target.tid) {
1930 		trace->tid_list = intlist__new(trace->opts.target.tid);
1931 		if (trace->tid_list == NULL) {
1932 			pr_err("Error parsing thread id string\n");
1933 			return -EINVAL;
1934 		}
1935 	}
1936 
1937 	return 0;
1938 }
1939 
1940 static int trace__record(struct trace *trace, int argc, const char **argv)
1941 {
1942 	unsigned int rec_argc, i, j;
1943 	const char **rec_argv;
1944 	const char * const record_args[] = {
1945 		"record",
1946 		"-R",
1947 		"-m", "1024",
1948 		"-c", "1",
1949 	};
1950 
1951 	const char * const sc_args[] = { "-e", };
1952 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1953 	const char * const majpf_args[] = { "-e", "major-faults" };
1954 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1955 	const char * const minpf_args[] = { "-e", "minor-faults" };
1956 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1957 
1958 	/* +1 is for the event string below */
1959 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1960 		majpf_args_nr + minpf_args_nr + argc;
1961 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1962 
1963 	if (rec_argv == NULL)
1964 		return -ENOMEM;
1965 
1966 	j = 0;
1967 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1968 		rec_argv[j++] = record_args[i];
1969 
1970 	if (trace->trace_syscalls) {
1971 		for (i = 0; i < sc_args_nr; i++)
1972 			rec_argv[j++] = sc_args[i];
1973 
1974 		/* event string may be different for older kernels - e.g., RHEL6 */
1975 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1976 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1977 		else if (is_valid_tracepoint("syscalls:sys_enter"))
1978 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1979 		else {
1980 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1981 			return -1;
1982 		}
1983 	}
1984 
1985 	if (trace->trace_pgfaults & TRACE_PFMAJ)
1986 		for (i = 0; i < majpf_args_nr; i++)
1987 			rec_argv[j++] = majpf_args[i];
1988 
1989 	if (trace->trace_pgfaults & TRACE_PFMIN)
1990 		for (i = 0; i < minpf_args_nr; i++)
1991 			rec_argv[j++] = minpf_args[i];
1992 
1993 	for (i = 0; i < (unsigned int)argc; i++)
1994 		rec_argv[j++] = argv[i];
1995 
1996 	return cmd_record(j, rec_argv, NULL);
1997 }
1998 
1999 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2000 
2001 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2002 {
2003 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2004 	if (evsel == NULL)
2005 		return;
2006 
2007 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2008 		perf_evsel__delete(evsel);
2009 		return;
2010 	}
2011 
2012 	evsel->handler = trace__vfs_getname;
2013 	perf_evlist__add(evlist, evsel);
2014 }
2015 
2016 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2017 				    u64 config)
2018 {
2019 	struct perf_evsel *evsel;
2020 	struct perf_event_attr attr = {
2021 		.type = PERF_TYPE_SOFTWARE,
2022 		.mmap_data = 1,
2023 	};
2024 
2025 	attr.config = config;
2026 	attr.sample_period = 1;
2027 
2028 	event_attr_init(&attr);
2029 
2030 	evsel = perf_evsel__new(&attr);
2031 	if (!evsel)
2032 		return -ENOMEM;
2033 
2034 	evsel->handler = trace__pgfault;
2035 	perf_evlist__add(evlist, evsel);
2036 
2037 	return 0;
2038 }
2039 
2040 static int trace__run(struct trace *trace, int argc, const char **argv)
2041 {
2042 	struct perf_evlist *evlist = perf_evlist__new();
2043 	struct perf_evsel *evsel;
2044 	int err = -1, i;
2045 	unsigned long before;
2046 	const bool forks = argc > 0;
2047 	bool draining = false;
2048 
2049 	trace->live = true;
2050 
2051 	if (evlist == NULL) {
2052 		fprintf(trace->output, "Not enough memory to run!\n");
2053 		goto out;
2054 	}
2055 
2056 	if (trace->trace_syscalls &&
2057 	    perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2058 					   trace__sys_exit))
2059 		goto out_error_raw_syscalls;
2060 
2061 	if (trace->trace_syscalls)
2062 		perf_evlist__add_vfs_getname(evlist);
2063 
2064 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2065 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2066 		goto out_error_mem;
2067 	}
2068 
2069 	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2070 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2071 		goto out_error_mem;
2072 
2073 	if (trace->sched &&
2074 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2075 				   trace__sched_stat_runtime))
2076 		goto out_error_sched_stat_runtime;
2077 
2078 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2079 	if (err < 0) {
2080 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2081 		goto out_delete_evlist;
2082 	}
2083 
2084 	err = trace__symbols_init(trace, evlist);
2085 	if (err < 0) {
2086 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2087 		goto out_delete_evlist;
2088 	}
2089 
2090 	perf_evlist__config(evlist, &trace->opts);
2091 
2092 	signal(SIGCHLD, sig_handler);
2093 	signal(SIGINT, sig_handler);
2094 
2095 	if (forks) {
2096 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2097 						    argv, false, NULL);
2098 		if (err < 0) {
2099 			fprintf(trace->output, "Couldn't run the workload!\n");
2100 			goto out_delete_evlist;
2101 		}
2102 	}
2103 
2104 	err = perf_evlist__open(evlist);
2105 	if (err < 0)
2106 		goto out_error_open;
2107 
2108 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2109 	if (err < 0)
2110 		goto out_error_mmap;
2111 
2112 	if (forks)
2113 		perf_evlist__start_workload(evlist);
2114 	else
2115 		perf_evlist__enable(evlist);
2116 
2117 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2118 again:
2119 	before = trace->nr_events;
2120 
2121 	for (i = 0; i < evlist->nr_mmaps; i++) {
2122 		union perf_event *event;
2123 
2124 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2125 			const u32 type = event->header.type;
2126 			tracepoint_handler handler;
2127 			struct perf_sample sample;
2128 
2129 			++trace->nr_events;
2130 
2131 			err = perf_evlist__parse_sample(evlist, event, &sample);
2132 			if (err) {
2133 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2134 				goto next_event;
2135 			}
2136 
2137 			if (!trace->full_time && trace->base_time == 0)
2138 				trace->base_time = sample.time;
2139 
2140 			if (type != PERF_RECORD_SAMPLE) {
2141 				trace__process_event(trace, trace->host, event, &sample);
2142 				continue;
2143 			}
2144 
2145 			evsel = perf_evlist__id2evsel(evlist, sample.id);
2146 			if (evsel == NULL) {
2147 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2148 				goto next_event;
2149 			}
2150 
2151 			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2152 			    sample.raw_data == NULL) {
2153 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2154 				       perf_evsel__name(evsel), sample.tid,
2155 				       sample.cpu, sample.raw_size);
2156 				goto next_event;
2157 			}
2158 
2159 			handler = evsel->handler;
2160 			handler(trace, evsel, event, &sample);
2161 next_event:
2162 			perf_evlist__mmap_consume(evlist, i);
2163 
2164 			if (interrupted)
2165 				goto out_disable;
2166 		}
2167 	}
2168 
2169 	if (trace->nr_events == before) {
2170 		int timeout = done ? 100 : -1;
2171 
2172 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2173 			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2174 				draining = true;
2175 
2176 			goto again;
2177 		}
2178 	} else {
2179 		goto again;
2180 	}
2181 
2182 out_disable:
2183 	perf_evlist__disable(evlist);
2184 
2185 	if (!err) {
2186 		if (trace->summary)
2187 			trace__fprintf_thread_summary(trace, trace->output);
2188 
2189 		if (trace->show_tool_stats) {
2190 			fprintf(trace->output, "Stats:\n "
2191 					       " vfs_getname : %" PRIu64 "\n"
2192 					       " proc_getname: %" PRIu64 "\n",
2193 				trace->stats.vfs_getname,
2194 				trace->stats.proc_getname);
2195 		}
2196 	}
2197 
2198 out_delete_evlist:
2199 	perf_evlist__delete(evlist);
2200 out:
2201 	trace->live = false;
2202 	return err;
2203 {
2204 	char errbuf[BUFSIZ];
2205 
2206 out_error_sched_stat_runtime:
2207 	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2208 	goto out_error;
2209 
2210 out_error_raw_syscalls:
2211 	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2212 	goto out_error;
2213 
2214 out_error_mmap:
2215 	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2216 	goto out_error;
2217 
2218 out_error_open:
2219 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2220 
2221 out_error:
2222 	fprintf(trace->output, "%s\n", errbuf);
2223 	goto out_delete_evlist;
2224 }
2225 out_error_mem:
2226 	fprintf(trace->output, "Not enough memory to run!\n");
2227 	goto out_delete_evlist;
2228 }
2229 
2230 static int trace__replay(struct trace *trace)
2231 {
2232 	const struct perf_evsel_str_handler handlers[] = {
2233 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2234 	};
2235 	struct perf_data_file file = {
2236 		.path  = input_name,
2237 		.mode  = PERF_DATA_MODE_READ,
2238 	};
2239 	struct perf_session *session;
2240 	struct perf_evsel *evsel;
2241 	int err = -1;
2242 
2243 	trace->tool.sample	  = trace__process_sample;
2244 	trace->tool.mmap	  = perf_event__process_mmap;
2245 	trace->tool.mmap2	  = perf_event__process_mmap2;
2246 	trace->tool.comm	  = perf_event__process_comm;
2247 	trace->tool.exit	  = perf_event__process_exit;
2248 	trace->tool.fork	  = perf_event__process_fork;
2249 	trace->tool.attr	  = perf_event__process_attr;
2250 	trace->tool.tracing_data = perf_event__process_tracing_data;
2251 	trace->tool.build_id	  = perf_event__process_build_id;
2252 
2253 	trace->tool.ordered_events = true;
2254 	trace->tool.ordering_requires_timestamps = true;
2255 
2256 	/* add tid to output */
2257 	trace->multiple_threads = true;
2258 
2259 	session = perf_session__new(&file, false, &trace->tool);
2260 	if (session == NULL)
2261 		return -1;
2262 
2263 	if (symbol__init(&session->header.env) < 0)
2264 		goto out;
2265 
2266 	trace->host = &session->machines.host;
2267 
2268 	err = perf_session__set_tracepoints_handlers(session, handlers);
2269 	if (err)
2270 		goto out;
2271 
2272 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2273 						     "raw_syscalls:sys_enter");
2274 	/* older kernels have syscalls tp versus raw_syscalls */
2275 	if (evsel == NULL)
2276 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2277 							     "syscalls:sys_enter");
2278 
2279 	if (evsel &&
2280 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2281 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2282 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2283 		goto out;
2284 	}
2285 
2286 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2287 						     "raw_syscalls:sys_exit");
2288 	if (evsel == NULL)
2289 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2290 							     "syscalls:sys_exit");
2291 	if (evsel &&
2292 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2293 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2294 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2295 		goto out;
2296 	}
2297 
2298 	evlist__for_each(session->evlist, evsel) {
2299 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2300 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2301 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2302 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2303 			evsel->handler = trace__pgfault;
2304 	}
2305 
2306 	err = parse_target_str(trace);
2307 	if (err != 0)
2308 		goto out;
2309 
2310 	setup_pager();
2311 
2312 	err = perf_session__process_events(session, &trace->tool);
2313 	if (err)
2314 		pr_err("Failed to process events, error %d", err);
2315 
2316 	else if (trace->summary)
2317 		trace__fprintf_thread_summary(trace, trace->output);
2318 
2319 out:
2320 	perf_session__delete(session);
2321 
2322 	return err;
2323 }
2324 
/*
 * Write the banner that introduces the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2333 
2334 static size_t thread__dump_stats(struct thread_trace *ttrace,
2335 				 struct trace *trace, FILE *fp)
2336 {
2337 	struct stats *stats;
2338 	size_t printed = 0;
2339 	struct syscall *sc;
2340 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2341 
2342 	if (inode == NULL)
2343 		return 0;
2344 
2345 	printed += fprintf(fp, "\n");
2346 
2347 	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2348 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2349 	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2350 
2351 	/* each int_node is a syscall */
2352 	while (inode) {
2353 		stats = inode->priv;
2354 		if (stats) {
2355 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2356 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2357 			double avg = avg_stats(stats);
2358 			double pct;
2359 			u64 n = (u64) stats->n;
2360 
2361 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2362 			avg /= NSEC_PER_MSEC;
2363 
2364 			sc = &trace->syscalls.table[inode->i];
2365 			printed += fprintf(fp, "   %-15s", sc->name);
2366 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2367 					   n, min, avg);
2368 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2369 		}
2370 
2371 		inode = intlist__next(inode);
2372 	}
2373 
2374 	printed += fprintf(fp, "\n\n");
2375 
2376 	return printed;
2377 }
2378 
2379 /* struct used to pass data to per-thread function */
2380 struct summary_data {
2381 	FILE *fp;
2382 	struct trace *trace;
2383 	size_t printed;
2384 };
2385 
2386 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2387 {
2388 	struct summary_data *data = priv;
2389 	FILE *fp = data->fp;
2390 	size_t printed = data->printed;
2391 	struct trace *trace = data->trace;
2392 	struct thread_trace *ttrace = thread__priv(thread);
2393 	double ratio;
2394 
2395 	if (ttrace == NULL)
2396 		return 0;
2397 
2398 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2399 
2400 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2401 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2402 	printed += fprintf(fp, "%.1f%%", ratio);
2403 	if (ttrace->pfmaj)
2404 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2405 	if (ttrace->pfmin)
2406 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2407 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2408 	printed += thread__dump_stats(ttrace, trace, fp);
2409 
2410 	data->printed += printed;
2411 
2412 	return 0;
2413 }
2414 
2415 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2416 {
2417 	struct summary_data data = {
2418 		.fp = fp,
2419 		.trace = trace
2420 	};
2421 	data.printed = trace__fprintf_threads_header(fp);
2422 
2423 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2424 
2425 	return data.printed;
2426 }
2427 
2428 static int trace__set_duration(const struct option *opt, const char *str,
2429 			       int unset __maybe_unused)
2430 {
2431 	struct trace *trace = opt->value;
2432 
2433 	trace->duration_filter = atof(str);
2434 	return 0;
2435 }
2436 
2437 static int trace__open_output(struct trace *trace, const char *filename)
2438 {
2439 	struct stat st;
2440 
2441 	if (!stat(filename, &st) && st.st_size) {
2442 		char oldname[PATH_MAX];
2443 
2444 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2445 		unlink(oldname);
2446 		rename(filename, oldname);
2447 	}
2448 
2449 	trace->output = fopen(filename, "w");
2450 
2451 	return trace->output == NULL ? -errno : 0;
2452 }
2453 
2454 static int parse_pagefaults(const struct option *opt, const char *str,
2455 			    int unset __maybe_unused)
2456 {
2457 	int *trace_pgfaults = opt->value;
2458 
2459 	if (strcmp(str, "all") == 0)
2460 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2461 	else if (strcmp(str, "maj") == 0)
2462 		*trace_pgfaults |= TRACE_PFMAJ;
2463 	else if (strcmp(str, "min") == 0)
2464 		*trace_pgfaults |= TRACE_PFMIN;
2465 	else
2466 		return -1;
2467 
2468 	return 0;
2469 }
2470 
2471 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2472 {
2473 	const char * const trace_usage[] = {
2474 		"perf trace [<options>] [<command>]",
2475 		"perf trace [<options>] -- <command> [<options>]",
2476 		"perf trace record [<options>] [<command>]",
2477 		"perf trace record [<options>] -- <command> [<options>]",
2478 		NULL
2479 	};
2480 	struct trace trace = {
2481 		.audit = {
2482 			.machine = audit_detect_machine(),
2483 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2484 		},
2485 		.syscalls = {
2486 			. max = -1,
2487 		},
2488 		.opts = {
2489 			.target = {
2490 				.uid	   = UINT_MAX,
2491 				.uses_mmap = true,
2492 			},
2493 			.user_freq     = UINT_MAX,
2494 			.user_interval = ULLONG_MAX,
2495 			.no_buffering  = true,
2496 			.mmap_pages    = UINT_MAX,
2497 		},
2498 		.output = stdout,
2499 		.show_comm = true,
2500 		.trace_syscalls = true,
2501 	};
2502 	const char *output_name = NULL;
2503 	const char *ev_qualifier_str = NULL;
2504 	const struct option trace_options[] = {
2505 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2506 		    "show the thread COMM next to its id"),
2507 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2508 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2509 		    "list of events to trace"),
2510 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2511 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2512 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2513 		    "trace events on existing process id"),
2514 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2515 		    "trace events on existing thread id"),
2516 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2517 		    "system-wide collection from all CPUs"),
2518 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2519 		    "list of cpus to monitor"),
2520 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2521 		    "child tasks do not inherit counters"),
2522 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2523 		     "number of mmap data pages",
2524 		     perf_evlist__parse_mmap_pages),
2525 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2526 		   "user to profile"),
2527 	OPT_CALLBACK(0, "duration", &trace, "float",
2528 		     "show only events with duration > N.M ms",
2529 		     trace__set_duration),
2530 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2531 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2532 	OPT_BOOLEAN('T', "time", &trace.full_time,
2533 		    "Show full timestamp, not time relative to first start"),
2534 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2535 		    "Show only syscall summary with statistics"),
2536 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2537 		    "Show all syscalls and summary with statistics"),
2538 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2539 		     "Trace pagefaults", parse_pagefaults, "maj"),
2540 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2541 	OPT_END()
2542 	};
2543 	int err;
2544 	char bf[BUFSIZ];
2545 
2546 	argc = parse_options(argc, argv, trace_options, trace_usage,
2547 			     PARSE_OPT_STOP_AT_NON_OPTION);
2548 
2549 	if (trace.trace_pgfaults) {
2550 		trace.opts.sample_address = true;
2551 		trace.opts.sample_time = true;
2552 	}
2553 
2554 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2555 		return trace__record(&trace, argc-1, &argv[1]);
2556 
2557 	/* summary_only implies summary option, but don't overwrite summary if set */
2558 	if (trace.summary_only)
2559 		trace.summary = trace.summary_only;
2560 
2561 	if (!trace.trace_syscalls && !trace.trace_pgfaults) {
2562 		pr_err("Please specify something to trace.\n");
2563 		return -1;
2564 	}
2565 
2566 	if (output_name != NULL) {
2567 		err = trace__open_output(&trace, output_name);
2568 		if (err < 0) {
2569 			perror("failed to create output file");
2570 			goto out;
2571 		}
2572 	}
2573 
2574 	if (ev_qualifier_str != NULL) {
2575 		const char *s = ev_qualifier_str;
2576 
2577 		trace.not_ev_qualifier = *s == '!';
2578 		if (trace.not_ev_qualifier)
2579 			++s;
2580 		trace.ev_qualifier = strlist__new(true, s);
2581 		if (trace.ev_qualifier == NULL) {
2582 			fputs("Not enough memory to parse event qualifier",
2583 			      trace.output);
2584 			err = -ENOMEM;
2585 			goto out_close;
2586 		}
2587 	}
2588 
2589 	err = target__validate(&trace.opts.target);
2590 	if (err) {
2591 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2592 		fprintf(trace.output, "%s", bf);
2593 		goto out_close;
2594 	}
2595 
2596 	err = target__parse_uid(&trace.opts.target);
2597 	if (err) {
2598 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2599 		fprintf(trace.output, "%s", bf);
2600 		goto out_close;
2601 	}
2602 
2603 	if (!argc && target__none(&trace.opts.target))
2604 		trace.opts.target.system_wide = true;
2605 
2606 	if (input_name)
2607 		err = trace__replay(&trace);
2608 	else
2609 		err = trace__run(&trace, argc, argv);
2610 
2611 out_close:
2612 	if (output_name != NULL)
2613 		fclose(trace.output);
2614 out:
2615 	return err;
2616 }
2617