xref: /openbmc/linux/tools/perf/builtin-trace.c (revision 161f4089)
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
20 
/*
 * Fallback definitions for constants that the system headers of older
 * distros may not provide. Each one is only defined when missing.
 */
#if !defined(MAP_STACK)
#define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE	13
#endif
37 
38 struct tp_field {
39 	int offset;
40 	union {
41 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43 	};
44 };
45 
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49 	return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51 
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56 
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60 	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 	return bswap_##bits(value);\
62 }
63 
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67 
68 static int tp_field__init_uint(struct tp_field *field,
69 			       struct format_field *format_field,
70 			       bool needs_swap)
71 {
72 	field->offset = format_field->offset;
73 
74 	switch (format_field->size) {
75 	case 1:
76 		field->integer = tp_field__u8;
77 		break;
78 	case 2:
79 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80 		break;
81 	case 4:
82 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83 		break;
84 	case 8:
85 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86 		break;
87 	default:
88 		return -1;
89 	}
90 
91 	return 0;
92 }
93 
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96 	return sample->raw_data + field->offset;
97 }
98 
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101 	field->offset = format_field->offset;
102 	field->pointer = tp_field__ptr;
103 	return 0;
104 }
105 
106 struct syscall_tp {
107 	struct tp_field id;
108 	union {
109 		struct tp_field args, ret;
110 	};
111 };
112 
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 					  struct tp_field *field,
115 					  const char *name)
116 {
117 	struct format_field *format_field = perf_evsel__field(evsel, name);
118 
119 	if (format_field == NULL)
120 		return -1;
121 
122 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124 
/*
 * Initialize the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as an unsigned integer accessor for the tracepoint field of
 * the same name. Evaluates to the perf_evsel__init_tp_uint_field() result.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc = (evsel)->priv; \
		perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); \
	})
128 
/*
 * Look up the tracepoint field called @name in @evsel and set up @field as
 * a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (!format_field)
		return -1;

	return tp_field__init_ptr(field, format_field);
}
140 
/*
 * Initialize the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as a pointer accessor for the tracepoint field of the same
 * name. Evaluates to the perf_evsel__init_tp_ptr_field() result.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc = (evsel)->priv; \
		perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); \
	})
144 
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147 	free(evsel->priv);
148 	evsel->priv = NULL;
149 	perf_evsel__delete(evsel);
150 }
151 
152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction,
153 						    void *handler, int idx)
154 {
155 	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx);
156 
157 	if (evsel) {
158 		evsel->priv = malloc(sizeof(struct syscall_tp));
159 
160 		if (evsel->priv == NULL)
161 			goto out_delete;
162 
163 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 			goto out_delete;
165 
166 		evsel->handler = handler;
167 	}
168 
169 	return evsel;
170 
171 out_delete:
172 	perf_evsel__delete_priv(evsel);
173 	return NULL;
174 }
175 
/*
 * Read the integer syscall tracepoint field 'name' from 'sample', using the
 * accessor stored in the struct syscall_tp at evsel->priv.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = (evsel)->priv; \
		fields->name.integer(&fields->name, sample); \
	})
179 
/*
 * Get a pointer to the syscall tracepoint field 'name' inside 'sample',
 * using the accessor stored in the struct syscall_tp at evsel->priv.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = (evsel)->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
183 
184 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
185 					  void *sys_enter_handler,
186 					  void *sys_exit_handler)
187 {
188 	int ret = -1;
189 	int idx = evlist->nr_entries;
190 	struct perf_evsel *sys_enter, *sys_exit;
191 
192 	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++);
193 	if (sys_enter == NULL)
194 		goto out;
195 
196 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
197 		goto out_delete_sys_enter;
198 
199 	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++);
200 	if (sys_exit == NULL)
201 		goto out_delete_sys_enter;
202 
203 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
204 		goto out_delete_sys_exit;
205 
206 	perf_evlist__add(evlist, sys_enter);
207 	perf_evlist__add(evlist, sys_exit);
208 
209 	ret = 0;
210 out:
211 	return ret;
212 
213 out_delete_sys_exit:
214 	perf_evsel__delete_priv(sys_exit);
215 out_delete_sys_enter:
216 	perf_evsel__delete_priv(sys_enter);
217 	goto out;
218 }
219 
220 
221 struct syscall_arg {
222 	unsigned long val;
223 	struct thread *thread;
224 	struct trace  *trace;
225 	void	      *parm;
226 	u8	      idx;
227 	u8	      mask;
228 };
229 
/*
 * Table translating small integer values into names: the value minus
 * 'offset' indexes into 'entries', which holds 'nr_entries' slots.
 */
struct strarray {
	int	    offset;	/* value that maps to entries[0] */
	int	    nr_entries;	/* number of slots in entries[] */
	const char **entries;	/* the names */
};
235 
/*
 * Define strarray__<array>, a struct strarray wrapping the string array
 * 'array', with entries[0] corresponding to value 0 (offset is left zero).
 */
#define DEFINE_STRARRAY(array) \
	struct strarray strarray__##array = { \
		.entries    = array, \
		.nr_entries = ARRAY_SIZE(array), \
	}
240 
/*
 * Define strarray__<array>, a struct strarray wrapping the string array
 * 'array', with entries[0] corresponding to value 'off'.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) \
	struct strarray strarray__##array = { \
		.entries    = array, \
		.nr_entries = ARRAY_SIZE(array), \
		.offset	    = off, \
	}
246 
247 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
248 						const char *intfmt,
249 					        struct syscall_arg *arg)
250 {
251 	struct strarray *sa = arg->parm;
252 	int idx = arg->val - sa->offset;
253 
254 	if (idx < 0 || idx >= sa->nr_entries)
255 		return scnprintf(bf, size, intfmt, arg->val);
256 
257 	return scnprintf(bf, size, "%s", sa->entries[idx]);
258 }
259 
/*
 * strarray formatter that falls back to a decimal number ("%d") for values
 * without a name in the table.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
267 
/*
 * strarray formatter that falls back to a hexadecimal number ("%#x") for
 * values without a name in the table.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
275 
/*
 * File descriptor formatter. Only declared at this point so that the
 * formatters and tables below can refer to it.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
280 
281 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
282 					   struct syscall_arg *arg)
283 {
284 	int fd = arg->val;
285 
286 	if (fd == AT_FDCWD)
287 		return scnprintf(bf, size, "CWD");
288 
289 	return syscall_arg__scnprintf_fd(bf, size, arg);
290 }
291 
292 #define SCA_FDAT syscall_arg__scnprintf_fd_at
293 
/*
 * Formatter used for the fd argument of close(). Only declared at this
 * point so that the syscall table below can refer to it.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
298 
299 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
300 					 struct syscall_arg *arg)
301 {
302 	return scnprintf(bf, size, "%#lx", arg->val);
303 }
304 
305 #define SCA_HEX syscall_arg__scnprintf_hex
306 
307 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
308 					       struct syscall_arg *arg)
309 {
310 	int printed = 0, prot = arg->val;
311 
312 	if (prot == PROT_NONE)
313 		return scnprintf(bf, size, "NONE");
314 #define	P_MMAP_PROT(n) \
315 	if (prot & PROT_##n) { \
316 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
317 		prot &= ~PROT_##n; \
318 	}
319 
320 	P_MMAP_PROT(EXEC);
321 	P_MMAP_PROT(READ);
322 	P_MMAP_PROT(WRITE);
323 #ifdef PROT_SEM
324 	P_MMAP_PROT(SEM);
325 #endif
326 	P_MMAP_PROT(GROWSDOWN);
327 	P_MMAP_PROT(GROWSUP);
328 #undef P_MMAP_PROT
329 
330 	if (prot)
331 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
332 
333 	return printed;
334 }
335 
336 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
337 
338 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
339 						struct syscall_arg *arg)
340 {
341 	int printed = 0, flags = arg->val;
342 
343 #define	P_MMAP_FLAG(n) \
344 	if (flags & MAP_##n) { \
345 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
346 		flags &= ~MAP_##n; \
347 	}
348 
349 	P_MMAP_FLAG(SHARED);
350 	P_MMAP_FLAG(PRIVATE);
351 #ifdef MAP_32BIT
352 	P_MMAP_FLAG(32BIT);
353 #endif
354 	P_MMAP_FLAG(ANONYMOUS);
355 	P_MMAP_FLAG(DENYWRITE);
356 	P_MMAP_FLAG(EXECUTABLE);
357 	P_MMAP_FLAG(FILE);
358 	P_MMAP_FLAG(FIXED);
359 	P_MMAP_FLAG(GROWSDOWN);
360 #ifdef MAP_HUGETLB
361 	P_MMAP_FLAG(HUGETLB);
362 #endif
363 	P_MMAP_FLAG(LOCKED);
364 	P_MMAP_FLAG(NONBLOCK);
365 	P_MMAP_FLAG(NORESERVE);
366 	P_MMAP_FLAG(POPULATE);
367 	P_MMAP_FLAG(STACK);
368 #ifdef MAP_UNINITIALIZED
369 	P_MMAP_FLAG(UNINITIALIZED);
370 #endif
371 #undef P_MMAP_FLAG
372 
373 	if (flags)
374 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
375 
376 	return printed;
377 }
378 
379 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
380 
381 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
382 						      struct syscall_arg *arg)
383 {
384 	int behavior = arg->val;
385 
386 	switch (behavior) {
387 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
388 	P_MADV_BHV(NORMAL);
389 	P_MADV_BHV(RANDOM);
390 	P_MADV_BHV(SEQUENTIAL);
391 	P_MADV_BHV(WILLNEED);
392 	P_MADV_BHV(DONTNEED);
393 	P_MADV_BHV(REMOVE);
394 	P_MADV_BHV(DONTFORK);
395 	P_MADV_BHV(DOFORK);
396 	P_MADV_BHV(HWPOISON);
397 #ifdef MADV_SOFT_OFFLINE
398 	P_MADV_BHV(SOFT_OFFLINE);
399 #endif
400 	P_MADV_BHV(MERGEABLE);
401 	P_MADV_BHV(UNMERGEABLE);
402 #ifdef MADV_HUGEPAGE
403 	P_MADV_BHV(HUGEPAGE);
404 #endif
405 #ifdef MADV_NOHUGEPAGE
406 	P_MADV_BHV(NOHUGEPAGE);
407 #endif
408 #ifdef MADV_DONTDUMP
409 	P_MADV_BHV(DONTDUMP);
410 #endif
411 #ifdef MADV_DODUMP
412 	P_MADV_BHV(DODUMP);
413 #endif
414 #undef P_MADV_PHV
415 	default: break;
416 	}
417 
418 	return scnprintf(bf, size, "%#x", behavior);
419 }
420 
421 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
422 
423 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
424 					   struct syscall_arg *arg)
425 {
426 	int printed = 0, op = arg->val;
427 
428 	if (op == 0)
429 		return scnprintf(bf, size, "NONE");
430 #define	P_CMD(cmd) \
431 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
432 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
433 		op &= ~LOCK_##cmd; \
434 	}
435 
436 	P_CMD(SH);
437 	P_CMD(EX);
438 	P_CMD(NB);
439 	P_CMD(UN);
440 	P_CMD(MAND);
441 	P_CMD(RW);
442 	P_CMD(READ);
443 	P_CMD(WRITE);
444 #undef P_OP
445 
446 	if (op)
447 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
448 
449 	return printed;
450 }
451 
452 #define SCA_FLOCK syscall_arg__scnprintf_flock
453 
454 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
455 {
456 	enum syscall_futex_args {
457 		SCF_UADDR   = (1 << 0),
458 		SCF_OP	    = (1 << 1),
459 		SCF_VAL	    = (1 << 2),
460 		SCF_TIMEOUT = (1 << 3),
461 		SCF_UADDR2  = (1 << 4),
462 		SCF_VAL3    = (1 << 5),
463 	};
464 	int op = arg->val;
465 	int cmd = op & FUTEX_CMD_MASK;
466 	size_t printed = 0;
467 
468 	switch (cmd) {
469 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
470 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
471 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
472 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
473 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
474 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
475 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
476 	P_FUTEX_OP(WAKE_OP);							  break;
477 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
478 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
479 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
480 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
481 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
482 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
483 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
484 	}
485 
486 	if (op & FUTEX_PRIVATE_FLAG)
487 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
488 
489 	if (op & FUTEX_CLOCK_REALTIME)
490 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
491 
492 	return printed;
493 }
494 
495 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
496 
497 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
498 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
499 
/* Names for the 'which' argument of getitimer()/setitimer(), indexed from 0. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
502 
/*
 * Names for the 'whence' argument of lseek(), indexed from 0. "DATA" and
 * "HOLE" are only present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
512 
/* Names for the 'cmd' argument of fcntl(), indexed by command number. */
static const char *fcntl_cmds[] = {
	[0]  = "DUPFD",
	[1]  = "GETFD",
	[2]  = "SETFD",
	[3]  = "GETFL",
	[4]  = "SETFL",
	[5]  = "GETLK",
	[6]  = "SETLK",
	[7]  = "SETLKW",
	[8]  = "SETOWN",
	[9]  = "GETOWN",
	[10] = "SETSIG",
	[11] = "GETSIG",
	[12] = "F_GETLK64",
	[13] = "F_SETLK64",
	[14] = "F_SETLKW64",
	[15] = "F_SETOWN_EX",
	[16] = "F_GETOWN_EX",
	[17] = "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
520 
/* Names for the 'resource' argument of the *rlimit*() syscalls, indexed from 0. */
static const char *rlimit_resources[] = {
	[0]  = "CPU",
	[1]  = "FSIZE",
	[2]  = "DATA",
	[3]  = "STACK",
	[4]  = "CORE",
	[5]  = "RSS",
	[6]  = "NPROC",
	[7]  = "NOFILE",
	[8]  = "MEMLOCK",
	[9]  = "AS",
	[10] = "LOCKS",
	[11] = "SIGPENDING",
	[12] = "MSGQUEUE",
	[13] = "NICE",
	[14] = "RTPRIO",
	[15] = "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
527 
/* Names for the 'how' argument of rt_sigprocmask(), indexed from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
530 
/* Names for the clock id argument of clock_gettime(), indexed from 0. */
static const char *clockid[] = {
	[0] = "REALTIME",
	[1] = "MONOTONIC",
	[2] = "PROCESS_CPUTIME_ID",
	[3] = "THREAD_CPUTIME_ID",
	[4] = "MONOTONIC_RAW",
	[5] = "REALTIME_COARSE",
	[6] = "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
536 
/*
 * Names for the address family argument of socket()/socketpair(), indexed
 * by the AF_* number (without the AF_ prefix).
 *
 * Family 28 needs a slot of its own (AF_MPLS in current kernel headers):
 * AF_CAN is 29 and AF_VSOCK is 40, so without an entry at 28 every name
 * from "CAN" on would be reported for the family number below its own.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray__socket_families: entries[0] corresponds to family number 0 */
static DEFINE_STRARRAY(socket_families);
546 
547 #ifndef SOCK_TYPE_MASK
548 #define SOCK_TYPE_MASK 0xf
549 #endif
550 
551 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
552 						      struct syscall_arg *arg)
553 {
554 	size_t printed;
555 	int type = arg->val,
556 	    flags = type & ~SOCK_TYPE_MASK;
557 
558 	type &= SOCK_TYPE_MASK;
559 	/*
560  	 * Can't use a strarray, MIPS may override for ABI reasons.
561  	 */
562 	switch (type) {
563 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
564 	P_SK_TYPE(STREAM);
565 	P_SK_TYPE(DGRAM);
566 	P_SK_TYPE(RAW);
567 	P_SK_TYPE(RDM);
568 	P_SK_TYPE(SEQPACKET);
569 	P_SK_TYPE(DCCP);
570 	P_SK_TYPE(PACKET);
571 #undef P_SK_TYPE
572 	default:
573 		printed = scnprintf(bf, size, "%#x", type);
574 	}
575 
576 #define	P_SK_FLAG(n) \
577 	if (flags & SOCK_##n) { \
578 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
579 		flags &= ~SOCK_##n; \
580 	}
581 
582 	P_SK_FLAG(CLOEXEC);
583 	P_SK_FLAG(NONBLOCK);
584 #undef P_SK_FLAG
585 
586 	if (flags)
587 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
588 
589 	return printed;
590 }
591 
592 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
593 
/*
 * Fallback values for MSG_* flags that the system headers may not provide.
 * Each one is only defined when missing.
 */
#if !defined(MSG_PROBE)
#define MSG_PROBE	     0x10
#endif

#if !defined(MSG_WAITFORONE)
#define MSG_WAITFORONE	0x10000
#endif

#if !defined(MSG_SENDPAGE_NOTLAST)
#define MSG_SENDPAGE_NOTLAST 0x20000
#endif

#if !defined(MSG_FASTOPEN)
#define MSG_FASTOPEN	     0x20000000
#endif
606 
607 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
608 					       struct syscall_arg *arg)
609 {
610 	int printed = 0, flags = arg->val;
611 
612 	if (flags == 0)
613 		return scnprintf(bf, size, "NONE");
614 #define	P_MSG_FLAG(n) \
615 	if (flags & MSG_##n) { \
616 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
617 		flags &= ~MSG_##n; \
618 	}
619 
620 	P_MSG_FLAG(OOB);
621 	P_MSG_FLAG(PEEK);
622 	P_MSG_FLAG(DONTROUTE);
623 	P_MSG_FLAG(TRYHARD);
624 	P_MSG_FLAG(CTRUNC);
625 	P_MSG_FLAG(PROBE);
626 	P_MSG_FLAG(TRUNC);
627 	P_MSG_FLAG(DONTWAIT);
628 	P_MSG_FLAG(EOR);
629 	P_MSG_FLAG(WAITALL);
630 	P_MSG_FLAG(FIN);
631 	P_MSG_FLAG(SYN);
632 	P_MSG_FLAG(CONFIRM);
633 	P_MSG_FLAG(RST);
634 	P_MSG_FLAG(ERRQUEUE);
635 	P_MSG_FLAG(NOSIGNAL);
636 	P_MSG_FLAG(MORE);
637 	P_MSG_FLAG(WAITFORONE);
638 	P_MSG_FLAG(SENDPAGE_NOTLAST);
639 	P_MSG_FLAG(FASTOPEN);
640 	P_MSG_FLAG(CMSG_CLOEXEC);
641 #undef P_MSG_FLAG
642 
643 	if (flags)
644 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
645 
646 	return printed;
647 }
648 
649 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
650 
651 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
652 						 struct syscall_arg *arg)
653 {
654 	size_t printed = 0;
655 	int mode = arg->val;
656 
657 	if (mode == F_OK) /* 0 */
658 		return scnprintf(bf, size, "F");
659 #define	P_MODE(n) \
660 	if (mode & n##_OK) { \
661 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
662 		mode &= ~n##_OK; \
663 	}
664 
665 	P_MODE(R);
666 	P_MODE(W);
667 	P_MODE(X);
668 #undef P_MODE
669 
670 	if (mode)
671 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
672 
673 	return printed;
674 }
675 
676 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
677 
678 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
679 					       struct syscall_arg *arg)
680 {
681 	int printed = 0, flags = arg->val;
682 
683 	if (!(flags & O_CREAT))
684 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
685 
686 	if (flags == 0)
687 		return scnprintf(bf, size, "RDONLY");
688 #define	P_FLAG(n) \
689 	if (flags & O_##n) { \
690 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691 		flags &= ~O_##n; \
692 	}
693 
694 	P_FLAG(APPEND);
695 	P_FLAG(ASYNC);
696 	P_FLAG(CLOEXEC);
697 	P_FLAG(CREAT);
698 	P_FLAG(DIRECT);
699 	P_FLAG(DIRECTORY);
700 	P_FLAG(EXCL);
701 	P_FLAG(LARGEFILE);
702 	P_FLAG(NOATIME);
703 	P_FLAG(NOCTTY);
704 #ifdef O_NONBLOCK
705 	P_FLAG(NONBLOCK);
706 #elif O_NDELAY
707 	P_FLAG(NDELAY);
708 #endif
709 #ifdef O_PATH
710 	P_FLAG(PATH);
711 #endif
712 	P_FLAG(RDWR);
713 #ifdef O_DSYNC
714 	if ((flags & O_SYNC) == O_SYNC)
715 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
716 	else {
717 		P_FLAG(DSYNC);
718 	}
719 #else
720 	P_FLAG(SYNC);
721 #endif
722 	P_FLAG(TRUNC);
723 	P_FLAG(WRONLY);
724 #undef P_FLAG
725 
726 	if (flags)
727 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
728 
729 	return printed;
730 }
731 
732 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
733 
734 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
735 						   struct syscall_arg *arg)
736 {
737 	int printed = 0, flags = arg->val;
738 
739 	if (flags == 0)
740 		return scnprintf(bf, size, "NONE");
741 #define	P_FLAG(n) \
742 	if (flags & EFD_##n) { \
743 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
744 		flags &= ~EFD_##n; \
745 	}
746 
747 	P_FLAG(SEMAPHORE);
748 	P_FLAG(CLOEXEC);
749 	P_FLAG(NONBLOCK);
750 #undef P_FLAG
751 
752 	if (flags)
753 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
754 
755 	return printed;
756 }
757 
758 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
759 
760 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
761 						struct syscall_arg *arg)
762 {
763 	int printed = 0, flags = arg->val;
764 
765 #define	P_FLAG(n) \
766 	if (flags & O_##n) { \
767 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768 		flags &= ~O_##n; \
769 	}
770 
771 	P_FLAG(CLOEXEC);
772 	P_FLAG(NONBLOCK);
773 #undef P_FLAG
774 
775 	if (flags)
776 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
777 
778 	return printed;
779 }
780 
781 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
782 
783 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
784 {
785 	int sig = arg->val;
786 
787 	switch (sig) {
788 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
789 	P_SIGNUM(HUP);
790 	P_SIGNUM(INT);
791 	P_SIGNUM(QUIT);
792 	P_SIGNUM(ILL);
793 	P_SIGNUM(TRAP);
794 	P_SIGNUM(ABRT);
795 	P_SIGNUM(BUS);
796 	P_SIGNUM(FPE);
797 	P_SIGNUM(KILL);
798 	P_SIGNUM(USR1);
799 	P_SIGNUM(SEGV);
800 	P_SIGNUM(USR2);
801 	P_SIGNUM(PIPE);
802 	P_SIGNUM(ALRM);
803 	P_SIGNUM(TERM);
804 	P_SIGNUM(STKFLT);
805 	P_SIGNUM(CHLD);
806 	P_SIGNUM(CONT);
807 	P_SIGNUM(STOP);
808 	P_SIGNUM(TSTP);
809 	P_SIGNUM(TTIN);
810 	P_SIGNUM(TTOU);
811 	P_SIGNUM(URG);
812 	P_SIGNUM(XCPU);
813 	P_SIGNUM(XFSZ);
814 	P_SIGNUM(VTALRM);
815 	P_SIGNUM(PROF);
816 	P_SIGNUM(WINCH);
817 	P_SIGNUM(IO);
818 	P_SIGNUM(PWR);
819 	P_SIGNUM(SYS);
820 	default: break;
821 	}
822 
823 	return scnprintf(bf, size, "%#x", sig);
824 }
825 
826 #define SCA_SIGNUM syscall_arg__scnprintf_signum
827 
#define TCGETS		0x5401

/*
 * Names of the tty ioctl commands, indexed by (command - TCGETS), which is
 * how the strarray built on top of this table looks values up.
 *
 * The command numbers are not contiguous, so each run after a gap starts
 * with a designated index. Those indexes are spelled as
 * "command number - TCGETS": using the low byte of the command directly
 * would place the run one slot too far, since entry 0 is command 0x5401
 * and not 0x5400. Slots inside the gaps are left NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX",
	"FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
847 
/* strarray__tioctls: entries[0] corresponds to command TCGETS (0x5401) */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);
849 
/*
 * Shorthand for syscall_fmt initializers: format argument number 'arg'
 * with the strarray formatter, looking names up in strarray__<array>.
 * 'name' is not used in the expansion; it only documents the argument at
 * the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
853 
854 static struct syscall_fmt {
855 	const char *name;
856 	const char *alias;
857 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
858 	void	   *arg_parm[6];
859 	bool	   errmsg;
860 	bool	   timeout;
861 	bool	   hexret;
862 } syscall_fmts[] = {
863 	{ .name	    = "access",	    .errmsg = true,
864 	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
865 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
866 	{ .name	    = "brk",	    .hexret = true,
867 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
868 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
869 	{ .name	    = "close",	    .errmsg = true,
870 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
871 	{ .name	    = "connect",    .errmsg = true, },
872 	{ .name	    = "dup",	    .errmsg = true,
873 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
874 	{ .name	    = "dup2",	    .errmsg = true,
875 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
876 	{ .name	    = "dup3",	    .errmsg = true,
877 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
878 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
879 	{ .name	    = "eventfd2",   .errmsg = true,
880 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
881 	{ .name	    = "faccessat",  .errmsg = true,
882 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
883 	{ .name	    = "fadvise64",  .errmsg = true,
884 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
885 	{ .name	    = "fallocate",  .errmsg = true,
886 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
887 	{ .name	    = "fchdir",	    .errmsg = true,
888 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
889 	{ .name	    = "fchmod",	    .errmsg = true,
890 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
891 	{ .name	    = "fchmodat",   .errmsg = true,
892 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
893 	{ .name	    = "fchown",	    .errmsg = true,
894 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
895 	{ .name	    = "fchownat",   .errmsg = true,
896 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
897 	{ .name	    = "fcntl",	    .errmsg = true,
898 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
899 			     [1] = SCA_STRARRAY, /* cmd */ },
900 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
901 	{ .name	    = "fdatasync",  .errmsg = true,
902 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 	{ .name	    = "flock",	    .errmsg = true,
904 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
905 			     [1] = SCA_FLOCK, /* cmd */ }, },
906 	{ .name	    = "fsetxattr",  .errmsg = true,
907 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
908 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
909 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
910 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
911 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
912 	{ .name	    = "fstatfs",    .errmsg = true,
913 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
914 	{ .name	    = "fsync",    .errmsg = true,
915 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
916 	{ .name	    = "ftruncate", .errmsg = true,
917 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
918 	{ .name	    = "futex",	    .errmsg = true,
919 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
920 	{ .name	    = "futimesat", .errmsg = true,
921 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
922 	{ .name	    = "getdents",   .errmsg = true,
923 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 	{ .name	    = "getdents64", .errmsg = true,
925 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
927 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
928 	{ .name	    = "ioctl",	    .errmsg = true,
929 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
930 			     [1] = SCA_STRHEXARRAY, /* cmd */
931 			     [2] = SCA_HEX, /* arg */ },
932 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
933 	{ .name	    = "kill",	    .errmsg = true,
934 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
935 	{ .name	    = "linkat",	    .errmsg = true,
936 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
937 	{ .name	    = "lseek",	    .errmsg = true,
938 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
939 			     [2] = SCA_STRARRAY, /* whence */ },
940 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
941 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
942 	{ .name     = "madvise",    .errmsg = true,
943 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
944 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
945 	{ .name	    = "mkdirat",    .errmsg = true,
946 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 	{ .name	    = "mknodat",    .errmsg = true,
948 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
949 	{ .name	    = "mlock",	    .errmsg = true,
950 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
951 	{ .name	    = "mlockall",   .errmsg = true,
952 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
953 	{ .name	    = "mmap",	    .hexret = true,
954 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
955 			     [2] = SCA_MMAP_PROT, /* prot */
956 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
957 	{ .name	    = "mprotect",   .errmsg = true,
958 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
959 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
960 	{ .name	    = "mremap",	    .hexret = true,
961 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
962 			     [4] = SCA_HEX, /* new_addr */ }, },
963 	{ .name	    = "munlock",    .errmsg = true,
964 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965 	{ .name	    = "munmap",	    .errmsg = true,
966 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967 	{ .name	    = "name_to_handle_at", .errmsg = true,
968 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
969 	{ .name	    = "newfstatat", .errmsg = true,
970 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
971 	{ .name	    = "open",	    .errmsg = true,
972 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
973 	{ .name	    = "open_by_handle_at", .errmsg = true,
974 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
975 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
976 	{ .name	    = "openat",	    .errmsg = true,
977 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
978 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
979 	{ .name	    = "pipe2",	    .errmsg = true,
980 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
981 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
982 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
983 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
984 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
985 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
986 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
987 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
988 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
989 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
990 	{ .name	    = "pwritev",    .errmsg = true,
991 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
992 	{ .name	    = "read",	    .errmsg = true,
993 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
994 	{ .name	    = "readlinkat", .errmsg = true,
995 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
996 	{ .name	    = "readv",	    .errmsg = true,
997 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 	{ .name	    = "recvfrom",   .errmsg = true,
999 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1000 	{ .name	    = "recvmmsg",   .errmsg = true,
1001 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1002 	{ .name	    = "recvmsg",    .errmsg = true,
1003 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1004 	{ .name	    = "renameat",   .errmsg = true,
1005 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1006 	{ .name	    = "rt_sigaction", .errmsg = true,
1007 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1008 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1009 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1010 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1011 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1012 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1013 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1014 	{ .name	    = "sendmmsg",    .errmsg = true,
1015 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1016 	{ .name	    = "sendmsg",    .errmsg = true,
1017 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1018 	{ .name	    = "sendto",	    .errmsg = true,
1019 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1020 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1021 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1022 	{ .name	    = "shutdown",   .errmsg = true,
1023 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024 	{ .name	    = "socket",	    .errmsg = true,
1025 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1026 			     [1] = SCA_SK_TYPE, /* type */ },
1027 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1028 	{ .name	    = "socketpair", .errmsg = true,
1029 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1030 			     [1] = SCA_SK_TYPE, /* type */ },
1031 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1032 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
1033 	{ .name	    = "symlinkat",  .errmsg = true,
1034 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1035 	{ .name	    = "tgkill",	    .errmsg = true,
1036 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1037 	{ .name	    = "tkill",	    .errmsg = true,
1038 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1039 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1040 	{ .name	    = "unlinkat",   .errmsg = true,
1041 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042 	{ .name	    = "utimensat",  .errmsg = true,
1043 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1044 	{ .name	    = "write",	    .errmsg = true,
1045 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046 	{ .name	    = "writev",	    .errmsg = true,
1047 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1048 };
1049 
1050 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1051 {
1052 	const struct syscall_fmt *fmt = fmtp;
1053 	return strcmp(name, fmt->name);
1054 }
1055 
1056 static struct syscall_fmt *syscall_fmt__find(const char *name)
1057 {
1058 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1059 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1060 }
1061 
/*
 * Cached description of one syscall; instances live in trace->syscalls.table,
 * indexed by syscall id.
 */
struct syscall {
	/* format of the "syscalls:sys_enter_<name>" tracepoint, walked to name the args */
	struct event_format *tp_format;
	/* syscall name resolved from its id; NULL means the slot was never filled */
	const char	    *name;
	/* set when the event qualifier list excludes this syscall */
	bool		    filtered;
	/* matching syscall_fmts entry, or NULL when the table has none */
	struct syscall_fmt  *fmt;
	/* per-argument formatters; a NULL slot means "print as a plain integer" */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument extra parameter, handed to the formatter as arg->parm */
	void		    **arg_parm;
};
1070 
1071 static size_t fprintf_duration(unsigned long t, FILE *fp)
1072 {
1073 	double duration = (double)t / NSEC_PER_MSEC;
1074 	size_t printed = fprintf(fp, "(");
1075 
1076 	if (duration >= 1.0)
1077 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1078 	else if (duration >= 0.01)
1079 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1080 	else
1081 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1082 	return printed + fprintf(fp, "): ");
1083 }
1084 
1085 struct thread_trace {
1086 	u64		  entry_time;
1087 	u64		  exit_time;
1088 	bool		  entry_pending;
1089 	unsigned long	  nr_events;
1090 	char		  *entry_str;
1091 	double		  runtime_ms;
1092 	struct {
1093 		int	  max;
1094 		char	  **table;
1095 	} paths;
1096 
1097 	struct intlist *syscall_stats;
1098 };
1099 
1100 static struct thread_trace *thread_trace__new(void)
1101 {
1102 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1103 
1104 	if (ttrace)
1105 		ttrace->paths.max = -1;
1106 
1107 	ttrace->syscall_stats = intlist__new(NULL);
1108 
1109 	return ttrace;
1110 }
1111 
1112 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1113 {
1114 	struct thread_trace *ttrace;
1115 
1116 	if (thread == NULL)
1117 		goto fail;
1118 
1119 	if (thread->priv == NULL)
1120 		thread->priv = thread_trace__new();
1121 
1122 	if (thread->priv == NULL)
1123 		goto fail;
1124 
1125 	ttrace = thread->priv;
1126 	++ttrace->nr_events;
1127 
1128 	return ttrace;
1129 fail:
1130 	color_fprintf(fp, PERF_COLOR_RED,
1131 		      "WARNING: not enough memory, dropping samples!\n");
1132 	return NULL;
1133 }
1134 
1135 struct trace {
1136 	struct perf_tool	tool;
1137 	struct {
1138 		int		machine;
1139 		int		open_id;
1140 	}			audit;
1141 	struct {
1142 		int		max;
1143 		struct syscall  *table;
1144 	} syscalls;
1145 	struct perf_record_opts opts;
1146 	struct machine		*host;
1147 	u64			base_time;
1148 	bool			full_time;
1149 	FILE			*output;
1150 	unsigned long		nr_events;
1151 	struct strlist		*ev_qualifier;
1152 	bool			not_ev_qualifier;
1153 	bool			live;
1154 	const char 		*last_vfs_getname;
1155 	struct intlist		*tid_list;
1156 	struct intlist		*pid_list;
1157 	bool			sched;
1158 	bool			multiple_threads;
1159 	bool			summary;
1160 	bool			show_comm;
1161 	bool			show_tool_stats;
1162 	double			duration_filter;
1163 	double			runtime_ms;
1164 	struct {
1165 		u64		vfs_getname, proc_getname;
1166 	} stats;
1167 };
1168 
1169 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1170 {
1171 	struct thread_trace *ttrace = thread->priv;
1172 
1173 	if (fd > ttrace->paths.max) {
1174 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1175 
1176 		if (npath == NULL)
1177 			return -1;
1178 
1179 		if (ttrace->paths.max != -1) {
1180 			memset(npath + ttrace->paths.max + 1, 0,
1181 			       (fd - ttrace->paths.max) * sizeof(char *));
1182 		} else {
1183 			memset(npath, 0, (fd + 1) * sizeof(char *));
1184 		}
1185 
1186 		ttrace->paths.table = npath;
1187 		ttrace->paths.max   = fd;
1188 	}
1189 
1190 	ttrace->paths.table[fd] = strdup(pathname);
1191 
1192 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1193 }
1194 
1195 static int thread__read_fd_path(struct thread *thread, int fd)
1196 {
1197 	char linkname[PATH_MAX], pathname[PATH_MAX];
1198 	struct stat st;
1199 	int ret;
1200 
1201 	if (thread->pid_ == thread->tid) {
1202 		scnprintf(linkname, sizeof(linkname),
1203 			  "/proc/%d/fd/%d", thread->pid_, fd);
1204 	} else {
1205 		scnprintf(linkname, sizeof(linkname),
1206 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1207 	}
1208 
1209 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1210 		return -1;
1211 
1212 	ret = readlink(linkname, pathname, sizeof(pathname));
1213 
1214 	if (ret < 0 || ret > st.st_size)
1215 		return -1;
1216 
1217 	pathname[ret] = '\0';
1218 	return trace__set_fd_pathname(thread, fd, pathname);
1219 }
1220 
1221 static const char *thread__fd_path(struct thread *thread, int fd,
1222 				   struct trace *trace)
1223 {
1224 	struct thread_trace *ttrace = thread->priv;
1225 
1226 	if (ttrace == NULL)
1227 		return NULL;
1228 
1229 	if (fd < 0)
1230 		return NULL;
1231 
1232 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1233 		if (!trace->live)
1234 			return NULL;
1235 		++trace->stats.proc_getname;
1236 		if (thread__read_fd_path(thread, fd)) {
1237 			return NULL;
1238 	}
1239 
1240 	return ttrace->paths.table[fd];
1241 }
1242 
1243 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1244 					struct syscall_arg *arg)
1245 {
1246 	int fd = arg->val;
1247 	size_t printed = scnprintf(bf, size, "%d", fd);
1248 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1249 
1250 	if (path)
1251 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1252 
1253 	return printed;
1254 }
1255 
1256 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1257 					      struct syscall_arg *arg)
1258 {
1259 	int fd = arg->val;
1260 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1261 	struct thread_trace *ttrace = arg->thread->priv;
1262 
1263 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1264 		free(ttrace->paths.table[fd]);
1265 		ttrace->paths.table[fd] = NULL;
1266 	}
1267 
1268 	return printed;
1269 }
1270 
1271 static bool trace__filter_duration(struct trace *trace, double t)
1272 {
1273 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1274 }
1275 
1276 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1277 {
1278 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1279 
1280 	return fprintf(fp, "%10.3f ", ts);
1281 }
1282 
/*
 * Flags set from sig_handler() and polled by the event loop. They are
 * written asynchronously from a signal handler, so they must be
 * volatile sig_atomic_t for the main loop to reliably observe the update.
 *
 * done:        some handled signal arrived.
 * interrupted: the last handled signal was SIGINT.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Signal handler: only records what happened, the event loop reacts to it. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1291 
1292 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1293 					u64 duration, u64 tstamp, FILE *fp)
1294 {
1295 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1296 	printed += fprintf_duration(duration, fp);
1297 
1298 	if (trace->multiple_threads) {
1299 		if (trace->show_comm)
1300 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1301 		printed += fprintf(fp, "%d ", thread->tid);
1302 	}
1303 
1304 	return printed;
1305 }
1306 
1307 static int trace__process_event(struct trace *trace, struct machine *machine,
1308 				union perf_event *event, struct perf_sample *sample)
1309 {
1310 	int ret = 0;
1311 
1312 	switch (event->header.type) {
1313 	case PERF_RECORD_LOST:
1314 		color_fprintf(trace->output, PERF_COLOR_RED,
1315 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1316 		ret = machine__process_lost_event(machine, event, sample);
1317 	default:
1318 		ret = machine__process_event(machine, event, sample);
1319 		break;
1320 	}
1321 
1322 	return ret;
1323 }
1324 
1325 static int trace__tool_process(struct perf_tool *tool,
1326 			       union perf_event *event,
1327 			       struct perf_sample *sample,
1328 			       struct machine *machine)
1329 {
1330 	struct trace *trace = container_of(tool, struct trace, tool);
1331 	return trace__process_event(trace, machine, event, sample);
1332 }
1333 
1334 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1335 {
1336 	int err = symbol__init();
1337 
1338 	if (err)
1339 		return err;
1340 
1341 	trace->host = machine__new_host();
1342 	if (trace->host == NULL)
1343 		return -ENOMEM;
1344 
1345 	if (perf_target__has_task(&trace->opts.target)) {
1346 		err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1347 							trace__tool_process,
1348 							trace->host);
1349 	} else {
1350 		err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1351 						     trace->host);
1352 	}
1353 
1354 	if (err)
1355 		symbol__exit();
1356 
1357 	return err;
1358 }
1359 
1360 static int syscall__set_arg_fmts(struct syscall *sc)
1361 {
1362 	struct format_field *field;
1363 	int idx = 0;
1364 
1365 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1366 	if (sc->arg_scnprintf == NULL)
1367 		return -1;
1368 
1369 	if (sc->fmt)
1370 		sc->arg_parm = sc->fmt->arg_parm;
1371 
1372 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1373 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1374 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1375 		else if (field->flags & FIELD_IS_POINTER)
1376 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1377 		++idx;
1378 	}
1379 
1380 	return 0;
1381 }
1382 
1383 static int trace__read_syscall_info(struct trace *trace, int id)
1384 {
1385 	char tp_name[128];
1386 	struct syscall *sc;
1387 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1388 
1389 	if (name == NULL)
1390 		return -1;
1391 
1392 	if (id > trace->syscalls.max) {
1393 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1394 
1395 		if (nsyscalls == NULL)
1396 			return -1;
1397 
1398 		if (trace->syscalls.max != -1) {
1399 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1400 			       (id - trace->syscalls.max) * sizeof(*sc));
1401 		} else {
1402 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1403 		}
1404 
1405 		trace->syscalls.table = nsyscalls;
1406 		trace->syscalls.max   = id;
1407 	}
1408 
1409 	sc = trace->syscalls.table + id;
1410 	sc->name = name;
1411 
1412 	if (trace->ev_qualifier) {
1413 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1414 
1415 		if (!(in ^ trace->not_ev_qualifier)) {
1416 			sc->filtered = true;
1417 			/*
1418 			 * No need to do read tracepoint information since this will be
1419 			 * filtered out.
1420 			 */
1421 			return 0;
1422 		}
1423 	}
1424 
1425 	sc->fmt  = syscall_fmt__find(sc->name);
1426 
1427 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1428 	sc->tp_format = event_format__new("syscalls", tp_name);
1429 
1430 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1431 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1432 		sc->tp_format = event_format__new("syscalls", tp_name);
1433 	}
1434 
1435 	if (sc->tp_format == NULL)
1436 		return -1;
1437 
1438 	return syscall__set_arg_fmts(sc);
1439 }
1440 
1441 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1442 				      unsigned long *args, struct trace *trace,
1443 				      struct thread *thread)
1444 {
1445 	size_t printed = 0;
1446 
1447 	if (sc->tp_format != NULL) {
1448 		struct format_field *field;
1449 		u8 bit = 1;
1450 		struct syscall_arg arg = {
1451 			.idx	= 0,
1452 			.mask	= 0,
1453 			.trace  = trace,
1454 			.thread = thread,
1455 		};
1456 
1457 		for (field = sc->tp_format->format.fields->next; field;
1458 		     field = field->next, ++arg.idx, bit <<= 1) {
1459 			if (arg.mask & bit)
1460 				continue;
1461 			/*
1462  			 * Suppress this argument if its value is zero and
1463  			 * and we don't have a string associated in an
1464  			 * strarray for it.
1465  			 */
1466 			if (args[arg.idx] == 0 &&
1467 			    !(sc->arg_scnprintf &&
1468 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1469 			      sc->arg_parm[arg.idx]))
1470 				continue;
1471 
1472 			printed += scnprintf(bf + printed, size - printed,
1473 					     "%s%s: ", printed ? ", " : "", field->name);
1474 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1475 				arg.val = args[arg.idx];
1476 				if (sc->arg_parm)
1477 					arg.parm = sc->arg_parm[arg.idx];
1478 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1479 								      size - printed, &arg);
1480 			} else {
1481 				printed += scnprintf(bf + printed, size - printed,
1482 						     "%ld", args[arg.idx]);
1483 			}
1484 		}
1485 	} else {
1486 		int i = 0;
1487 
1488 		while (i < 6) {
1489 			printed += scnprintf(bf + printed, size - printed,
1490 					     "%sarg%d: %ld",
1491 					     printed ? ", " : "", i, args[i]);
1492 			++i;
1493 		}
1494 	}
1495 
1496 	return printed;
1497 }
1498 
/*
 * Signature of the per-tracepoint sample handlers in this file; they are
 * stored in evsel->handler and called for each sample of that evsel.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
1501 
1502 static struct syscall *trace__syscall_info(struct trace *trace,
1503 					   struct perf_evsel *evsel, int id)
1504 {
1505 
1506 	if (id < 0) {
1507 
1508 		/*
1509 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1510 		 * before that, leaving at a higher verbosity level till that is
1511 		 * explained. Reproduced with plain ftrace with:
1512 		 *
1513 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1514 		 * grep "NR -1 " /t/trace_pipe
1515 		 *
1516 		 * After generating some load on the machine.
1517  		 */
1518 		if (verbose > 1) {
1519 			static u64 n;
1520 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1521 				id, perf_evsel__name(evsel), ++n);
1522 		}
1523 		return NULL;
1524 	}
1525 
1526 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1527 	    trace__read_syscall_info(trace, id))
1528 		goto out_cant_read;
1529 
1530 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1531 		goto out_cant_read;
1532 
1533 	return &trace->syscalls.table[id];
1534 
1535 out_cant_read:
1536 	if (verbose) {
1537 		fprintf(trace->output, "Problems reading syscall %d", id);
1538 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1539 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1540 		fputs(" information\n", trace->output);
1541 	}
1542 	return NULL;
1543 }
1544 
1545 static void thread__update_stats(struct thread_trace *ttrace,
1546 				 int id, struct perf_sample *sample)
1547 {
1548 	struct int_node *inode;
1549 	struct stats *stats;
1550 	u64 duration = 0;
1551 
1552 	inode = intlist__findnew(ttrace->syscall_stats, id);
1553 	if (inode == NULL)
1554 		return;
1555 
1556 	stats = inode->priv;
1557 	if (stats == NULL) {
1558 		stats = malloc(sizeof(struct stats));
1559 		if (stats == NULL)
1560 			return;
1561 		init_stats(stats);
1562 		inode->priv = stats;
1563 	}
1564 
1565 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1566 		duration = sample->time - ttrace->entry_time;
1567 
1568 	update_stats(stats, duration);
1569 }
1570 
1571 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1572 			    struct perf_sample *sample)
1573 {
1574 	char *msg;
1575 	void *args;
1576 	size_t printed = 0;
1577 	struct thread *thread;
1578 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1579 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1580 	struct thread_trace *ttrace;
1581 
1582 	if (sc == NULL)
1583 		return -1;
1584 
1585 	if (sc->filtered)
1586 		return 0;
1587 
1588 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1589 	ttrace = thread__trace(thread, trace->output);
1590 	if (ttrace == NULL)
1591 		return -1;
1592 
1593 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1594 	ttrace = thread->priv;
1595 
1596 	if (ttrace->entry_str == NULL) {
1597 		ttrace->entry_str = malloc(1024);
1598 		if (!ttrace->entry_str)
1599 			return -1;
1600 	}
1601 
1602 	ttrace->entry_time = sample->time;
1603 	msg = ttrace->entry_str;
1604 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1605 
1606 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1607 					   args, trace, thread);
1608 
1609 	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1610 		if (!trace->duration_filter) {
1611 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1612 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1613 		}
1614 	} else
1615 		ttrace->entry_pending = true;
1616 
1617 	return 0;
1618 }
1619 
1620 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1621 			   struct perf_sample *sample)
1622 {
1623 	int ret;
1624 	u64 duration = 0;
1625 	struct thread *thread;
1626 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1627 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1628 	struct thread_trace *ttrace;
1629 
1630 	if (sc == NULL)
1631 		return -1;
1632 
1633 	if (sc->filtered)
1634 		return 0;
1635 
1636 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1637 	ttrace = thread__trace(thread, trace->output);
1638 	if (ttrace == NULL)
1639 		return -1;
1640 
1641 	if (trace->summary)
1642 		thread__update_stats(ttrace, id, sample);
1643 
1644 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1645 
1646 	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1647 		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1648 		trace->last_vfs_getname = NULL;
1649 		++trace->stats.vfs_getname;
1650 	}
1651 
1652 	ttrace = thread->priv;
1653 
1654 	ttrace->exit_time = sample->time;
1655 
1656 	if (ttrace->entry_time) {
1657 		duration = sample->time - ttrace->entry_time;
1658 		if (trace__filter_duration(trace, duration))
1659 			goto out;
1660 	} else if (trace->duration_filter)
1661 		goto out;
1662 
1663 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1664 
1665 	if (ttrace->entry_pending) {
1666 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1667 	} else {
1668 		fprintf(trace->output, " ... [");
1669 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1670 		fprintf(trace->output, "]: %s()", sc->name);
1671 	}
1672 
1673 	if (sc->fmt == NULL) {
1674 signed_print:
1675 		fprintf(trace->output, ") = %d", ret);
1676 	} else if (ret < 0 && sc->fmt->errmsg) {
1677 		char bf[256];
1678 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1679 			   *e = audit_errno_to_name(-ret);
1680 
1681 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1682 	} else if (ret == 0 && sc->fmt->timeout)
1683 		fprintf(trace->output, ") = 0 Timeout");
1684 	else if (sc->fmt->hexret)
1685 		fprintf(trace->output, ") = %#x", ret);
1686 	else
1687 		goto signed_print;
1688 
1689 	fputc('\n', trace->output);
1690 out:
1691 	ttrace->entry_pending = false;
1692 
1693 	return 0;
1694 }
1695 
1696 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1697 			      struct perf_sample *sample)
1698 {
1699 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1700 	return 0;
1701 }
1702 
1703 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1704 				     struct perf_sample *sample)
1705 {
1706         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1707 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1708 	struct thread *thread = machine__findnew_thread(trace->host,
1709 							sample->pid,
1710 							sample->tid);
1711 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1712 
1713 	if (ttrace == NULL)
1714 		goto out_dump;
1715 
1716 	ttrace->runtime_ms += runtime_ms;
1717 	trace->runtime_ms += runtime_ms;
1718 	return 0;
1719 
1720 out_dump:
1721 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1722 	       evsel->name,
1723 	       perf_evsel__strval(evsel, sample, "comm"),
1724 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1725 	       runtime,
1726 	       perf_evsel__intval(evsel, sample, "vruntime"));
1727 	return 0;
1728 }
1729 
1730 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1731 {
1732 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1733 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1734 		return false;
1735 
1736 	if (trace->pid_list || trace->tid_list)
1737 		return true;
1738 
1739 	return false;
1740 }
1741 
1742 static int trace__process_sample(struct perf_tool *tool,
1743 				 union perf_event *event __maybe_unused,
1744 				 struct perf_sample *sample,
1745 				 struct perf_evsel *evsel,
1746 				 struct machine *machine __maybe_unused)
1747 {
1748 	struct trace *trace = container_of(tool, struct trace, tool);
1749 	int err = 0;
1750 
1751 	tracepoint_handler handler = evsel->handler;
1752 
1753 	if (skip_sample(trace, sample))
1754 		return 0;
1755 
1756 	if (!trace->full_time && trace->base_time == 0)
1757 		trace->base_time = sample->time;
1758 
1759 	if (handler)
1760 		handler(trace, evsel, sample);
1761 
1762 	return err;
1763 }
1764 
1765 static bool
1766 perf_session__has_tp(struct perf_session *session, const char *name)
1767 {
1768 	struct perf_evsel *evsel;
1769 
1770 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1771 
1772 	return evsel != NULL;
1773 }
1774 
1775 static int parse_target_str(struct trace *trace)
1776 {
1777 	if (trace->opts.target.pid) {
1778 		trace->pid_list = intlist__new(trace->opts.target.pid);
1779 		if (trace->pid_list == NULL) {
1780 			pr_err("Error parsing process id string\n");
1781 			return -EINVAL;
1782 		}
1783 	}
1784 
1785 	if (trace->opts.target.tid) {
1786 		trace->tid_list = intlist__new(trace->opts.target.tid);
1787 		if (trace->tid_list == NULL) {
1788 			pr_err("Error parsing thread id string\n");
1789 			return -EINVAL;
1790 		}
1791 	}
1792 
1793 	return 0;
1794 }
1795 
/*
 * Run 'perf record' with the options 'perf trace' needs, followed by the
 * caller's own @argv.
 *
 * The argument vector is the fixed record_args prefix plus @argc user
 * arguments, NULL terminated courtesy of calloc(). Returns -ENOMEM when it
 * cannot be allocated, cmd_record()'s result otherwise.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);

	rec_argc = nr_fixed + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < nr_fixed; i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < (unsigned int)argc; j++)
		rec_argv[nr_fixed + j] = argv[j];

	return cmd_record(nr_fixed + j, rec_argv, NULL);
}
1822 
1823 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1824 
1825 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1826 {
1827 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1828 						     evlist->nr_entries);
1829 	if (evsel == NULL)
1830 		return;
1831 
1832 	if (perf_evsel__field(evsel, "pathname") == NULL) {
1833 		perf_evsel__delete(evsel);
1834 		return;
1835 	}
1836 
1837 	evsel->handler = trace__vfs_getname;
1838 	perf_evlist__add(evlist, evsel);
1839 }
1840 
1841 static int trace__run(struct trace *trace, int argc, const char **argv)
1842 {
1843 	struct perf_evlist *evlist = perf_evlist__new();
1844 	struct perf_evsel *evsel;
1845 	int err = -1, i;
1846 	unsigned long before;
1847 	const bool forks = argc > 0;
1848 
1849 	trace->live = true;
1850 
1851 	if (evlist == NULL) {
1852 		fprintf(trace->output, "Not enough memory to run!\n");
1853 		goto out;
1854 	}
1855 
1856 	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1857 		goto out_error_tp;
1858 
1859 	perf_evlist__add_vfs_getname(evlist);
1860 
1861 	if (trace->sched &&
1862 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1863 				trace__sched_stat_runtime))
1864 		goto out_error_tp;
1865 
1866 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
1867 	if (err < 0) {
1868 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1869 		goto out_delete_evlist;
1870 	}
1871 
1872 	err = trace__symbols_init(trace, evlist);
1873 	if (err < 0) {
1874 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
1875 		goto out_delete_maps;
1876 	}
1877 
1878 	perf_evlist__config(evlist, &trace->opts);
1879 
1880 	signal(SIGCHLD, sig_handler);
1881 	signal(SIGINT, sig_handler);
1882 
1883 	if (forks) {
1884 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1885 						    argv, false, false);
1886 		if (err < 0) {
1887 			fprintf(trace->output, "Couldn't run the workload!\n");
1888 			goto out_delete_maps;
1889 		}
1890 	}
1891 
1892 	err = perf_evlist__open(evlist);
1893 	if (err < 0)
1894 		goto out_error_open;
1895 
1896 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
1897 	if (err < 0) {
1898 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1899 		goto out_close_evlist;
1900 	}
1901 
1902 	perf_evlist__enable(evlist);
1903 
1904 	if (forks)
1905 		perf_evlist__start_workload(evlist);
1906 
1907 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1908 again:
1909 	before = trace->nr_events;
1910 
1911 	for (i = 0; i < evlist->nr_mmaps; i++) {
1912 		union perf_event *event;
1913 
1914 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1915 			const u32 type = event->header.type;
1916 			tracepoint_handler handler;
1917 			struct perf_sample sample;
1918 
1919 			++trace->nr_events;
1920 
1921 			err = perf_evlist__parse_sample(evlist, event, &sample);
1922 			if (err) {
1923 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1924 				goto next_event;
1925 			}
1926 
1927 			if (!trace->full_time && trace->base_time == 0)
1928 				trace->base_time = sample.time;
1929 
1930 			if (type != PERF_RECORD_SAMPLE) {
1931 				trace__process_event(trace, trace->host, event, &sample);
1932 				continue;
1933 			}
1934 
1935 			evsel = perf_evlist__id2evsel(evlist, sample.id);
1936 			if (evsel == NULL) {
1937 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1938 				goto next_event;
1939 			}
1940 
1941 			if (sample.raw_data == NULL) {
1942 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1943 				       perf_evsel__name(evsel), sample.tid,
1944 				       sample.cpu, sample.raw_size);
1945 				goto next_event;
1946 			}
1947 
1948 			handler = evsel->handler;
1949 			handler(trace, evsel, &sample);
1950 next_event:
1951 			perf_evlist__mmap_consume(evlist, i);
1952 
1953 			if (interrupted)
1954 				goto out_disable;
1955 		}
1956 	}
1957 
1958 	if (trace->nr_events == before) {
1959 		int timeout = done ? 100 : -1;
1960 
1961 		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1962 			goto again;
1963 	} else {
1964 		goto again;
1965 	}
1966 
1967 out_disable:
1968 	perf_evlist__disable(evlist);
1969 
1970 	if (!err) {
1971 		if (trace->summary)
1972 			trace__fprintf_thread_summary(trace, trace->output);
1973 
1974 		if (trace->show_tool_stats) {
1975 			fprintf(trace->output, "Stats:\n "
1976 					       " vfs_getname : %" PRIu64 "\n"
1977 					       " proc_getname: %" PRIu64 "\n",
1978 				trace->stats.vfs_getname,
1979 				trace->stats.proc_getname);
1980 		}
1981 	}
1982 
1983 	perf_evlist__munmap(evlist);
1984 out_close_evlist:
1985 	perf_evlist__close(evlist);
1986 out_delete_maps:
1987 	perf_evlist__delete_maps(evlist);
1988 out_delete_evlist:
1989 	perf_evlist__delete(evlist);
1990 out:
1991 	trace->live = false;
1992 	return err;
1993 {
1994 	char errbuf[BUFSIZ];
1995 
1996 out_error_tp:
1997 	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1998 	goto out_error;
1999 
2000 out_error_open:
2001 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2002 
2003 out_error:
2004 	fprintf(trace->output, "%s\n", errbuf);
2005 	goto out_delete_evlist;
2006 }
2007 }
2008 
2009 static int trace__replay(struct trace *trace)
2010 {
2011 	const struct perf_evsel_str_handler handlers[] = {
2012 		{ "raw_syscalls:sys_enter",  trace__sys_enter, },
2013 		{ "raw_syscalls:sys_exit",   trace__sys_exit, },
2014 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2015 	};
2016 	struct perf_data_file file = {
2017 		.path  = input_name,
2018 		.mode  = PERF_DATA_MODE_READ,
2019 	};
2020 	struct perf_session *session;
2021 	int err = -1;
2022 
2023 	trace->tool.sample	  = trace__process_sample;
2024 	trace->tool.mmap	  = perf_event__process_mmap;
2025 	trace->tool.mmap2	  = perf_event__process_mmap2;
2026 	trace->tool.comm	  = perf_event__process_comm;
2027 	trace->tool.exit	  = perf_event__process_exit;
2028 	trace->tool.fork	  = perf_event__process_fork;
2029 	trace->tool.attr	  = perf_event__process_attr;
2030 	trace->tool.tracing_data = perf_event__process_tracing_data;
2031 	trace->tool.build_id	  = perf_event__process_build_id;
2032 
2033 	trace->tool.ordered_samples = true;
2034 	trace->tool.ordering_requires_timestamps = true;
2035 
2036 	/* add tid to output */
2037 	trace->multiple_threads = true;
2038 
2039 	if (symbol__init() < 0)
2040 		return -1;
2041 
2042 	session = perf_session__new(&file, false, &trace->tool);
2043 	if (session == NULL)
2044 		return -ENOMEM;
2045 
2046 	trace->host = &session->machines.host;
2047 
2048 	err = perf_session__set_tracepoints_handlers(session, handlers);
2049 	if (err)
2050 		goto out;
2051 
2052 	if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2053 		pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2054 		goto out;
2055 	}
2056 
2057 	if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2058 		pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2059 		goto out;
2060 	}
2061 
2062 	err = parse_target_str(trace);
2063 	if (err != 0)
2064 		goto out;
2065 
2066 	setup_pager();
2067 
2068 	err = perf_session__process_events(session, &trace->tool);
2069 	if (err)
2070 		pr_err("Failed to process events, error %d", err);
2071 
2072 	else if (trace->summary)
2073 		trace__fprintf_thread_summary(trace, trace->output);
2074 
2075 out:
2076 	perf_session__delete(session);
2077 
2078 	return err;
2079 }
2080 
/*
 * Emit the banner and column headings that precede the per-thread summary.
 *
 * Returns the sum of the fprintf() return values for the lines written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t total = 0;

	/* top rule and title */
	total += fprintf(fp, "\n _____________________________________________________________________________\n");
	total += fprintf(fp, " __)    Summary of events    (__\n\n");

	/* column headings: per-thread line first, then the per-syscall stats */
	total += fprintf(fp, "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
	total += fprintf(fp, "                                  syscall  count    min     max    avg  stddev\n");
	total += fprintf(fp, "                                                   msec    msec   msec     %%\n");

	/* bottom rule */
	total += fprintf(fp, " _____________________________________________________________________________\n\n");

	return total;
}
2094 
2095 static size_t thread__dump_stats(struct thread_trace *ttrace,
2096 				 struct trace *trace, FILE *fp)
2097 {
2098 	struct stats *stats;
2099 	size_t printed = 0;
2100 	struct syscall *sc;
2101 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2102 
2103 	if (inode == NULL)
2104 		return 0;
2105 
2106 	printed += fprintf(fp, "\n");
2107 
2108 	/* each int_node is a syscall */
2109 	while (inode) {
2110 		stats = inode->priv;
2111 		if (stats) {
2112 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2113 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2114 			double avg = avg_stats(stats);
2115 			double pct;
2116 			u64 n = (u64) stats->n;
2117 
2118 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2119 			avg /= NSEC_PER_MSEC;
2120 
2121 			sc = &trace->syscalls.table[inode->i];
2122 			printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
2123 			printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
2124 					   n, min, max);
2125 			printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
2126 		}
2127 
2128 		inode = intlist__next(inode);
2129 	}
2130 
2131 	printed += fprintf(fp, "\n\n");
2132 
2133 	return printed;
2134 }
2135 
/*
 * Context handed, through the opaque priv pointer, to the per-thread
 * callback that prints the summary.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer state the callback reads from */
	size_t printed;		/* running count of characters written */
};
2142 
2143 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2144 {
2145 	struct summary_data *data = priv;
2146 	FILE *fp = data->fp;
2147 	size_t printed = data->printed;
2148 	struct trace *trace = data->trace;
2149 	struct thread_trace *ttrace = thread->priv;
2150 	const char *color;
2151 	double ratio;
2152 
2153 	if (ttrace == NULL)
2154 		return 0;
2155 
2156 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2157 
2158 	color = PERF_COLOR_NORMAL;
2159 	if (ratio > 50.0)
2160 		color = PERF_COLOR_RED;
2161 	else if (ratio > 25.0)
2162 		color = PERF_COLOR_GREEN;
2163 	else if (ratio > 5.0)
2164 		color = PERF_COLOR_YELLOW;
2165 
2166 	printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2167 	printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
2168 	printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2169 	printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2170 	printed += thread__dump_stats(ttrace, trace, fp);
2171 
2172 	data->printed += printed;
2173 
2174 	return 0;
2175 }
2176 
2177 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2178 {
2179 	struct summary_data data = {
2180 		.fp = fp,
2181 		.trace = trace
2182 	};
2183 	data.printed = trace__fprintf_threads_header(fp);
2184 
2185 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2186 
2187 	return data.printed;
2188 }
2189 
2190 static int trace__set_duration(const struct option *opt, const char *str,
2191 			       int unset __maybe_unused)
2192 {
2193 	struct trace *trace = opt->value;
2194 
2195 	trace->duration_filter = atof(str);
2196 	return 0;
2197 }
2198 
2199 static int trace__open_output(struct trace *trace, const char *filename)
2200 {
2201 	struct stat st;
2202 
2203 	if (!stat(filename, &st) && st.st_size) {
2204 		char oldname[PATH_MAX];
2205 
2206 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2207 		unlink(oldname);
2208 		rename(filename, oldname);
2209 	}
2210 
2211 	trace->output = fopen(filename, "w");
2212 
2213 	return trace->output == NULL ? -errno : 0;
2214 }
2215 
2216 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2217 {
2218 	const char * const trace_usage[] = {
2219 		"perf trace [<options>] [<command>]",
2220 		"perf trace [<options>] -- <command> [<options>]",
2221 		"perf trace record [<options>] [<command>]",
2222 		"perf trace record [<options>] -- <command> [<options>]",
2223 		NULL
2224 	};
2225 	struct trace trace = {
2226 		.audit = {
2227 			.machine = audit_detect_machine(),
2228 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2229 		},
2230 		.syscalls = {
2231 			. max = -1,
2232 		},
2233 		.opts = {
2234 			.target = {
2235 				.uid	   = UINT_MAX,
2236 				.uses_mmap = true,
2237 			},
2238 			.user_freq     = UINT_MAX,
2239 			.user_interval = ULLONG_MAX,
2240 			.no_delay      = true,
2241 			.mmap_pages    = 1024,
2242 		},
2243 		.output = stdout,
2244 		.show_comm = true,
2245 	};
2246 	const char *output_name = NULL;
2247 	const char *ev_qualifier_str = NULL;
2248 	const struct option trace_options[] = {
2249 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2250 		    "show the thread COMM next to its id"),
2251 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2252 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2253 		    "list of events to trace"),
2254 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2255 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2256 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2257 		    "trace events on existing process id"),
2258 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2259 		    "trace events on existing thread id"),
2260 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2261 		    "system-wide collection from all CPUs"),
2262 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2263 		    "list of cpus to monitor"),
2264 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2265 		    "child tasks do not inherit counters"),
2266 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2267 		     "number of mmap data pages",
2268 		     perf_evlist__parse_mmap_pages),
2269 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2270 		   "user to profile"),
2271 	OPT_CALLBACK(0, "duration", &trace, "float",
2272 		     "show only events with duration > N.M ms",
2273 		     trace__set_duration),
2274 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2275 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2276 	OPT_BOOLEAN('T', "time", &trace.full_time,
2277 		    "Show full timestamp, not time relative to first start"),
2278 	OPT_BOOLEAN(0, "summary", &trace.summary,
2279 		    "Show syscall summary with statistics"),
2280 	OPT_END()
2281 	};
2282 	int err;
2283 	char bf[BUFSIZ];
2284 
2285 	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2286 		return trace__record(argc-2, &argv[2]);
2287 
2288 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2289 
2290 	if (output_name != NULL) {
2291 		err = trace__open_output(&trace, output_name);
2292 		if (err < 0) {
2293 			perror("failed to create output file");
2294 			goto out;
2295 		}
2296 	}
2297 
2298 	if (ev_qualifier_str != NULL) {
2299 		const char *s = ev_qualifier_str;
2300 
2301 		trace.not_ev_qualifier = *s == '!';
2302 		if (trace.not_ev_qualifier)
2303 			++s;
2304 		trace.ev_qualifier = strlist__new(true, s);
2305 		if (trace.ev_qualifier == NULL) {
2306 			fputs("Not enough memory to parse event qualifier",
2307 			      trace.output);
2308 			err = -ENOMEM;
2309 			goto out_close;
2310 		}
2311 	}
2312 
2313 	err = perf_target__validate(&trace.opts.target);
2314 	if (err) {
2315 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2316 		fprintf(trace.output, "%s", bf);
2317 		goto out_close;
2318 	}
2319 
2320 	err = perf_target__parse_uid(&trace.opts.target);
2321 	if (err) {
2322 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2323 		fprintf(trace.output, "%s", bf);
2324 		goto out_close;
2325 	}
2326 
2327 	if (!argc && perf_target__none(&trace.opts.target))
2328 		trace.opts.target.system_wide = true;
2329 
2330 	if (input_name)
2331 		err = trace__replay(&trace);
2332 	else
2333 		err = trace__run(&trace, argc, argv);
2334 
2335 out_close:
2336 	if (output_name != NULL)
2337 		fclose(trace.output);
2338 out:
2339 	return err;
2340 }
2341