xref: /openbmc/linux/tools/perf/builtin-ftrace.c (revision a34a9f1a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * builtin-ftrace.c
4  *
5  * Copyright (c) 2013  LG Electronics,  Namhyung Kim <namhyung@kernel.org>
6  * Copyright (c) 2020  Changbin Du <changbin.du@gmail.com>, significant enhancement.
7  */
8 
9 #include "builtin.h"
10 
11 #include <errno.h>
12 #include <unistd.h>
13 #include <signal.h>
14 #include <stdlib.h>
15 #include <fcntl.h>
16 #include <math.h>
17 #include <poll.h>
18 #include <ctype.h>
19 #include <linux/capability.h>
20 #include <linux/string.h>
21 
22 #include "debug.h"
23 #include <subcmd/pager.h>
24 #include <subcmd/parse-options.h>
25 #include <api/fs/tracing_path.h>
26 #include "evlist.h"
27 #include "target.h"
28 #include "cpumap.h"
29 #include "thread_map.h"
30 #include "strfilter.h"
31 #include "util/cap.h"
32 #include "util/config.h"
33 #include "util/ftrace.h"
34 #include "util/units.h"
35 #include "util/parse-sublevel-options.h"
36 
37 #define DEFAULT_TRACER  "function_graph"
38 
39 static volatile sig_atomic_t workload_exec_errno;
40 static volatile sig_atomic_t done;
41 
42 static void sig_handler(int sig __maybe_unused)
43 {
44 	done = true;
45 }
46 
47 /*
48  * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
49  * we asked by setting its exec_error to the function below,
50  * ftrace__workload_exec_failed_signal.
51  *
52  * XXX We need to handle this more appropriately, emitting an error, etc.
53  */
54 static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
55 						siginfo_t *info __maybe_unused,
56 						void *ucontext __maybe_unused)
57 {
58 	workload_exec_errno = info->si_value.sival_int;
59 	done = true;
60 }
61 
62 static int __write_tracing_file(const char *name, const char *val, bool append)
63 {
64 	char *file;
65 	int fd, ret = -1;
66 	ssize_t size = strlen(val);
67 	int flags = O_WRONLY;
68 	char errbuf[512];
69 	char *val_copy;
70 
71 	file = get_tracing_file(name);
72 	if (!file) {
73 		pr_debug("cannot get tracing file: %s\n", name);
74 		return -1;
75 	}
76 
77 	if (append)
78 		flags |= O_APPEND;
79 	else
80 		flags |= O_TRUNC;
81 
82 	fd = open(file, flags);
83 	if (fd < 0) {
84 		pr_debug("cannot open tracing file: %s: %s\n",
85 			 name, str_error_r(errno, errbuf, sizeof(errbuf)));
86 		goto out;
87 	}
88 
89 	/*
90 	 * Copy the original value and append a '\n'. Without this,
91 	 * the kernel can hide possible errors.
92 	 */
93 	val_copy = strdup(val);
94 	if (!val_copy)
95 		goto out_close;
96 	val_copy[size] = '\n';
97 
98 	if (write(fd, val_copy, size + 1) == size + 1)
99 		ret = 0;
100 	else
101 		pr_debug("write '%s' to tracing/%s failed: %s\n",
102 			 val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
103 
104 	free(val_copy);
105 out_close:
106 	close(fd);
107 out:
108 	put_tracing_file(file);
109 	return ret;
110 }
111 
112 static int write_tracing_file(const char *name, const char *val)
113 {
114 	return __write_tracing_file(name, val, false);
115 }
116 
117 static int append_tracing_file(const char *name, const char *val)
118 {
119 	return __write_tracing_file(name, val, true);
120 }
121 
122 static int read_tracing_file_to_stdout(const char *name)
123 {
124 	char buf[4096];
125 	char *file;
126 	int fd;
127 	int ret = -1;
128 
129 	file = get_tracing_file(name);
130 	if (!file) {
131 		pr_debug("cannot get tracing file: %s\n", name);
132 		return -1;
133 	}
134 
135 	fd = open(file, O_RDONLY);
136 	if (fd < 0) {
137 		pr_debug("cannot open tracing file: %s: %s\n",
138 			 name, str_error_r(errno, buf, sizeof(buf)));
139 		goto out;
140 	}
141 
142 	/* read contents to stdout */
143 	while (true) {
144 		int n = read(fd, buf, sizeof(buf));
145 		if (n == 0)
146 			break;
147 		else if (n < 0)
148 			goto out_close;
149 
150 		if (fwrite(buf, n, 1, stdout) != 1)
151 			goto out_close;
152 	}
153 	ret = 0;
154 
155 out_close:
156 	close(fd);
157 out:
158 	put_tracing_file(file);
159 	return ret;
160 }
161 
162 static int read_tracing_file_by_line(const char *name,
163 				     void (*cb)(char *str, void *arg),
164 				     void *cb_arg)
165 {
166 	char *line = NULL;
167 	size_t len = 0;
168 	char *file;
169 	FILE *fp;
170 
171 	file = get_tracing_file(name);
172 	if (!file) {
173 		pr_debug("cannot get tracing file: %s\n", name);
174 		return -1;
175 	}
176 
177 	fp = fopen(file, "r");
178 	if (fp == NULL) {
179 		pr_debug("cannot open tracing file: %s\n", name);
180 		put_tracing_file(file);
181 		return -1;
182 	}
183 
184 	while (getline(&line, &len, fp) != -1) {
185 		cb(line, cb_arg);
186 	}
187 
188 	if (line)
189 		free(line);
190 
191 	fclose(fp);
192 	put_tracing_file(file);
193 	return 0;
194 }
195 
196 static int write_tracing_file_int(const char *name, int value)
197 {
198 	char buf[16];
199 
200 	snprintf(buf, sizeof(buf), "%d", value);
201 	if (write_tracing_file(name, buf) < 0)
202 		return -1;
203 
204 	return 0;
205 }
206 
207 static int write_tracing_option_file(const char *name, const char *val)
208 {
209 	char *file;
210 	int ret;
211 
212 	if (asprintf(&file, "options/%s", name) < 0)
213 		return -1;
214 
215 	ret = __write_tracing_file(file, val, false);
216 	free(file);
217 	return ret;
218 }
219 
220 static int reset_tracing_cpu(void);
221 static void reset_tracing_filters(void);
222 
223 static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
224 {
225 	write_tracing_option_file("function-fork", "0");
226 	write_tracing_option_file("func_stack_trace", "0");
227 	write_tracing_option_file("sleep-time", "1");
228 	write_tracing_option_file("funcgraph-irqs", "1");
229 	write_tracing_option_file("funcgraph-proc", "0");
230 	write_tracing_option_file("funcgraph-abstime", "0");
231 	write_tracing_option_file("latency-format", "0");
232 	write_tracing_option_file("irq-info", "0");
233 }
234 
235 static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
236 {
237 	if (write_tracing_file("tracing_on", "0") < 0)
238 		return -1;
239 
240 	if (write_tracing_file("current_tracer", "nop") < 0)
241 		return -1;
242 
243 	if (write_tracing_file("set_ftrace_pid", " ") < 0)
244 		return -1;
245 
246 	if (reset_tracing_cpu() < 0)
247 		return -1;
248 
249 	if (write_tracing_file("max_graph_depth", "0") < 0)
250 		return -1;
251 
252 	if (write_tracing_file("tracing_thresh", "0") < 0)
253 		return -1;
254 
255 	reset_tracing_filters();
256 	reset_tracing_options(ftrace);
257 	return 0;
258 }
259 
260 static int set_tracing_pid(struct perf_ftrace *ftrace)
261 {
262 	int i;
263 	char buf[16];
264 
265 	if (target__has_cpu(&ftrace->target))
266 		return 0;
267 
268 	for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) {
269 		scnprintf(buf, sizeof(buf), "%d",
270 			  perf_thread_map__pid(ftrace->evlist->core.threads, i));
271 		if (append_tracing_file("set_ftrace_pid", buf) < 0)
272 			return -1;
273 	}
274 	return 0;
275 }
276 
277 static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
278 {
279 	char *cpumask;
280 	size_t mask_size;
281 	int ret;
282 	int last_cpu;
283 
284 	last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
285 	mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
286 	mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
287 
288 	cpumask = malloc(mask_size);
289 	if (cpumask == NULL) {
290 		pr_debug("failed to allocate cpu mask\n");
291 		return -1;
292 	}
293 
294 	cpu_map__snprint_mask(cpumap, cpumask, mask_size);
295 
296 	ret = write_tracing_file("tracing_cpumask", cpumask);
297 
298 	free(cpumask);
299 	return ret;
300 }
301 
302 static int set_tracing_cpu(struct perf_ftrace *ftrace)
303 {
304 	struct perf_cpu_map *cpumap = ftrace->evlist->core.user_requested_cpus;
305 
306 	if (!target__has_cpu(&ftrace->target))
307 		return 0;
308 
309 	return set_tracing_cpumask(cpumap);
310 }
311 
312 static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace)
313 {
314 	if (!ftrace->func_stack_trace)
315 		return 0;
316 
317 	if (write_tracing_option_file("func_stack_trace", "1") < 0)
318 		return -1;
319 
320 	return 0;
321 }
322 
323 static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
324 {
325 	if (!ftrace->func_irq_info)
326 		return 0;
327 
328 	if (write_tracing_option_file("irq-info", "1") < 0)
329 		return -1;
330 
331 	return 0;
332 }
333 
334 static int reset_tracing_cpu(void)
335 {
336 	struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
337 	int ret;
338 
339 	ret = set_tracing_cpumask(cpumap);
340 	perf_cpu_map__put(cpumap);
341 	return ret;
342 }
343 
344 static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
345 {
346 	struct filter_entry *pos;
347 
348 	list_for_each_entry(pos, funcs, list) {
349 		if (append_tracing_file(filter_file, pos->name) < 0)
350 			return -1;
351 	}
352 
353 	return 0;
354 }
355 
356 static int set_tracing_filters(struct perf_ftrace *ftrace)
357 {
358 	int ret;
359 
360 	ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
361 	if (ret < 0)
362 		return ret;
363 
364 	ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
365 	if (ret < 0)
366 		return ret;
367 
368 	ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
369 	if (ret < 0)
370 		return ret;
371 
372 	/* old kernels do not have this filter */
373 	__set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
374 
375 	return ret;
376 }
377 
378 static void reset_tracing_filters(void)
379 {
380 	write_tracing_file("set_ftrace_filter", " ");
381 	write_tracing_file("set_ftrace_notrace", " ");
382 	write_tracing_file("set_graph_function", " ");
383 	write_tracing_file("set_graph_notrace", " ");
384 }
385 
386 static int set_tracing_depth(struct perf_ftrace *ftrace)
387 {
388 	if (ftrace->graph_depth == 0)
389 		return 0;
390 
391 	if (ftrace->graph_depth < 0) {
392 		pr_err("invalid graph depth: %d\n", ftrace->graph_depth);
393 		return -1;
394 	}
395 
396 	if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0)
397 		return -1;
398 
399 	return 0;
400 }
401 
402 static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace)
403 {
404 	int ret;
405 
406 	if (ftrace->percpu_buffer_size == 0)
407 		return 0;
408 
409 	ret = write_tracing_file_int("buffer_size_kb",
410 				     ftrace->percpu_buffer_size / 1024);
411 	if (ret < 0)
412 		return ret;
413 
414 	return 0;
415 }
416 
417 static int set_tracing_trace_inherit(struct perf_ftrace *ftrace)
418 {
419 	if (!ftrace->inherit)
420 		return 0;
421 
422 	if (write_tracing_option_file("function-fork", "1") < 0)
423 		return -1;
424 
425 	return 0;
426 }
427 
428 static int set_tracing_sleep_time(struct perf_ftrace *ftrace)
429 {
430 	if (!ftrace->graph_nosleep_time)
431 		return 0;
432 
433 	if (write_tracing_option_file("sleep-time", "0") < 0)
434 		return -1;
435 
436 	return 0;
437 }
438 
439 static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace)
440 {
441 	if (!ftrace->graph_noirqs)
442 		return 0;
443 
444 	if (write_tracing_option_file("funcgraph-irqs", "0") < 0)
445 		return -1;
446 
447 	return 0;
448 }
449 
450 static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
451 {
452 	if (!ftrace->graph_verbose)
453 		return 0;
454 
455 	if (write_tracing_option_file("funcgraph-proc", "1") < 0)
456 		return -1;
457 
458 	if (write_tracing_option_file("funcgraph-abstime", "1") < 0)
459 		return -1;
460 
461 	if (write_tracing_option_file("latency-format", "1") < 0)
462 		return -1;
463 
464 	return 0;
465 }
466 
467 static int set_tracing_thresh(struct perf_ftrace *ftrace)
468 {
469 	int ret;
470 
471 	if (ftrace->graph_thresh == 0)
472 		return 0;
473 
474 	ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh);
475 	if (ret < 0)
476 		return ret;
477 
478 	return 0;
479 }
480 
481 static int set_tracing_options(struct perf_ftrace *ftrace)
482 {
483 	if (set_tracing_pid(ftrace) < 0) {
484 		pr_err("failed to set ftrace pid\n");
485 		return -1;
486 	}
487 
488 	if (set_tracing_cpu(ftrace) < 0) {
489 		pr_err("failed to set tracing cpumask\n");
490 		return -1;
491 	}
492 
493 	if (set_tracing_func_stack_trace(ftrace) < 0) {
494 		pr_err("failed to set tracing option func_stack_trace\n");
495 		return -1;
496 	}
497 
498 	if (set_tracing_func_irqinfo(ftrace) < 0) {
499 		pr_err("failed to set tracing option irq-info\n");
500 		return -1;
501 	}
502 
503 	if (set_tracing_filters(ftrace) < 0) {
504 		pr_err("failed to set tracing filters\n");
505 		return -1;
506 	}
507 
508 	if (set_tracing_depth(ftrace) < 0) {
509 		pr_err("failed to set graph depth\n");
510 		return -1;
511 	}
512 
513 	if (set_tracing_percpu_buffer_size(ftrace) < 0) {
514 		pr_err("failed to set tracing per-cpu buffer size\n");
515 		return -1;
516 	}
517 
518 	if (set_tracing_trace_inherit(ftrace) < 0) {
519 		pr_err("failed to set tracing option function-fork\n");
520 		return -1;
521 	}
522 
523 	if (set_tracing_sleep_time(ftrace) < 0) {
524 		pr_err("failed to set tracing option sleep-time\n");
525 		return -1;
526 	}
527 
528 	if (set_tracing_funcgraph_irqs(ftrace) < 0) {
529 		pr_err("failed to set tracing option funcgraph-irqs\n");
530 		return -1;
531 	}
532 
533 	if (set_tracing_funcgraph_verbose(ftrace) < 0) {
534 		pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n");
535 		return -1;
536 	}
537 
538 	if (set_tracing_thresh(ftrace) < 0) {
539 		pr_err("failed to set tracing thresh\n");
540 		return -1;
541 	}
542 
543 	return 0;
544 }
545 
546 static void select_tracer(struct perf_ftrace *ftrace)
547 {
548 	bool graph = !list_empty(&ftrace->graph_funcs) ||
549 		     !list_empty(&ftrace->nograph_funcs);
550 	bool func = !list_empty(&ftrace->filters) ||
551 		    !list_empty(&ftrace->notrace);
552 
553 	/* The function_graph has priority over function tracer. */
554 	if (graph)
555 		ftrace->tracer = "function_graph";
556 	else if (func)
557 		ftrace->tracer = "function";
558 	/* Otherwise, the default tracer is used. */
559 
560 	pr_debug("%s tracer is used\n", ftrace->tracer);
561 }
562 
563 static int __cmd_ftrace(struct perf_ftrace *ftrace)
564 {
565 	char *trace_file;
566 	int trace_fd;
567 	char buf[4096];
568 	struct pollfd pollfd = {
569 		.events = POLLIN,
570 	};
571 
572 	if (!(perf_cap__capable(CAP_PERFMON) ||
573 	      perf_cap__capable(CAP_SYS_ADMIN))) {
574 		pr_err("ftrace only works for %s!\n",
575 #ifdef HAVE_LIBCAP_SUPPORT
576 		"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
577 #else
578 		"root"
579 #endif
580 		);
581 		return -1;
582 	}
583 
584 	select_tracer(ftrace);
585 
586 	if (reset_tracing_files(ftrace) < 0) {
587 		pr_err("failed to reset ftrace\n");
588 		goto out;
589 	}
590 
591 	/* reset ftrace buffer */
592 	if (write_tracing_file("trace", "0") < 0)
593 		goto out;
594 
595 	if (set_tracing_options(ftrace) < 0)
596 		goto out_reset;
597 
598 	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
599 		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
600 		goto out_reset;
601 	}
602 
603 	setup_pager();
604 
605 	trace_file = get_tracing_file("trace_pipe");
606 	if (!trace_file) {
607 		pr_err("failed to open trace_pipe\n");
608 		goto out_reset;
609 	}
610 
611 	trace_fd = open(trace_file, O_RDONLY);
612 
613 	put_tracing_file(trace_file);
614 
615 	if (trace_fd < 0) {
616 		pr_err("failed to open trace_pipe\n");
617 		goto out_reset;
618 	}
619 
620 	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
621 	pollfd.fd = trace_fd;
622 
623 	/* display column headers */
624 	read_tracing_file_to_stdout("trace");
625 
626 	if (!ftrace->target.initial_delay) {
627 		if (write_tracing_file("tracing_on", "1") < 0) {
628 			pr_err("can't enable tracing\n");
629 			goto out_close_fd;
630 		}
631 	}
632 
633 	evlist__start_workload(ftrace->evlist);
634 
635 	if (ftrace->target.initial_delay > 0) {
636 		usleep(ftrace->target.initial_delay * 1000);
637 		if (write_tracing_file("tracing_on", "1") < 0) {
638 			pr_err("can't enable tracing\n");
639 			goto out_close_fd;
640 		}
641 	}
642 
643 	while (!done) {
644 		if (poll(&pollfd, 1, -1) < 0)
645 			break;
646 
647 		if (pollfd.revents & POLLIN) {
648 			int n = read(trace_fd, buf, sizeof(buf));
649 			if (n < 0)
650 				break;
651 			if (fwrite(buf, n, 1, stdout) != 1)
652 				break;
653 			/* flush output since stdout is in full buffering mode due to pager */
654 			fflush(stdout);
655 		}
656 	}
657 
658 	write_tracing_file("tracing_on", "0");
659 
660 	if (workload_exec_errno) {
661 		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
662 		/* flush stdout first so below error msg appears at the end. */
663 		fflush(stdout);
664 		pr_err("workload failed: %s\n", emsg);
665 		goto out_close_fd;
666 	}
667 
668 	/* read remaining buffer contents */
669 	while (true) {
670 		int n = read(trace_fd, buf, sizeof(buf));
671 		if (n <= 0)
672 			break;
673 		if (fwrite(buf, n, 1, stdout) != 1)
674 			break;
675 	}
676 
677 out_close_fd:
678 	close(trace_fd);
679 out_reset:
680 	reset_tracing_files(ftrace);
681 out:
682 	return (done && !workload_exec_errno) ? 0 : -1;
683 }
684 
685 static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
686 			   bool use_nsec)
687 {
688 	char *p, *q;
689 	char *unit;
690 	double num;
691 	int i;
692 
693 	/* ensure NUL termination */
694 	buf[len] = '\0';
695 
696 	/* handle data line by line */
697 	for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
698 		*q = '\0';
699 		/* move it to the line buffer */
700 		strcat(linebuf, p);
701 
702 		/*
703 		 * parse trace output to get function duration like in
704 		 *
705 		 * # tracer: function_graph
706 		 * #
707 		 * # CPU  DURATION                  FUNCTION CALLS
708 		 * # |     |   |                     |   |   |   |
709 		 *  1) + 10.291 us   |  do_filp_open();
710 		 *  1)   4.889 us    |  do_filp_open();
711 		 *  1)   6.086 us    |  do_filp_open();
712 		 *
713 		 */
714 		if (linebuf[0] == '#')
715 			goto next;
716 
717 		/* ignore CPU */
718 		p = strchr(linebuf, ')');
719 		if (p == NULL)
720 			p = linebuf;
721 
722 		while (*p && !isdigit(*p) && (*p != '|'))
723 			p++;
724 
725 		/* no duration */
726 		if (*p == '\0' || *p == '|')
727 			goto next;
728 
729 		num = strtod(p, &unit);
730 		if (!unit || strncmp(unit, " us", 3))
731 			goto next;
732 
733 		if (use_nsec)
734 			num *= 1000;
735 
736 		i = log2(num);
737 		if (i < 0)
738 			i = 0;
739 		if (i >= NUM_BUCKET)
740 			i = NUM_BUCKET - 1;
741 
742 		buckets[i]++;
743 
744 next:
745 		/* empty the line buffer for the next output  */
746 		linebuf[0] = '\0';
747 	}
748 
749 	/* preserve any remaining output (before newline) */
750 	strcat(linebuf, p);
751 }
752 
753 static void display_histogram(int buckets[], bool use_nsec)
754 {
755 	int i;
756 	int total = 0;
757 	int bar_total = 46;  /* to fit in 80 column */
758 	char bar[] = "###############################################";
759 	int bar_len;
760 
761 	for (i = 0; i < NUM_BUCKET; i++)
762 		total += buckets[i];
763 
764 	if (total == 0) {
765 		printf("No data found\n");
766 		return;
767 	}
768 
769 	printf("# %14s | %10s | %-*s |\n",
770 	       "  DURATION    ", "COUNT", bar_total, "GRAPH");
771 
772 	bar_len = buckets[0] * bar_total / total;
773 	printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
774 	       0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
775 
776 	for (i = 1; i < NUM_BUCKET - 1; i++) {
777 		int start = (1 << (i - 1));
778 		int stop = 1 << i;
779 		const char *unit = use_nsec ? "ns" : "us";
780 
781 		if (start >= 1024) {
782 			start >>= 10;
783 			stop >>= 10;
784 			unit = use_nsec ? "us" : "ms";
785 		}
786 		bar_len = buckets[i] * bar_total / total;
787 		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
788 		       start, stop, unit, buckets[i], bar_len, bar,
789 		       bar_total - bar_len, "");
790 	}
791 
792 	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
793 	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
794 	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
795 	       bar_len, bar, bar_total - bar_len, "");
796 
797 }
798 
799 static int prepare_func_latency(struct perf_ftrace *ftrace)
800 {
801 	char *trace_file;
802 	int fd;
803 
804 	if (ftrace->target.use_bpf)
805 		return perf_ftrace__latency_prepare_bpf(ftrace);
806 
807 	if (reset_tracing_files(ftrace) < 0) {
808 		pr_err("failed to reset ftrace\n");
809 		return -1;
810 	}
811 
812 	/* reset ftrace buffer */
813 	if (write_tracing_file("trace", "0") < 0)
814 		return -1;
815 
816 	if (set_tracing_options(ftrace) < 0)
817 		return -1;
818 
819 	/* force to use the function_graph tracer to track duration */
820 	if (write_tracing_file("current_tracer", "function_graph") < 0) {
821 		pr_err("failed to set current_tracer to function_graph\n");
822 		return -1;
823 	}
824 
825 	trace_file = get_tracing_file("trace_pipe");
826 	if (!trace_file) {
827 		pr_err("failed to open trace_pipe\n");
828 		return -1;
829 	}
830 
831 	fd = open(trace_file, O_RDONLY);
832 	if (fd < 0)
833 		pr_err("failed to open trace_pipe\n");
834 
835 	put_tracing_file(trace_file);
836 	return fd;
837 }
838 
839 static int start_func_latency(struct perf_ftrace *ftrace)
840 {
841 	if (ftrace->target.use_bpf)
842 		return perf_ftrace__latency_start_bpf(ftrace);
843 
844 	if (write_tracing_file("tracing_on", "1") < 0) {
845 		pr_err("can't enable tracing\n");
846 		return -1;
847 	}
848 
849 	return 0;
850 }
851 
852 static int stop_func_latency(struct perf_ftrace *ftrace)
853 {
854 	if (ftrace->target.use_bpf)
855 		return perf_ftrace__latency_stop_bpf(ftrace);
856 
857 	write_tracing_file("tracing_on", "0");
858 	return 0;
859 }
860 
861 static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
862 {
863 	if (ftrace->target.use_bpf)
864 		return perf_ftrace__latency_read_bpf(ftrace, buckets);
865 
866 	return 0;
867 }
868 
869 static int cleanup_func_latency(struct perf_ftrace *ftrace)
870 {
871 	if (ftrace->target.use_bpf)
872 		return perf_ftrace__latency_cleanup_bpf(ftrace);
873 
874 	reset_tracing_files(ftrace);
875 	return 0;
876 }
877 
878 static int __cmd_latency(struct perf_ftrace *ftrace)
879 {
880 	int trace_fd;
881 	char buf[4096];
882 	char line[256];
883 	struct pollfd pollfd = {
884 		.events = POLLIN,
885 	};
886 	int buckets[NUM_BUCKET] = { };
887 
888 	if (!(perf_cap__capable(CAP_PERFMON) ||
889 	      perf_cap__capable(CAP_SYS_ADMIN))) {
890 		pr_err("ftrace only works for %s!\n",
891 #ifdef HAVE_LIBCAP_SUPPORT
892 		"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
893 #else
894 		"root"
895 #endif
896 		);
897 		return -1;
898 	}
899 
900 	trace_fd = prepare_func_latency(ftrace);
901 	if (trace_fd < 0)
902 		goto out;
903 
904 	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
905 	pollfd.fd = trace_fd;
906 
907 	if (start_func_latency(ftrace) < 0)
908 		goto out;
909 
910 	evlist__start_workload(ftrace->evlist);
911 
912 	line[0] = '\0';
913 	while (!done) {
914 		if (poll(&pollfd, 1, -1) < 0)
915 			break;
916 
917 		if (pollfd.revents & POLLIN) {
918 			int n = read(trace_fd, buf, sizeof(buf) - 1);
919 			if (n < 0)
920 				break;
921 
922 			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
923 		}
924 	}
925 
926 	stop_func_latency(ftrace);
927 
928 	if (workload_exec_errno) {
929 		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
930 		pr_err("workload failed: %s\n", emsg);
931 		goto out;
932 	}
933 
934 	/* read remaining buffer contents */
935 	while (!ftrace->target.use_bpf) {
936 		int n = read(trace_fd, buf, sizeof(buf) - 1);
937 		if (n <= 0)
938 			break;
939 		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
940 	}
941 
942 	read_func_latency(ftrace, buckets);
943 
944 	display_histogram(buckets, ftrace->use_nsec);
945 
946 out:
947 	close(trace_fd);
948 	cleanup_func_latency(ftrace);
949 
950 	return (done && !workload_exec_errno) ? 0 : -1;
951 }
952 
953 static int perf_ftrace_config(const char *var, const char *value, void *cb)
954 {
955 	struct perf_ftrace *ftrace = cb;
956 
957 	if (!strstarts(var, "ftrace."))
958 		return 0;
959 
960 	if (strcmp(var, "ftrace.tracer"))
961 		return -1;
962 
963 	if (!strcmp(value, "function_graph") ||
964 	    !strcmp(value, "function")) {
965 		ftrace->tracer = value;
966 		return 0;
967 	}
968 
969 	pr_err("Please select \"function_graph\" (default) or \"function\"\n");
970 	return -1;
971 }
972 
973 static void list_function_cb(char *str, void *arg)
974 {
975 	struct strfilter *filter = (struct strfilter *)arg;
976 
977 	if (strfilter__compare(filter, str))
978 		printf("%s", str);
979 }
980 
981 static int opt_list_avail_functions(const struct option *opt __maybe_unused,
982 				    const char *str, int unset)
983 {
984 	struct strfilter *filter;
985 	const char *err = NULL;
986 	int ret;
987 
988 	if (unset || !str)
989 		return -1;
990 
991 	filter = strfilter__new(str, &err);
992 	if (!filter)
993 		return err ? -EINVAL : -ENOMEM;
994 
995 	ret = strfilter__or(filter, str, &err);
996 	if (ret == -EINVAL) {
997 		pr_err("Filter parse error at %td.\n", err - str + 1);
998 		pr_err("Source: \"%s\"\n", str);
999 		pr_err("         %*c\n", (int)(err - str + 1), '^');
1000 		strfilter__delete(filter);
1001 		return ret;
1002 	}
1003 
1004 	ret = read_tracing_file_by_line("available_filter_functions",
1005 					list_function_cb, filter);
1006 	strfilter__delete(filter);
1007 	if (ret < 0)
1008 		return ret;
1009 
1010 	exit(0);
1011 }
1012 
1013 static int parse_filter_func(const struct option *opt, const char *str,
1014 			     int unset __maybe_unused)
1015 {
1016 	struct list_head *head = opt->value;
1017 	struct filter_entry *entry;
1018 
1019 	entry = malloc(sizeof(*entry) + strlen(str) + 1);
1020 	if (entry == NULL)
1021 		return -ENOMEM;
1022 
1023 	strcpy(entry->name, str);
1024 	list_add_tail(&entry->list, head);
1025 
1026 	return 0;
1027 }
1028 
1029 static void delete_filter_func(struct list_head *head)
1030 {
1031 	struct filter_entry *pos, *tmp;
1032 
1033 	list_for_each_entry_safe(pos, tmp, head, list) {
1034 		list_del_init(&pos->list);
1035 		free(pos);
1036 	}
1037 }
1038 
1039 static int parse_buffer_size(const struct option *opt,
1040 			     const char *str, int unset)
1041 {
1042 	unsigned long *s = (unsigned long *)opt->value;
1043 	static struct parse_tag tags_size[] = {
1044 		{ .tag  = 'B', .mult = 1       },
1045 		{ .tag  = 'K', .mult = 1 << 10 },
1046 		{ .tag  = 'M', .mult = 1 << 20 },
1047 		{ .tag  = 'G', .mult = 1 << 30 },
1048 		{ .tag  = 0 },
1049 	};
1050 	unsigned long val;
1051 
1052 	if (unset) {
1053 		*s = 0;
1054 		return 0;
1055 	}
1056 
1057 	val = parse_tag_value(str, tags_size);
1058 	if (val != (unsigned long) -1) {
1059 		if (val < 1024) {
1060 			pr_err("buffer size too small, must larger than 1KB.");
1061 			return -1;
1062 		}
1063 		*s = val;
1064 		return 0;
1065 	}
1066 
1067 	return -1;
1068 }
1069 
1070 static int parse_func_tracer_opts(const struct option *opt,
1071 				  const char *str, int unset)
1072 {
1073 	int ret;
1074 	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
1075 	struct sublevel_option func_tracer_opts[] = {
1076 		{ .name = "call-graph",	.value_ptr = &ftrace->func_stack_trace },
1077 		{ .name = "irq-info",	.value_ptr = &ftrace->func_irq_info },
1078 		{ .name = NULL, }
1079 	};
1080 
1081 	if (unset)
1082 		return 0;
1083 
1084 	ret = perf_parse_sublevel_options(str, func_tracer_opts);
1085 	if (ret)
1086 		return ret;
1087 
1088 	return 0;
1089 }
1090 
1091 static int parse_graph_tracer_opts(const struct option *opt,
1092 				  const char *str, int unset)
1093 {
1094 	int ret;
1095 	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
1096 	struct sublevel_option graph_tracer_opts[] = {
1097 		{ .name = "nosleep-time",	.value_ptr = &ftrace->graph_nosleep_time },
1098 		{ .name = "noirqs",		.value_ptr = &ftrace->graph_noirqs },
1099 		{ .name = "verbose",		.value_ptr = &ftrace->graph_verbose },
1100 		{ .name = "thresh",		.value_ptr = &ftrace->graph_thresh },
1101 		{ .name = "depth",		.value_ptr = &ftrace->graph_depth },
1102 		{ .name = NULL, }
1103 	};
1104 
1105 	if (unset)
1106 		return 0;
1107 
1108 	ret = perf_parse_sublevel_options(str, graph_tracer_opts);
1109 	if (ret)
1110 		return ret;
1111 
1112 	return 0;
1113 }
1114 
1115 enum perf_ftrace_subcommand {
1116 	PERF_FTRACE_NONE,
1117 	PERF_FTRACE_TRACE,
1118 	PERF_FTRACE_LATENCY,
1119 };
1120 
1121 int cmd_ftrace(int argc, const char **argv)
1122 {
1123 	int ret;
1124 	int (*cmd_func)(struct perf_ftrace *) = NULL;
1125 	struct perf_ftrace ftrace = {
1126 		.tracer = DEFAULT_TRACER,
1127 		.target = { .uid = UINT_MAX, },
1128 	};
1129 	const struct option common_options[] = {
1130 	OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
1131 		   "Trace on existing process id"),
1132 	/* TODO: Add short option -t after -t/--tracer can be removed. */
1133 	OPT_STRING(0, "tid", &ftrace.target.tid, "tid",
1134 		   "Trace on existing thread id (exclusive to --pid)"),
1135 	OPT_INCR('v', "verbose", &verbose,
1136 		 "Be more verbose"),
1137 	OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
1138 		    "System-wide collection from all CPUs"),
1139 	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
1140 		    "List of cpus to monitor"),
1141 	OPT_END()
1142 	};
1143 	const struct option ftrace_options[] = {
1144 	OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
1145 		   "Tracer to use: function_graph(default) or function"),
1146 	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
1147 			     "Show available functions to filter",
1148 			     opt_list_avail_functions, "*"),
1149 	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
1150 		     "Trace given functions using function tracer",
1151 		     parse_filter_func),
1152 	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
1153 		     "Do not trace given functions", parse_filter_func),
1154 	OPT_CALLBACK(0, "func-opts", &ftrace, "options",
1155 		     "Function tracer options, available options: call-graph,irq-info",
1156 		     parse_func_tracer_opts),
1157 	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
1158 		     "Trace given functions using function_graph tracer",
1159 		     parse_filter_func),
1160 	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
1161 		     "Set nograph filter on given functions", parse_filter_func),
1162 	OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
1163 		     "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>",
1164 		     parse_graph_tracer_opts),
1165 	OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
1166 		     "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
1167 	OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
1168 		    "Trace children processes"),
1169 	OPT_INTEGER('D', "delay", &ftrace.target.initial_delay,
1170 		    "Number of milliseconds to wait before starting tracing after program start"),
1171 	OPT_PARENT(common_options),
1172 	};
1173 	const struct option latency_options[] = {
1174 	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
1175 		     "Show latency of given function", parse_filter_func),
1176 #ifdef HAVE_BPF_SKEL
1177 	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
1178 		    "Use BPF to measure function latency"),
1179 #endif
1180 	OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
1181 		    "Use nano-second histogram"),
1182 	OPT_PARENT(common_options),
1183 	};
1184 	const struct option *options = ftrace_options;
1185 
1186 	const char * const ftrace_usage[] = {
1187 		"perf ftrace [<options>] [<command>]",
1188 		"perf ftrace [<options>] -- [<command>] [<options>]",
1189 		"perf ftrace {trace|latency} [<options>] [<command>]",
1190 		"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
1191 		NULL
1192 	};
1193 	enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
1194 
1195 	INIT_LIST_HEAD(&ftrace.filters);
1196 	INIT_LIST_HEAD(&ftrace.notrace);
1197 	INIT_LIST_HEAD(&ftrace.graph_funcs);
1198 	INIT_LIST_HEAD(&ftrace.nograph_funcs);
1199 
1200 	signal(SIGINT, sig_handler);
1201 	signal(SIGUSR1, sig_handler);
1202 	signal(SIGCHLD, sig_handler);
1203 	signal(SIGPIPE, sig_handler);
1204 
1205 	ret = perf_config(perf_ftrace_config, &ftrace);
1206 	if (ret < 0)
1207 		return -1;
1208 
1209 	if (argc > 1) {
1210 		if (!strcmp(argv[1], "trace")) {
1211 			subcmd = PERF_FTRACE_TRACE;
1212 		} else if (!strcmp(argv[1], "latency")) {
1213 			subcmd = PERF_FTRACE_LATENCY;
1214 			options = latency_options;
1215 		}
1216 
1217 		if (subcmd != PERF_FTRACE_NONE) {
1218 			argc--;
1219 			argv++;
1220 		}
1221 	}
1222 	/* for backward compatibility */
1223 	if (subcmd == PERF_FTRACE_NONE)
1224 		subcmd = PERF_FTRACE_TRACE;
1225 
1226 	argc = parse_options(argc, argv, options, ftrace_usage,
1227 			    PARSE_OPT_STOP_AT_NON_OPTION);
1228 	if (argc < 0) {
1229 		ret = -EINVAL;
1230 		goto out_delete_filters;
1231 	}
1232 
1233 	/* Make system wide (-a) the default target. */
1234 	if (!argc && target__none(&ftrace.target))
1235 		ftrace.target.system_wide = true;
1236 
1237 	switch (subcmd) {
1238 	case PERF_FTRACE_TRACE:
1239 		cmd_func = __cmd_ftrace;
1240 		break;
1241 	case PERF_FTRACE_LATENCY:
1242 		if (list_empty(&ftrace.filters)) {
1243 			pr_err("Should provide a function to measure\n");
1244 			parse_options_usage(ftrace_usage, options, "T", 1);
1245 			ret = -EINVAL;
1246 			goto out_delete_filters;
1247 		}
1248 		cmd_func = __cmd_latency;
1249 		break;
1250 	case PERF_FTRACE_NONE:
1251 	default:
1252 		pr_err("Invalid subcommand\n");
1253 		ret = -EINVAL;
1254 		goto out_delete_filters;
1255 	}
1256 
1257 	ret = target__validate(&ftrace.target);
1258 	if (ret) {
1259 		char errbuf[512];
1260 
1261 		target__strerror(&ftrace.target, ret, errbuf, 512);
1262 		pr_err("%s\n", errbuf);
1263 		goto out_delete_filters;
1264 	}
1265 
1266 	ftrace.evlist = evlist__new();
1267 	if (ftrace.evlist == NULL) {
1268 		ret = -ENOMEM;
1269 		goto out_delete_filters;
1270 	}
1271 
1272 	ret = evlist__create_maps(ftrace.evlist, &ftrace.target);
1273 	if (ret < 0)
1274 		goto out_delete_evlist;
1275 
1276 	if (argc) {
1277 		ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
1278 					       argv, false,
1279 					       ftrace__workload_exec_failed_signal);
1280 		if (ret < 0)
1281 			goto out_delete_evlist;
1282 	}
1283 
1284 	ret = cmd_func(&ftrace);
1285 
1286 out_delete_evlist:
1287 	evlist__delete(ftrace.evlist);
1288 
1289 out_delete_filters:
1290 	delete_filter_func(&ftrace.filters);
1291 	delete_filter_func(&ftrace.notrace);
1292 	delete_filter_func(&ftrace.graph_funcs);
1293 	delete_filter_func(&ftrace.nograph_funcs);
1294 
1295 	return ret;
1296 }
1297