xref: /openbmc/linux/tools/perf/builtin-stat.c (revision 4464005a12b5c79e1a364e6272ee10a83413f928)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * builtin-stat.c
4  *
5  * Builtin stat command: Give a precise performance counters summary
6  * overview about any workload, CPU or specific PID.
7  *
8  * Sample output:
9 
10    $ perf stat ./hackbench 10
11 
12   Time: 0.118
13 
14   Performance counter stats for './hackbench 10':
15 
16        1708.761321 task-clock                #   11.037 CPUs utilized
17             41,190 context-switches          #    0.024 M/sec
18              6,735 CPU-migrations            #    0.004 M/sec
19             17,318 page-faults               #    0.010 M/sec
20      5,205,202,243 cycles                    #    3.046 GHz
21      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
22      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
23      2,603,501,247 instructions              #    0.50  insns per cycle
24                                              #    1.48  stalled cycles per insn
25        484,357,498 branches                  #  283.455 M/sec
26          6,388,934 branch-misses             #    1.32% of all branches
27 
28         0.154822978  seconds time elapsed
29 
30  *
31  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
32  *
33  * Improvements and fixes by:
34  *
35  *   Arjan van de Ven <arjan@linux.intel.com>
36  *   Yanmin Zhang <yanmin.zhang@intel.com>
37  *   Wu Fengguang <fengguang.wu@intel.com>
38  *   Mike Galbraith <efault@gmx.de>
39  *   Paul Mackerras <paulus@samba.org>
40  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
41  */
42 
43 #include "builtin.h"
44 #include "perf.h"
45 #include "util/cgroup.h"
46 #include <subcmd/parse-options.h>
47 #include "util/parse-events.h"
48 #include "util/pmu.h"
49 #include "util/event.h"
50 #include "util/evlist.h"
51 #include "util/evsel.h"
52 #include "util/debug.h"
53 #include "util/color.h"
54 #include "util/stat.h"
55 #include "util/header.h"
56 #include "util/cpumap.h"
57 #include "util/thread_map.h"
58 #include "util/counts.h"
59 #include "util/group.h"
60 #include "util/session.h"
61 #include "util/tool.h"
62 #include "util/string2.h"
63 #include "util/metricgroup.h"
64 #include "util/synthetic-events.h"
65 #include "util/target.h"
66 #include "util/time-utils.h"
67 #include "util/top.h"
68 #include "util/affinity.h"
69 #include "util/pfm.h"
70 #include "asm/bug.h"
71 
72 #include <linux/time64.h>
73 #include <linux/zalloc.h>
74 #include <api/fs/fs.h>
75 #include <errno.h>
76 #include <signal.h>
77 #include <stdlib.h>
78 #include <sys/prctl.h>
79 #include <inttypes.h>
80 #include <locale.h>
81 #include <math.h>
82 #include <sys/types.h>
83 #include <sys/stat.h>
84 #include <sys/wait.h>
85 #include <unistd.h>
86 #include <sys/time.h>
87 #include <sys/resource.h>
88 #include <linux/err.h>
89 
90 #include <linux/ctype.h>
91 #include <perf/evlist.h>
92 
93 #define DEFAULT_SEPARATOR	" "
94 #define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
95 
96 static void print_counters(struct timespec *ts, int argc, const char **argv);
97 
98 /* Default events used for perf stat -T */
99 static const char *transaction_attrs = {
100 	"task-clock,"
101 	"{"
102 	"instructions,"
103 	"cycles,"
104 	"cpu/cycles-t/,"
105 	"cpu/tx-start/,"
106 	"cpu/el-start/,"
107 	"cpu/cycles-ct/"
108 	"}"
109 };
110 
111 /* More limited version when the CPU does not have all events. */
112 static const char * transaction_limited_attrs = {
113 	"task-clock,"
114 	"{"
115 	"instructions,"
116 	"cycles,"
117 	"cpu/cycles-t/,"
118 	"cpu/tx-start/"
119 	"}"
120 };
121 
122 static const char * topdown_attrs[] = {
123 	"topdown-total-slots",
124 	"topdown-slots-retired",
125 	"topdown-recovery-bubbles",
126 	"topdown-fetch-bubbles",
127 	"topdown-slots-issued",
128 	NULL,
129 };
130 
131 static const char *smi_cost_attrs = {
132 	"{"
133 	"msr/aperf/,"
134 	"msr/smi/,"
135 	"cycles"
136 	"}"
137 };
138 
139 static struct evlist	*evsel_list;
140 
141 static struct target target = {
142 	.uid	= UINT_MAX,
143 };
144 
145 #define METRIC_ONLY_LEN 20
146 
147 static volatile pid_t		child_pid			= -1;
148 static int			detailed_run			=  0;
149 static bool			transaction_run;
150 static bool			topdown_run			= false;
151 static bool			smi_cost			= false;
152 static bool			smi_reset			= false;
153 static int			big_num_opt			=  -1;
154 static bool			group				= false;
155 static const char		*pre_cmd			= NULL;
156 static const char		*post_cmd			= NULL;
157 static bool			sync_run			= false;
158 static bool			forever				= false;
159 static bool			force_metric_only		= false;
160 static struct timespec		ref_time;
161 static bool			append_file;
162 static bool			interval_count;
163 static const char		*output_name;
164 static int			output_fd;
165 
166 struct perf_stat {
167 	bool			 record;
168 	struct perf_data	 data;
169 	struct perf_session	*session;
170 	u64			 bytes_written;
171 	struct perf_tool	 tool;
172 	bool			 maps_allocated;
173 	struct perf_cpu_map	*cpus;
174 	struct perf_thread_map *threads;
175 	enum aggr_mode		 aggr_mode;
176 };
177 
178 static struct perf_stat		perf_stat;
179 #define STAT_RECORD		perf_stat.record
180 
181 static volatile int done = 0;
182 
183 static struct perf_stat_config stat_config = {
184 	.aggr_mode		= AGGR_GLOBAL,
185 	.scale			= true,
186 	.unit_width		= 4, /* strlen("unit") */
187 	.run_count		= 1,
188 	.metric_only_len	= METRIC_ONLY_LEN,
189 	.walltime_nsecs_stats	= &walltime_nsecs_stats,
190 	.big_num		= true,
191 };
192 
193 static bool cpus_map_matched(struct evsel *a, struct evsel *b)
194 {
195 	if (!a->core.cpus && !b->core.cpus)
196 		return true;
197 
198 	if (!a->core.cpus || !b->core.cpus)
199 		return false;
200 
201 	if (a->core.cpus->nr != b->core.cpus->nr)
202 		return false;
203 
204 	for (int i = 0; i < a->core.cpus->nr; i++) {
205 		if (a->core.cpus->map[i] != b->core.cpus->map[i])
206 			return false;
207 	}
208 
209 	return true;
210 }
211 
212 static void evlist__check_cpu_maps(struct evlist *evlist)
213 {
214 	struct evsel *evsel, *pos, *leader;
215 	char buf[1024];
216 
217 	evlist__for_each_entry(evlist, evsel) {
218 		leader = evsel->leader;
219 
220 		/* Check that leader matches cpus with each member. */
221 		if (leader == evsel)
222 			continue;
223 		if (cpus_map_matched(leader, evsel))
224 			continue;
225 
226 		/* If there's mismatch disable the group and warn user. */
227 		WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
228 		evsel__group_desc(leader, buf, sizeof(buf));
229 		pr_warning("  %s\n", buf);
230 
231 		if (verbose) {
232 			cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
233 			pr_warning("     %s: %s\n", leader->name, buf);
234 			cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
235 			pr_warning("     %s: %s\n", evsel->name, buf);
236 		}
237 
238 		for_each_group_evsel(pos, leader) {
239 			pos->leader = pos;
240 			pos->core.nr_members = 0;
241 		}
242 		evsel->leader->core.nr_members = 0;
243 	}
244 }
245 
246 static inline void diff_timespec(struct timespec *r, struct timespec *a,
247 				 struct timespec *b)
248 {
249 	r->tv_sec = a->tv_sec - b->tv_sec;
250 	if (a->tv_nsec < b->tv_nsec) {
251 		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
252 		r->tv_sec--;
253 	} else {
254 		r->tv_nsec = a->tv_nsec - b->tv_nsec ;
255 	}
256 }
257 
258 static void perf_stat__reset_stats(void)
259 {
260 	int i;
261 
262 	perf_evlist__reset_stats(evsel_list);
263 	perf_stat__reset_shadow_stats();
264 
265 	for (i = 0; i < stat_config.stats_num; i++)
266 		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
267 }
268 
269 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
270 				     union perf_event *event,
271 				     struct perf_sample *sample __maybe_unused,
272 				     struct machine *machine __maybe_unused)
273 {
274 	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
275 		pr_err("failed to write perf data, error: %m\n");
276 		return -1;
277 	}
278 
279 	perf_stat.bytes_written += event->header.size;
280 	return 0;
281 }
282 
283 static int write_stat_round_event(u64 tm, u64 type)
284 {
285 	return perf_event__synthesize_stat_round(NULL, tm, type,
286 						 process_synthesized_event,
287 						 NULL);
288 }
289 
290 #define WRITE_STAT_ROUND_EVENT(time, interval) \
291 	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
292 
293 #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
294 
295 static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
296 				   struct perf_counts_values *count)
297 {
298 	struct perf_sample_id *sid = SID(counter, cpu, thread);
299 
300 	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
301 					   process_synthesized_event, NULL);
302 }
303 
304 static int read_single_counter(struct evsel *counter, int cpu,
305 			       int thread, struct timespec *rs)
306 {
307 	if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
308 		u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
309 		struct perf_counts_values *count =
310 			perf_counts(counter->counts, cpu, thread);
311 		count->ena = count->run = val;
312 		count->val = val;
313 		return 0;
314 	}
315 	return evsel__read_counter(counter, cpu, thread);
316 }
317 
318 /*
319  * Read out the results of a single counter:
320  * do not aggregate counts across CPUs in system-wide mode
321  */
322 static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
323 {
324 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
325 	int thread;
326 
327 	if (!counter->supported)
328 		return -ENOENT;
329 
330 	if (counter->core.system_wide)
331 		nthreads = 1;
332 
333 	for (thread = 0; thread < nthreads; thread++) {
334 		struct perf_counts_values *count;
335 
336 		count = perf_counts(counter->counts, cpu, thread);
337 
338 		/*
339 		 * The leader's group read loads data into its group members
340 		 * (via evsel__read_counter()) and sets their count->loaded.
341 		 */
342 		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
343 		    read_single_counter(counter, cpu, thread, rs)) {
344 			counter->counts->scaled = -1;
345 			perf_counts(counter->counts, cpu, thread)->ena = 0;
346 			perf_counts(counter->counts, cpu, thread)->run = 0;
347 			return -1;
348 		}
349 
350 		perf_counts__set_loaded(counter->counts, cpu, thread, false);
351 
352 		if (STAT_RECORD) {
353 			if (evsel__write_stat_event(counter, cpu, thread, count)) {
354 				pr_err("failed to write stat event\n");
355 				return -1;
356 			}
357 		}
358 
359 		if (verbose > 1) {
360 			fprintf(stat_config.output,
361 				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
362 					evsel__name(counter),
363 					cpu,
364 					count->val, count->ena, count->run);
365 		}
366 	}
367 
368 	return 0;
369 }
370 
371 static int read_affinity_counters(struct timespec *rs)
372 {
373 	struct evsel *counter;
374 	struct affinity affinity;
375 	int i, ncpus, cpu;
376 
377 	if (affinity__setup(&affinity) < 0)
378 		return -1;
379 
380 	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
381 	if (!target__has_cpu(&target) || target__has_per_thread(&target))
382 		ncpus = 1;
383 	evlist__for_each_cpu(evsel_list, i, cpu) {
384 		if (i >= ncpus)
385 			break;
386 		affinity__set(&affinity, cpu);
387 
388 		evlist__for_each_entry(evsel_list, counter) {
389 			if (evsel__cpu_iter_skip(counter, cpu))
390 				continue;
391 			if (!counter->err) {
392 				counter->err = read_counter_cpu(counter, rs,
393 								counter->cpu_iter - 1);
394 			}
395 		}
396 	}
397 	affinity__cleanup(&affinity);
398 	return 0;
399 }
400 
401 static void read_counters(struct timespec *rs)
402 {
403 	struct evsel *counter;
404 
405 	if (!stat_config.summary && (read_affinity_counters(rs) < 0))
406 		return;
407 
408 	evlist__for_each_entry(evsel_list, counter) {
409 		if (counter->err)
410 			pr_debug("failed to read counter %s\n", counter->name);
411 		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
412 			pr_warning("failed to process counter %s\n", counter->name);
413 		counter->err = 0;
414 	}
415 }
416 
417 static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
418 {
419 	int i;
420 
421 	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
422 	if (!config->stats)
423 		return -1;
424 
425 	config->stats_num = nthreads;
426 
427 	for (i = 0; i < nthreads; i++)
428 		runtime_stat__init(&config->stats[i]);
429 
430 	return 0;
431 }
432 
433 static void runtime_stat_delete(struct perf_stat_config *config)
434 {
435 	int i;
436 
437 	if (!config->stats)
438 		return;
439 
440 	for (i = 0; i < config->stats_num; i++)
441 		runtime_stat__exit(&config->stats[i]);
442 
443 	zfree(&config->stats);
444 }
445 
446 static void runtime_stat_reset(struct perf_stat_config *config)
447 {
448 	int i;
449 
450 	if (!config->stats)
451 		return;
452 
453 	for (i = 0; i < config->stats_num; i++)
454 		perf_stat__reset_shadow_per_stat(&config->stats[i]);
455 }
456 
457 static void process_interval(void)
458 {
459 	struct timespec ts, rs;
460 
461 	clock_gettime(CLOCK_MONOTONIC, &ts);
462 	diff_timespec(&rs, &ts, &ref_time);
463 
464 	perf_stat__reset_shadow_per_stat(&rt_stat);
465 	runtime_stat_reset(&stat_config);
466 	read_counters(&rs);
467 
468 	if (STAT_RECORD) {
469 		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
470 			pr_err("failed to write stat round event\n");
471 	}
472 
473 	init_stats(&walltime_nsecs_stats);
474 	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
475 	print_counters(&rs, 0, NULL);
476 }
477 
478 static void enable_counters(void)
479 {
480 	if (stat_config.initial_delay)
481 		usleep(stat_config.initial_delay * USEC_PER_MSEC);
482 
483 	/*
484 	 * We need to enable counters only if:
485 	 * - we don't have tracee (attaching to task or cpu)
486 	 * - we have initial delay configured
487 	 */
488 	if (!target__none(&target) || stat_config.initial_delay)
489 		evlist__enable(evsel_list);
490 }
491 
492 static void disable_counters(void)
493 {
494 	/*
495 	 * If we don't have tracee (attaching to task or cpu), counters may
496 	 * still be running. To get accurate group ratios, we must stop groups
497 	 * from counting before reading their constituent counters.
498 	 */
499 	if (!target__none(&target))
500 		evlist__disable(evsel_list);
501 }
502 
503 static volatile int workload_exec_errno;
504 
505 /*
506  * perf_evlist__prepare_workload will send a SIGUSR1
507  * if the fork fails, since we asked by setting its
508  * want_signal to true.
509  */
510 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
511 					void *ucontext __maybe_unused)
512 {
513 	workload_exec_errno = info->si_value.sival_int;
514 }
515 
516 static bool evsel__should_store_id(struct evsel *counter)
517 {
518 	return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
519 }
520 
521 static bool is_target_alive(struct target *_target,
522 			    struct perf_thread_map *threads)
523 {
524 	struct stat st;
525 	int i;
526 
527 	if (!target__has_task(_target))
528 		return true;
529 
530 	for (i = 0; i < threads->nr; i++) {
531 		char path[PATH_MAX];
532 
533 		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
534 			  threads->map[i].pid);
535 
536 		if (!stat(path, &st))
537 			return true;
538 	}
539 
540 	return false;
541 }
542 
543 enum counter_recovery {
544 	COUNTER_SKIP,
545 	COUNTER_RETRY,
546 	COUNTER_FATAL,
547 };
548 
549 static enum counter_recovery stat_handle_error(struct evsel *counter)
550 {
551 	char msg[BUFSIZ];
552 	/*
553 	 * PPC returns ENXIO for HW counters until 2.6.37
554 	 * (behavior changed with commit b0a873e).
555 	 */
556 	if (errno == EINVAL || errno == ENOSYS ||
557 	    errno == ENOENT || errno == EOPNOTSUPP ||
558 	    errno == ENXIO) {
559 		if (verbose > 0)
560 			ui__warning("%s event is not supported by the kernel.\n",
561 				    evsel__name(counter));
562 		counter->supported = false;
563 		/*
564 		 * errored is a sticky flag that means one of the counter's
565 		 * cpu event had a problem and needs to be reexamined.
566 		 */
567 		counter->errored = true;
568 
569 		if ((counter->leader != counter) ||
570 		    !(counter->leader->core.nr_members > 1))
571 			return COUNTER_SKIP;
572 	} else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
573 		if (verbose > 0)
574 			ui__warning("%s\n", msg);
575 		return COUNTER_RETRY;
576 	} else if (target__has_per_thread(&target) &&
577 		   evsel_list->core.threads &&
578 		   evsel_list->core.threads->err_thread != -1) {
579 		/*
580 		 * For global --per-thread case, skip current
581 		 * error thread.
582 		 */
583 		if (!thread_map__remove(evsel_list->core.threads,
584 					evsel_list->core.threads->err_thread)) {
585 			evsel_list->core.threads->err_thread = -1;
586 			return COUNTER_RETRY;
587 		}
588 	}
589 
590 	evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
591 	ui__error("%s\n", msg);
592 
593 	if (child_pid != -1)
594 		kill(child_pid, SIGTERM);
595 	return COUNTER_FATAL;
596 }
597 
598 static int __run_perf_stat(int argc, const char **argv, int run_idx)
599 {
600 	int interval = stat_config.interval;
601 	int times = stat_config.times;
602 	int timeout = stat_config.timeout;
603 	char msg[BUFSIZ];
604 	unsigned long long t0, t1;
605 	struct evsel *counter;
606 	struct timespec ts;
607 	size_t l;
608 	int status = 0;
609 	const bool forks = (argc > 0);
610 	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
611 	struct affinity affinity;
612 	int i, cpu;
613 	bool second_pass = false;
614 
615 	if (interval) {
616 		ts.tv_sec  = interval / USEC_PER_MSEC;
617 		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
618 	} else if (timeout) {
619 		ts.tv_sec  = timeout / USEC_PER_MSEC;
620 		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
621 	} else {
622 		ts.tv_sec  = 1;
623 		ts.tv_nsec = 0;
624 	}
625 
626 	if (forks) {
627 		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
628 						  workload_exec_failed_signal) < 0) {
629 			perror("failed to prepare workload");
630 			return -1;
631 		}
632 		child_pid = evsel_list->workload.pid;
633 	}
634 
635 	if (group)
636 		perf_evlist__set_leader(evsel_list);
637 
638 	if (affinity__setup(&affinity) < 0)
639 		return -1;
640 
641 	evlist__for_each_cpu (evsel_list, i, cpu) {
642 		affinity__set(&affinity, cpu);
643 
644 		evlist__for_each_entry(evsel_list, counter) {
645 			if (evsel__cpu_iter_skip(counter, cpu))
646 				continue;
647 			if (counter->reset_group || counter->errored)
648 				continue;
649 try_again:
650 			if (create_perf_stat_counter(counter, &stat_config, &target,
651 						     counter->cpu_iter - 1) < 0) {
652 
653 				/*
654 				 * Weak group failed. We cannot just undo this here
655 				 * because earlier CPUs might be in group mode, and the kernel
656 				 * doesn't support mixing group and non group reads. Defer
657 				 * it to later.
658 				 * Don't close here because we're in the wrong affinity.
659 				 */
660 				if ((errno == EINVAL || errno == EBADF) &&
661 				    counter->leader != counter &&
662 				    counter->weak_group) {
663 					perf_evlist__reset_weak_group(evsel_list, counter, false);
664 					assert(counter->reset_group);
665 					second_pass = true;
666 					continue;
667 				}
668 
669 				switch (stat_handle_error(counter)) {
670 				case COUNTER_FATAL:
671 					return -1;
672 				case COUNTER_RETRY:
673 					goto try_again;
674 				case COUNTER_SKIP:
675 					continue;
676 				default:
677 					break;
678 				}
679 
680 			}
681 			counter->supported = true;
682 		}
683 	}
684 
685 	if (second_pass) {
686 		/*
687 		 * Now redo all the weak group after closing them,
688 		 * and also close errored counters.
689 		 */
690 
691 		evlist__for_each_cpu(evsel_list, i, cpu) {
692 			affinity__set(&affinity, cpu);
693 			/* First close errored or weak retry */
694 			evlist__for_each_entry(evsel_list, counter) {
695 				if (!counter->reset_group && !counter->errored)
696 					continue;
697 				if (evsel__cpu_iter_skip_no_inc(counter, cpu))
698 					continue;
699 				perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
700 			}
701 			/* Now reopen weak */
702 			evlist__for_each_entry(evsel_list, counter) {
703 				if (!counter->reset_group && !counter->errored)
704 					continue;
705 				if (evsel__cpu_iter_skip(counter, cpu))
706 					continue;
707 				if (!counter->reset_group)
708 					continue;
709 try_again_reset:
710 				pr_debug2("reopening weak %s\n", evsel__name(counter));
711 				if (create_perf_stat_counter(counter, &stat_config, &target,
712 							     counter->cpu_iter - 1) < 0) {
713 
714 					switch (stat_handle_error(counter)) {
715 					case COUNTER_FATAL:
716 						return -1;
717 					case COUNTER_RETRY:
718 						goto try_again_reset;
719 					case COUNTER_SKIP:
720 						continue;
721 					default:
722 						break;
723 					}
724 				}
725 				counter->supported = true;
726 			}
727 		}
728 	}
729 	affinity__cleanup(&affinity);
730 
731 	evlist__for_each_entry(evsel_list, counter) {
732 		if (!counter->supported) {
733 			perf_evsel__free_fd(&counter->core);
734 			continue;
735 		}
736 
737 		l = strlen(counter->unit);
738 		if (l > stat_config.unit_width)
739 			stat_config.unit_width = l;
740 
741 		if (evsel__should_store_id(counter) &&
742 		    evsel__store_ids(counter, evsel_list))
743 			return -1;
744 	}
745 
746 	if (perf_evlist__apply_filters(evsel_list, &counter)) {
747 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
748 			counter->filter, evsel__name(counter), errno,
749 			str_error_r(errno, msg, sizeof(msg)));
750 		return -1;
751 	}
752 
753 	if (STAT_RECORD) {
754 		int err, fd = perf_data__fd(&perf_stat.data);
755 
756 		if (is_pipe) {
757 			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
758 		} else {
759 			err = perf_session__write_header(perf_stat.session, evsel_list,
760 							 fd, false);
761 		}
762 
763 		if (err < 0)
764 			return err;
765 
766 		err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
767 							 process_synthesized_event, is_pipe);
768 		if (err < 0)
769 			return err;
770 	}
771 
772 	/*
773 	 * Enable counters and exec the command:
774 	 */
775 	t0 = rdclock();
776 	clock_gettime(CLOCK_MONOTONIC, &ref_time);
777 
778 	if (forks) {
779 		perf_evlist__start_workload(evsel_list);
780 		enable_counters();
781 
782 		if (interval || timeout) {
783 			while (!waitpid(child_pid, &status, WNOHANG)) {
784 				nanosleep(&ts, NULL);
785 				if (timeout)
786 					break;
787 				process_interval();
788 				if (interval_count && !(--times))
789 					break;
790 			}
791 		}
792 		if (child_pid != -1) {
793 			if (timeout)
794 				kill(child_pid, SIGTERM);
795 			wait4(child_pid, &status, 0, &stat_config.ru_data);
796 		}
797 
798 		if (workload_exec_errno) {
799 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
800 			pr_err("Workload failed: %s\n", emsg);
801 			return -1;
802 		}
803 
804 		if (WIFSIGNALED(status))
805 			psignal(WTERMSIG(status), argv[0]);
806 	} else {
807 		enable_counters();
808 		while (!done) {
809 			nanosleep(&ts, NULL);
810 			if (!is_target_alive(&target, evsel_list->core.threads))
811 				break;
812 			if (timeout)
813 				break;
814 			if (interval) {
815 				process_interval();
816 				if (interval_count && !(--times))
817 					break;
818 			}
819 		}
820 	}
821 
822 	disable_counters();
823 
824 	t1 = rdclock();
825 
826 	if (stat_config.walltime_run_table)
827 		stat_config.walltime_run[run_idx] = t1 - t0;
828 
829 	if (interval) {
830 		stat_config.interval = 0;
831 		stat_config.summary = true;
832 		init_stats(&walltime_nsecs_stats);
833 		update_stats(&walltime_nsecs_stats, t1 - t0);
834 
835 		if (stat_config.aggr_mode == AGGR_GLOBAL)
836 			perf_evlist__save_aggr_prev_raw_counts(evsel_list);
837 
838 		perf_evlist__copy_prev_raw_counts(evsel_list);
839 		perf_evlist__reset_prev_raw_counts(evsel_list);
840 		runtime_stat_reset(&stat_config);
841 		perf_stat__reset_shadow_per_stat(&rt_stat);
842 	} else
843 		update_stats(&walltime_nsecs_stats, t1 - t0);
844 
845 	/*
846 	 * Closing a group leader splits the group, and as we only disable
847 	 * group leaders, results in remaining events becoming enabled. To
848 	 * avoid arbitrary skew, we must read all counters before closing any
849 	 * group leaders.
850 	 */
851 	read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
852 
853 	/*
854 	 * We need to keep evsel_list alive, because it's processed
855 	 * later the evsel_list will be closed after.
856 	 */
857 	if (!STAT_RECORD)
858 		evlist__close(evsel_list);
859 
860 	return WEXITSTATUS(status);
861 }
862 
863 static int run_perf_stat(int argc, const char **argv, int run_idx)
864 {
865 	int ret;
866 
867 	if (pre_cmd) {
868 		ret = system(pre_cmd);
869 		if (ret)
870 			return ret;
871 	}
872 
873 	if (sync_run)
874 		sync();
875 
876 	ret = __run_perf_stat(argc, argv, run_idx);
877 	if (ret)
878 		return ret;
879 
880 	if (post_cmd) {
881 		ret = system(post_cmd);
882 		if (ret)
883 			return ret;
884 	}
885 
886 	return ret;
887 }
888 
889 static void print_counters(struct timespec *ts, int argc, const char **argv)
890 {
891 	/* Do not print anything if we record to the pipe. */
892 	if (STAT_RECORD && perf_stat.data.is_pipe)
893 		return;
894 
895 	perf_evlist__print_counters(evsel_list, &stat_config, &target,
896 				    ts, argc, argv);
897 }
898 
899 static volatile int signr = -1;
900 
901 static void skip_signal(int signo)
902 {
903 	if ((child_pid == -1) || stat_config.interval)
904 		done = 1;
905 
906 	signr = signo;
907 	/*
908 	 * render child_pid harmless
909 	 * won't send SIGTERM to a random
910 	 * process in case of race condition
911 	 * and fast PID recycling
912 	 */
913 	child_pid = -1;
914 }
915 
916 static void sig_atexit(void)
917 {
918 	sigset_t set, oset;
919 
920 	/*
921 	 * avoid race condition with SIGCHLD handler
922 	 * in skip_signal() which is modifying child_pid
923 	 * goal is to avoid send SIGTERM to a random
924 	 * process
925 	 */
926 	sigemptyset(&set);
927 	sigaddset(&set, SIGCHLD);
928 	sigprocmask(SIG_BLOCK, &set, &oset);
929 
930 	if (child_pid != -1)
931 		kill(child_pid, SIGTERM);
932 
933 	sigprocmask(SIG_SETMASK, &oset, NULL);
934 
935 	if (signr == -1)
936 		return;
937 
938 	signal(signr, SIG_DFL);
939 	kill(getpid(), signr);
940 }
941 
942 void perf_stat__set_big_num(int set)
943 {
944 	stat_config.big_num = (set != 0);
945 }
946 
947 static int stat__set_big_num(const struct option *opt __maybe_unused,
948 			     const char *s __maybe_unused, int unset)
949 {
950 	big_num_opt = unset ? 0 : 1;
951 	perf_stat__set_big_num(!unset);
952 	return 0;
953 }
954 
955 static int enable_metric_only(const struct option *opt __maybe_unused,
956 			      const char *s __maybe_unused, int unset)
957 {
958 	force_metric_only = true;
959 	stat_config.metric_only = !unset;
960 	return 0;
961 }
962 
963 static int parse_metric_groups(const struct option *opt,
964 			       const char *str,
965 			       int unset __maybe_unused)
966 {
967 	return metricgroup__parse_groups(opt, str,
968 					 stat_config.metric_no_group,
969 					 stat_config.metric_no_merge,
970 					 &stat_config.metric_events);
971 }
972 
973 static struct option stat_options[] = {
974 	OPT_BOOLEAN('T', "transaction", &transaction_run,
975 		    "hardware transaction statistics"),
976 	OPT_CALLBACK('e', "event", &evsel_list, "event",
977 		     "event selector. use 'perf list' to list available events",
978 		     parse_events_option),
979 	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
980 		     "event filter", parse_filter),
981 	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
982 		    "child tasks do not inherit counters"),
983 	OPT_STRING('p', "pid", &target.pid, "pid",
984 		   "stat events on existing process id"),
985 	OPT_STRING('t', "tid", &target.tid, "tid",
986 		   "stat events on existing thread id"),
987 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
988 		    "system-wide collection from all CPUs"),
989 	OPT_BOOLEAN('g', "group", &group,
990 		    "put the counters into a counter group"),
991 	OPT_BOOLEAN(0, "scale", &stat_config.scale,
992 		    "Use --no-scale to disable counter scaling for multiplexing"),
993 	OPT_INCR('v', "verbose", &verbose,
994 		    "be more verbose (show counter open errors, etc)"),
995 	OPT_INTEGER('r', "repeat", &stat_config.run_count,
996 		    "repeat command and print average + stddev (max: 100, forever: 0)"),
997 	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
998 		    "display details about each run (only with -r option)"),
999 	OPT_BOOLEAN('n', "null", &stat_config.null_run,
1000 		    "null run - dont start any counters"),
1001 	OPT_INCR('d', "detailed", &detailed_run,
1002 		    "detailed run - start a lot of events"),
1003 	OPT_BOOLEAN('S', "sync", &sync_run,
1004 		    "call sync() before starting a run"),
1005 	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1006 			   "print large numbers with thousands\' separators",
1007 			   stat__set_big_num),
1008 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1009 		    "list of cpus to monitor in system-wide"),
1010 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1011 		    "disable CPU count aggregation", AGGR_NONE),
1012 	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
1013 	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
1014 		   "print counts with custom separator"),
1015 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1016 		     "monitor event in cgroup name only", parse_cgroups),
1017 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
1018 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1019 	OPT_INTEGER(0, "log-fd", &output_fd,
1020 		    "log output to fd, instead of stderr"),
1021 	OPT_STRING(0, "pre", &pre_cmd, "command",
1022 			"command to run prior to the measured command"),
1023 	OPT_STRING(0, "post", &post_cmd, "command",
1024 			"command to run after to the measured command"),
1025 	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1026 		    "print counts at regular interval in ms "
1027 		    "(overhead is possible for values <= 100ms)"),
1028 	OPT_INTEGER(0, "interval-count", &stat_config.times,
1029 		    "print counts for fixed number of times"),
1030 	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
1031 		    "clear screen in between new interval"),
1032 	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
1033 		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
1034 	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1035 		     "aggregate counts per processor socket", AGGR_SOCKET),
1036 	OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
1037 		     "aggregate counts per processor die", AGGR_DIE),
1038 	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1039 		     "aggregate counts per physical processor core", AGGR_CORE),
1040 	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1041 		     "aggregate counts per thread", AGGR_THREAD),
1042 	OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
1043 		     "aggregate counts per numa node", AGGR_NODE),
1044 	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
1045 		     "ms to wait before starting measurement after program start"),
1046 	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
1047 			"Only print computed metrics. No raw values", enable_metric_only),
1048 	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
1049 		       "don't group metric events, impacts multiplexing"),
1050 	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
1051 		       "don't try to share events between metrics in a group"),
1052 	OPT_BOOLEAN(0, "topdown", &topdown_run,
1053 			"measure topdown level 1 statistics"),
1054 	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
1055 			"measure SMI cost"),
1056 	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
1057 		     "monitor specified metrics or metric groups (separated by ,)",
1058 		     parse_metric_groups),
1059 	OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
1060 			 "Configure all used events to run in kernel space.",
1061 			 PARSE_OPT_EXCLUSIVE),
1062 	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
1063 			 "Configure all used events to run in user space.",
1064 			 PARSE_OPT_EXCLUSIVE),
1065 	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
1066 		    "Use with 'percore' event qualifier to show the event "
1067 		    "counts of one hardware thread by sum up total hardware "
1068 		    "threads of same physical core"),
1069 #ifdef HAVE_LIBPFM
1070 	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
1071 		"libpfm4 event selector. use 'perf list' to list available events",
1072 		parse_libpfm_events_option),
1073 #endif
1074 	OPT_END()
1075 };
1076 
1077 static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
1078 				 struct perf_cpu_map *map, int cpu)
1079 {
1080 	return cpu_map__get_socket(map, cpu, NULL);
1081 }
1082 
1083 static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
1084 			      struct perf_cpu_map *map, int cpu)
1085 {
1086 	return cpu_map__get_die(map, cpu, NULL);
1087 }
1088 
1089 static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
1090 			       struct perf_cpu_map *map, int cpu)
1091 {
1092 	return cpu_map__get_core(map, cpu, NULL);
1093 }
1094 
1095 static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
1096 			       struct perf_cpu_map *map, int cpu)
1097 {
1098 	return cpu_map__get_node(map, cpu, NULL);
1099 }
1100 
1101 static int perf_stat__get_aggr(struct perf_stat_config *config,
1102 			       aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
1103 {
1104 	int cpu;
1105 
1106 	if (idx >= map->nr)
1107 		return -1;
1108 
1109 	cpu = map->map[idx];
1110 
1111 	if (config->cpus_aggr_map->map[cpu] == -1)
1112 		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
1113 
1114 	return config->cpus_aggr_map->map[cpu];
1115 }
1116 
1117 static int perf_stat__get_socket_cached(struct perf_stat_config *config,
1118 					struct perf_cpu_map *map, int idx)
1119 {
1120 	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
1121 }
1122 
1123 static int perf_stat__get_die_cached(struct perf_stat_config *config,
1124 					struct perf_cpu_map *map, int idx)
1125 {
1126 	return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
1127 }
1128 
1129 static int perf_stat__get_core_cached(struct perf_stat_config *config,
1130 				      struct perf_cpu_map *map, int idx)
1131 {
1132 	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
1133 }
1134 
1135 static int perf_stat__get_node_cached(struct perf_stat_config *config,
1136 				      struct perf_cpu_map *map, int idx)
1137 {
1138 	return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
1139 }
1140 
1141 static bool term_percore_set(void)
1142 {
1143 	struct evsel *counter;
1144 
1145 	evlist__for_each_entry(evsel_list, counter) {
1146 		if (counter->percore)
1147 			return true;
1148 	}
1149 
1150 	return false;
1151 }
1152 
1153 static int perf_stat_init_aggr_mode(void)
1154 {
1155 	int nr;
1156 
1157 	switch (stat_config.aggr_mode) {
1158 	case AGGR_SOCKET:
1159 		if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1160 			perror("cannot build socket map");
1161 			return -1;
1162 		}
1163 		stat_config.aggr_get_id = perf_stat__get_socket_cached;
1164 		break;
1165 	case AGGR_DIE:
1166 		if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1167 			perror("cannot build die map");
1168 			return -1;
1169 		}
1170 		stat_config.aggr_get_id = perf_stat__get_die_cached;
1171 		break;
1172 	case AGGR_CORE:
1173 		if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1174 			perror("cannot build core map");
1175 			return -1;
1176 		}
1177 		stat_config.aggr_get_id = perf_stat__get_core_cached;
1178 		break;
1179 	case AGGR_NODE:
1180 		if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1181 			perror("cannot build core map");
1182 			return -1;
1183 		}
1184 		stat_config.aggr_get_id = perf_stat__get_node_cached;
1185 		break;
1186 	case AGGR_NONE:
1187 		if (term_percore_set()) {
1188 			if (cpu_map__build_core_map(evsel_list->core.cpus,
1189 						    &stat_config.aggr_map)) {
1190 				perror("cannot build core map");
1191 				return -1;
1192 			}
1193 			stat_config.aggr_get_id = perf_stat__get_core_cached;
1194 		}
1195 		break;
1196 	case AGGR_GLOBAL:
1197 	case AGGR_THREAD:
1198 	case AGGR_UNSET:
1199 	default:
1200 		break;
1201 	}
1202 
1203 	/*
1204 	 * The evsel_list->cpus is the base we operate on,
1205 	 * taking the highest cpu number to be the size of
1206 	 * the aggregation translate cpumap.
1207 	 */
1208 	nr = perf_cpu_map__max(evsel_list->core.cpus);
1209 	stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
1210 	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1211 }
1212 
1213 static void perf_stat__exit_aggr_mode(void)
1214 {
1215 	perf_cpu_map__put(stat_config.aggr_map);
1216 	perf_cpu_map__put(stat_config.cpus_aggr_map);
1217 	stat_config.aggr_map = NULL;
1218 	stat_config.cpus_aggr_map = NULL;
1219 }
1220 
1221 static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
1222 {
1223 	int cpu;
1224 
1225 	if (idx > map->nr)
1226 		return -1;
1227 
1228 	cpu = map->map[idx];
1229 
1230 	if (cpu >= env->nr_cpus_avail)
1231 		return -1;
1232 
1233 	return cpu;
1234 }
1235 
1236 static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
1237 {
1238 	struct perf_env *env = data;
1239 	int cpu = perf_env__get_cpu(env, map, idx);
1240 
1241 	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
1242 }
1243 
1244 static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
1245 {
1246 	struct perf_env *env = data;
1247 	int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
1248 
1249 	if (cpu != -1) {
1250 		/*
1251 		 * Encode socket in bit range 15:8
1252 		 * die_id is relative to socket,
1253 		 * we need a global id. So we combine
1254 		 * socket + die id
1255 		 */
1256 		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
1257 			return -1;
1258 
1259 		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1260 			return -1;
1261 
1262 		die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
1263 	}
1264 
1265 	return die_id;
1266 }
1267 
1268 static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
1269 {
1270 	struct perf_env *env = data;
1271 	int core = -1, cpu = perf_env__get_cpu(env, map, idx);
1272 
1273 	if (cpu != -1) {
1274 		/*
1275 		 * Encode socket in bit range 31:24
1276 		 * encode die id in bit range 23:16
1277 		 * core_id is relative to socket and die,
1278 		 * we need a global id. So we combine
1279 		 * socket + die id + core id
1280 		 */
1281 		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
1282 			return -1;
1283 
1284 		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1285 			return -1;
1286 
1287 		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
1288 			return -1;
1289 
1290 		core = (env->cpu[cpu].socket_id << 24) |
1291 		       (env->cpu[cpu].die_id << 16) |
1292 		       (env->cpu[cpu].core_id & 0xffff);
1293 	}
1294 
1295 	return core;
1296 }
1297 
1298 static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
1299 {
1300 	int cpu = perf_env__get_cpu(data, map, idx);
1301 
1302 	return perf_env__numa_node(data, cpu);
1303 }
1304 
1305 static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
1306 				      struct perf_cpu_map **sockp)
1307 {
1308 	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
1309 }
1310 
1311 static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
1312 				   struct perf_cpu_map **diep)
1313 {
1314 	return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1315 }
1316 
1317 static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
1318 				    struct perf_cpu_map **corep)
1319 {
1320 	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
1321 }
1322 
1323 static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
1324 				    struct perf_cpu_map **nodep)
1325 {
1326 	return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
1327 }
1328 
1329 static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
1330 				      struct perf_cpu_map *map, int idx)
1331 {
1332 	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
1333 }
1334 static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1335 				   struct perf_cpu_map *map, int idx)
1336 {
1337 	return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1338 }
1339 
1340 static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
1341 				    struct perf_cpu_map *map, int idx)
1342 {
1343 	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
1344 }
1345 
1346 static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
1347 				    struct perf_cpu_map *map, int idx)
1348 {
1349 	return perf_env__get_node(map, idx, &perf_stat.session->header.env);
1350 }
1351 
1352 static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1353 {
1354 	struct perf_env *env = &st->session->header.env;
1355 
1356 	switch (stat_config.aggr_mode) {
1357 	case AGGR_SOCKET:
1358 		if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1359 			perror("cannot build socket map");
1360 			return -1;
1361 		}
1362 		stat_config.aggr_get_id = perf_stat__get_socket_file;
1363 		break;
1364 	case AGGR_DIE:
1365 		if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1366 			perror("cannot build die map");
1367 			return -1;
1368 		}
1369 		stat_config.aggr_get_id = perf_stat__get_die_file;
1370 		break;
1371 	case AGGR_CORE:
1372 		if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1373 			perror("cannot build core map");
1374 			return -1;
1375 		}
1376 		stat_config.aggr_get_id = perf_stat__get_core_file;
1377 		break;
1378 	case AGGR_NODE:
1379 		if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1380 			perror("cannot build core map");
1381 			return -1;
1382 		}
1383 		stat_config.aggr_get_id = perf_stat__get_node_file;
1384 		break;
1385 	case AGGR_NONE:
1386 	case AGGR_GLOBAL:
1387 	case AGGR_THREAD:
1388 	case AGGR_UNSET:
1389 	default:
1390 		break;
1391 	}
1392 
1393 	return 0;
1394 }
1395 
1396 static int topdown_filter_events(const char **attr, char **str, bool use_group)
1397 {
1398 	int off = 0;
1399 	int i;
1400 	int len = 0;
1401 	char *s;
1402 
1403 	for (i = 0; attr[i]; i++) {
1404 		if (pmu_have_event("cpu", attr[i])) {
1405 			len += strlen(attr[i]) + 1;
1406 			attr[i - off] = attr[i];
1407 		} else
1408 			off++;
1409 	}
1410 	attr[i - off] = NULL;
1411 
1412 	*str = malloc(len + 1 + 2);
1413 	if (!*str)
1414 		return -1;
1415 	s = *str;
1416 	if (i - off == 0) {
1417 		*s = 0;
1418 		return 0;
1419 	}
1420 	if (use_group)
1421 		*s++ = '{';
1422 	for (i = 0; attr[i]; i++) {
1423 		strcpy(s, attr[i]);
1424 		s += strlen(s);
1425 		*s++ = ',';
1426 	}
1427 	if (use_group) {
1428 		s[-1] = '}';
1429 		*s = 0;
1430 	} else
1431 		s[-1] = 0;
1432 	return 0;
1433 }
1434 
1435 __weak bool arch_topdown_check_group(bool *warn)
1436 {
1437 	*warn = false;
1438 	return false;
1439 }
1440 
1441 __weak void arch_topdown_group_warn(void)
1442 {
1443 }
1444 
1445 /*
1446  * Add default attributes, if there were no attributes specified or
1447  * if -d/--detailed, -d -d or -d -d -d is used:
1448  */
1449 static int add_default_attributes(void)
1450 {
1451 	int err;
1452 	struct perf_event_attr default_attrs0[] = {
1453 
1454   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
1455   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
1456   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
1457   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
1458 
1459   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
1460 };
1461 	struct perf_event_attr frontend_attrs[] = {
1462   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
1463 };
1464 	struct perf_event_attr backend_attrs[] = {
1465   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
1466 };
1467 	struct perf_event_attr default_attrs1[] = {
1468   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
1469   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
1470   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
1471 
1472 };
1473 
1474 /*
1475  * Detailed stats (-d), covering the L1 and last level data caches:
1476  */
1477 	struct perf_event_attr detailed_attrs[] = {
1478 
1479   { .type = PERF_TYPE_HW_CACHE,
1480     .config =
1481 	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1482 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1483 	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1484 
1485   { .type = PERF_TYPE_HW_CACHE,
1486     .config =
1487 	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1488 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1489 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1490 
1491   { .type = PERF_TYPE_HW_CACHE,
1492     .config =
1493 	 PERF_COUNT_HW_CACHE_LL			<<  0  |
1494 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1495 	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1496 
1497   { .type = PERF_TYPE_HW_CACHE,
1498     .config =
1499 	 PERF_COUNT_HW_CACHE_LL			<<  0  |
1500 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1501 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1502 };
1503 
1504 /*
1505  * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1506  */
1507 	struct perf_event_attr very_detailed_attrs[] = {
1508 
1509   { .type = PERF_TYPE_HW_CACHE,
1510     .config =
1511 	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
1512 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1513 	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1514 
1515   { .type = PERF_TYPE_HW_CACHE,
1516     .config =
1517 	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
1518 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1519 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1520 
1521   { .type = PERF_TYPE_HW_CACHE,
1522     .config =
1523 	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
1524 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1525 	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1526 
1527   { .type = PERF_TYPE_HW_CACHE,
1528     .config =
1529 	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
1530 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1531 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1532 
1533   { .type = PERF_TYPE_HW_CACHE,
1534     .config =
1535 	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
1536 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1537 	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1538 
1539   { .type = PERF_TYPE_HW_CACHE,
1540     .config =
1541 	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
1542 	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1543 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1544 
1545 };
1546 
1547 /*
1548  * Very, very detailed stats (-d -d -d), adding prefetch events:
1549  */
1550 	struct perf_event_attr very_very_detailed_attrs[] = {
1551 
1552   { .type = PERF_TYPE_HW_CACHE,
1553     .config =
1554 	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1555 	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
1556 	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1557 
1558   { .type = PERF_TYPE_HW_CACHE,
1559     .config =
1560 	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1561 	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
1562 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1563 };
1564 	struct parse_events_error errinfo;
1565 
1566 	/* Set attrs if no event is selected and !null_run: */
1567 	if (stat_config.null_run)
1568 		return 0;
1569 
1570 	bzero(&errinfo, sizeof(errinfo));
1571 	if (transaction_run) {
1572 		/* Handle -T as -M transaction. Once platform specific metrics
1573 		 * support has been added to the json files, all archictures
1574 		 * will use this approach. To determine transaction support
1575 		 * on an architecture test for such a metric name.
1576 		 */
1577 		if (metricgroup__has_metric("transaction")) {
1578 			struct option opt = { .value = &evsel_list };
1579 
1580 			return metricgroup__parse_groups(&opt, "transaction",
1581 							 stat_config.metric_no_group,
1582 							stat_config.metric_no_merge,
1583 							 &stat_config.metric_events);
1584 		}
1585 
1586 		if (pmu_have_event("cpu", "cycles-ct") &&
1587 		    pmu_have_event("cpu", "el-start"))
1588 			err = parse_events(evsel_list, transaction_attrs,
1589 					   &errinfo);
1590 		else
1591 			err = parse_events(evsel_list,
1592 					   transaction_limited_attrs,
1593 					   &errinfo);
1594 		if (err) {
1595 			fprintf(stderr, "Cannot set up transaction events\n");
1596 			parse_events_print_error(&errinfo, transaction_attrs);
1597 			return -1;
1598 		}
1599 		return 0;
1600 	}
1601 
1602 	if (smi_cost) {
1603 		int smi;
1604 
1605 		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
1606 			fprintf(stderr, "freeze_on_smi is not supported.\n");
1607 			return -1;
1608 		}
1609 
1610 		if (!smi) {
1611 			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
1612 				fprintf(stderr, "Failed to set freeze_on_smi.\n");
1613 				return -1;
1614 			}
1615 			smi_reset = true;
1616 		}
1617 
1618 		if (pmu_have_event("msr", "aperf") &&
1619 		    pmu_have_event("msr", "smi")) {
1620 			if (!force_metric_only)
1621 				stat_config.metric_only = true;
1622 			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
1623 		} else {
1624 			fprintf(stderr, "To measure SMI cost, it needs "
1625 				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
1626 			parse_events_print_error(&errinfo, smi_cost_attrs);
1627 			return -1;
1628 		}
1629 		if (err) {
1630 			parse_events_print_error(&errinfo, smi_cost_attrs);
1631 			fprintf(stderr, "Cannot set up SMI cost events\n");
1632 			return -1;
1633 		}
1634 		return 0;
1635 	}
1636 
1637 	if (topdown_run) {
1638 		char *str = NULL;
1639 		bool warn = false;
1640 
1641 		if (stat_config.aggr_mode != AGGR_GLOBAL &&
1642 		    stat_config.aggr_mode != AGGR_CORE) {
1643 			pr_err("top down event configuration requires --per-core mode\n");
1644 			return -1;
1645 		}
1646 		stat_config.aggr_mode = AGGR_CORE;
1647 		if (nr_cgroups || !target__has_cpu(&target)) {
1648 			pr_err("top down event configuration requires system-wide mode (-a)\n");
1649 			return -1;
1650 		}
1651 
1652 		if (!force_metric_only)
1653 			stat_config.metric_only = true;
1654 		if (topdown_filter_events(topdown_attrs, &str,
1655 				arch_topdown_check_group(&warn)) < 0) {
1656 			pr_err("Out of memory\n");
1657 			return -1;
1658 		}
1659 		if (topdown_attrs[0] && str) {
1660 			if (warn)
1661 				arch_topdown_group_warn();
1662 			err = parse_events(evsel_list, str, &errinfo);
1663 			if (err) {
1664 				fprintf(stderr,
1665 					"Cannot set up top down events %s: %d\n",
1666 					str, err);
1667 				parse_events_print_error(&errinfo, str);
1668 				free(str);
1669 				return -1;
1670 			}
1671 		} else {
1672 			fprintf(stderr, "System does not support topdown\n");
1673 			return -1;
1674 		}
1675 		free(str);
1676 	}
1677 
1678 	if (!evsel_list->core.nr_entries) {
1679 		if (target__has_cpu(&target))
1680 			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
1681 
1682 		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
1683 			return -1;
1684 		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
1685 			if (perf_evlist__add_default_attrs(evsel_list,
1686 						frontend_attrs) < 0)
1687 				return -1;
1688 		}
1689 		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
1690 			if (perf_evlist__add_default_attrs(evsel_list,
1691 						backend_attrs) < 0)
1692 				return -1;
1693 		}
1694 		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
1695 			return -1;
1696 	}
1697 
1698 	/* Detailed events get appended to the event list: */
1699 
1700 	if (detailed_run <  1)
1701 		return 0;
1702 
1703 	/* Append detailed run extra attributes: */
1704 	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1705 		return -1;
1706 
1707 	if (detailed_run < 2)
1708 		return 0;
1709 
1710 	/* Append very detailed run extra attributes: */
1711 	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1712 		return -1;
1713 
1714 	if (detailed_run < 3)
1715 		return 0;
1716 
1717 	/* Append very, very detailed run extra attributes: */
1718 	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1719 }
1720 
1721 static const char * const stat_record_usage[] = {
1722 	"perf stat record [<options>]",
1723 	NULL,
1724 };
1725 
1726 static void init_features(struct perf_session *session)
1727 {
1728 	int feat;
1729 
1730 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1731 		perf_header__set_feat(&session->header, feat);
1732 
1733 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1734 	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1735 	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1736 	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1737 	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1738 }
1739 
1740 static int __cmd_record(int argc, const char **argv)
1741 {
1742 	struct perf_session *session;
1743 	struct perf_data *data = &perf_stat.data;
1744 
1745 	argc = parse_options(argc, argv, stat_options, stat_record_usage,
1746 			     PARSE_OPT_STOP_AT_NON_OPTION);
1747 
1748 	if (output_name)
1749 		data->path = output_name;
1750 
1751 	if (stat_config.run_count != 1 || forever) {
1752 		pr_err("Cannot use -r option with perf stat record.\n");
1753 		return -1;
1754 	}
1755 
1756 	session = perf_session__new(data, false, NULL);
1757 	if (IS_ERR(session)) {
1758 		pr_err("Perf session creation failed\n");
1759 		return PTR_ERR(session);
1760 	}
1761 
1762 	init_features(session);
1763 
1764 	session->evlist   = evsel_list;
1765 	perf_stat.session = session;
1766 	perf_stat.record  = true;
1767 	return argc;
1768 }
1769 
1770 static int process_stat_round_event(struct perf_session *session,
1771 				    union perf_event *event)
1772 {
1773 	struct perf_record_stat_round *stat_round = &event->stat_round;
1774 	struct evsel *counter;
1775 	struct timespec tsh, *ts = NULL;
1776 	const char **argv = session->header.env.cmdline_argv;
1777 	int argc = session->header.env.nr_cmdline;
1778 
1779 	evlist__for_each_entry(evsel_list, counter)
1780 		perf_stat_process_counter(&stat_config, counter);
1781 
1782 	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
1783 		update_stats(&walltime_nsecs_stats, stat_round->time);
1784 
1785 	if (stat_config.interval && stat_round->time) {
1786 		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
1787 		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
1788 		ts = &tsh;
1789 	}
1790 
1791 	print_counters(ts, argc, argv);
1792 	return 0;
1793 }
1794 
1795 static
1796 int process_stat_config_event(struct perf_session *session,
1797 			      union perf_event *event)
1798 {
1799 	struct perf_tool *tool = session->tool;
1800 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1801 
1802 	perf_event__read_stat_config(&stat_config, &event->stat_config);
1803 
1804 	if (perf_cpu_map__empty(st->cpus)) {
1805 		if (st->aggr_mode != AGGR_UNSET)
1806 			pr_warning("warning: processing task data, aggregation mode not set\n");
1807 		return 0;
1808 	}
1809 
1810 	if (st->aggr_mode != AGGR_UNSET)
1811 		stat_config.aggr_mode = st->aggr_mode;
1812 
1813 	if (perf_stat.data.is_pipe)
1814 		perf_stat_init_aggr_mode();
1815 	else
1816 		perf_stat_init_aggr_mode_file(st);
1817 
1818 	return 0;
1819 }
1820 
1821 static int set_maps(struct perf_stat *st)
1822 {
1823 	if (!st->cpus || !st->threads)
1824 		return 0;
1825 
1826 	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
1827 		return -EINVAL;
1828 
1829 	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
1830 
1831 	if (perf_evlist__alloc_stats(evsel_list, true))
1832 		return -ENOMEM;
1833 
1834 	st->maps_allocated = true;
1835 	return 0;
1836 }
1837 
1838 static
1839 int process_thread_map_event(struct perf_session *session,
1840 			     union perf_event *event)
1841 {
1842 	struct perf_tool *tool = session->tool;
1843 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1844 
1845 	if (st->threads) {
1846 		pr_warning("Extra thread map event, ignoring.\n");
1847 		return 0;
1848 	}
1849 
1850 	st->threads = thread_map__new_event(&event->thread_map);
1851 	if (!st->threads)
1852 		return -ENOMEM;
1853 
1854 	return set_maps(st);
1855 }
1856 
1857 static
1858 int process_cpu_map_event(struct perf_session *session,
1859 			  union perf_event *event)
1860 {
1861 	struct perf_tool *tool = session->tool;
1862 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1863 	struct perf_cpu_map *cpus;
1864 
1865 	if (st->cpus) {
1866 		pr_warning("Extra cpu map event, ignoring.\n");
1867 		return 0;
1868 	}
1869 
1870 	cpus = cpu_map__new_data(&event->cpu_map.data);
1871 	if (!cpus)
1872 		return -ENOMEM;
1873 
1874 	st->cpus = cpus;
1875 	return set_maps(st);
1876 }
1877 
1878 static const char * const stat_report_usage[] = {
1879 	"perf stat report [<options>]",
1880 	NULL,
1881 };
1882 
1883 static struct perf_stat perf_stat = {
1884 	.tool = {
1885 		.attr		= perf_event__process_attr,
1886 		.event_update	= perf_event__process_event_update,
1887 		.thread_map	= process_thread_map_event,
1888 		.cpu_map	= process_cpu_map_event,
1889 		.stat_config	= process_stat_config_event,
1890 		.stat		= perf_event__process_stat_event,
1891 		.stat_round	= process_stat_round_event,
1892 	},
1893 	.aggr_mode = AGGR_UNSET,
1894 };
1895 
1896 static int __cmd_report(int argc, const char **argv)
1897 {
1898 	struct perf_session *session;
1899 	const struct option options[] = {
1900 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
1901 	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1902 		     "aggregate counts per processor socket", AGGR_SOCKET),
1903 	OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
1904 		     "aggregate counts per processor die", AGGR_DIE),
1905 	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1906 		     "aggregate counts per physical processor core", AGGR_CORE),
1907 	OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
1908 		     "aggregate counts per numa node", AGGR_NODE),
1909 	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
1910 		     "disable CPU count aggregation", AGGR_NONE),
1911 	OPT_END()
1912 	};
1913 	struct stat st;
1914 	int ret;
1915 
1916 	argc = parse_options(argc, argv, options, stat_report_usage, 0);
1917 
1918 	if (!input_name || !strlen(input_name)) {
1919 		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
1920 			input_name = "-";
1921 		else
1922 			input_name = "perf.data";
1923 	}
1924 
1925 	perf_stat.data.path = input_name;
1926 	perf_stat.data.mode = PERF_DATA_MODE_READ;
1927 
1928 	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
1929 	if (IS_ERR(session))
1930 		return PTR_ERR(session);
1931 
1932 	perf_stat.session  = session;
1933 	stat_config.output = stderr;
1934 	evsel_list         = session->evlist;
1935 
1936 	ret = perf_session__process_events(session);
1937 	if (ret)
1938 		return ret;
1939 
1940 	perf_session__delete(session);
1941 	return 0;
1942 }
1943 
1944 static void setup_system_wide(int forks)
1945 {
1946 	/*
1947 	 * Make system wide (-a) the default target if
1948 	 * no target was specified and one of following
1949 	 * conditions is met:
1950 	 *
1951 	 *   - there's no workload specified
1952 	 *   - there is workload specified but all requested
1953 	 *     events are system wide events
1954 	 */
1955 	if (!target__none(&target))
1956 		return;
1957 
1958 	if (!forks)
1959 		target.system_wide = true;
1960 	else {
1961 		struct evsel *counter;
1962 
1963 		evlist__for_each_entry(evsel_list, counter) {
1964 			if (!counter->core.system_wide)
1965 				return;
1966 		}
1967 
1968 		if (evsel_list->core.nr_entries)
1969 			target.system_wide = true;
1970 	}
1971 }
1972 
1973 int cmd_stat(int argc, const char **argv)
1974 {
1975 	const char * const stat_usage[] = {
1976 		"perf stat [<options>] [<command>]",
1977 		NULL
1978 	};
1979 	int status = -EINVAL, run_idx;
1980 	const char *mode;
1981 	FILE *output = stderr;
1982 	unsigned int interval, timeout;
1983 	const char * const stat_subcommands[] = { "record", "report" };
1984 
1985 	setlocale(LC_ALL, "");
1986 
1987 	evsel_list = evlist__new();
1988 	if (evsel_list == NULL)
1989 		return -ENOMEM;
1990 
1991 	parse_events__shrink_config_terms();
1992 
1993 	/* String-parsing callback-based options would segfault when negated */
1994 	set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
1995 	set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
1996 	set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
1997 
1998 	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
1999 					(const char **) stat_usage,
2000 					PARSE_OPT_STOP_AT_NON_OPTION);
2001 	perf_stat__collect_metric_expr(evsel_list);
2002 	perf_stat__init_shadow_stats();
2003 
2004 	if (stat_config.csv_sep) {
2005 		stat_config.csv_output = true;
2006 		if (!strcmp(stat_config.csv_sep, "\\t"))
2007 			stat_config.csv_sep = "\t";
2008 	} else
2009 		stat_config.csv_sep = DEFAULT_SEPARATOR;
2010 
2011 	if (argc && !strncmp(argv[0], "rec", 3)) {
2012 		argc = __cmd_record(argc, argv);
2013 		if (argc < 0)
2014 			return -1;
2015 	} else if (argc && !strncmp(argv[0], "rep", 3))
2016 		return __cmd_report(argc, argv);
2017 
2018 	interval = stat_config.interval;
2019 	timeout = stat_config.timeout;
2020 
2021 	/*
2022 	 * For record command the -o is already taken care of.
2023 	 */
2024 	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2025 		output = NULL;
2026 
2027 	if (output_name && output_fd) {
2028 		fprintf(stderr, "cannot use both --output and --log-fd\n");
2029 		parse_options_usage(stat_usage, stat_options, "o", 1);
2030 		parse_options_usage(NULL, stat_options, "log-fd", 0);
2031 		goto out;
2032 	}
2033 
2034 	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
2035 		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2036 		goto out;
2037 	}
2038 
2039 	if (stat_config.metric_only && stat_config.run_count > 1) {
2040 		fprintf(stderr, "--metric-only is not supported with -r\n");
2041 		goto out;
2042 	}
2043 
2044 	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
2045 		fprintf(stderr, "--table is only supported with -r\n");
2046 		parse_options_usage(stat_usage, stat_options, "r", 1);
2047 		parse_options_usage(NULL, stat_options, "table", 0);
2048 		goto out;
2049 	}
2050 
2051 	if (output_fd < 0) {
2052 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
2053 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2054 		goto out;
2055 	}
2056 
2057 	if (!output) {
2058 		struct timespec tm;
2059 		mode = append_file ? "a" : "w";
2060 
2061 		output = fopen(output_name, mode);
2062 		if (!output) {
2063 			perror("failed to create output file");
2064 			return -1;
2065 		}
2066 		clock_gettime(CLOCK_REALTIME, &tm);
2067 		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
2068 	} else if (output_fd > 0) {
2069 		mode = append_file ? "a" : "w";
2070 		output = fdopen(output_fd, mode);
2071 		if (!output) {
2072 			perror("Failed opening logfd");
2073 			return -errno;
2074 		}
2075 	}
2076 
2077 	stat_config.output = output;
2078 
2079 	/*
2080 	 * let the spreadsheet do the pretty-printing
2081 	 */
2082 	if (stat_config.csv_output) {
2083 		/* User explicitly passed -B? */
2084 		if (big_num_opt == 1) {
2085 			fprintf(stderr, "-B option not supported with -x\n");
2086 			parse_options_usage(stat_usage, stat_options, "B", 1);
2087 			parse_options_usage(NULL, stat_options, "x", 1);
2088 			goto out;
2089 		} else /* Nope, so disable big number formatting */
2090 			stat_config.big_num = false;
2091 	} else if (big_num_opt == 0) /* User passed --no-big-num */
2092 		stat_config.big_num = false;
2093 
2094 	setup_system_wide(argc);
2095 
2096 	/*
2097 	 * Display user/system times only for single
2098 	 * run and when there's specified tracee.
2099 	 */
2100 	if ((stat_config.run_count == 1) && target__none(&target))
2101 		stat_config.ru_display = true;
2102 
2103 	if (stat_config.run_count < 0) {
2104 		pr_err("Run count must be a positive number\n");
2105 		parse_options_usage(stat_usage, stat_options, "r", 1);
2106 		goto out;
2107 	} else if (stat_config.run_count == 0) {
2108 		forever = true;
2109 		stat_config.run_count = 1;
2110 	}
2111 
2112 	if (stat_config.walltime_run_table) {
2113 		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
2114 		if (!stat_config.walltime_run) {
2115 			pr_err("failed to setup -r option");
2116 			goto out;
2117 		}
2118 	}
2119 
2120 	if ((stat_config.aggr_mode == AGGR_THREAD) &&
2121 		!target__has_task(&target)) {
2122 		if (!target.system_wide || target.cpu_list) {
2123 			fprintf(stderr, "The --per-thread option is only "
2124 				"available when monitoring via -p -t -a "
2125 				"options or only --per-thread.\n");
2126 			parse_options_usage(NULL, stat_options, "p", 1);
2127 			parse_options_usage(NULL, stat_options, "t", 1);
2128 			goto out;
2129 		}
2130 	}
2131 
2132 	/*
2133 	 * no_aggr, cgroup are for system-wide only
2134 	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
2135 	 */
2136 	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
2137 	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
2138 	    !target__has_cpu(&target)) {
2139 		fprintf(stderr, "both cgroup and no-aggregation "
2140 			"modes only available in system-wide mode\n");
2141 
2142 		parse_options_usage(stat_usage, stat_options, "G", 1);
2143 		parse_options_usage(NULL, stat_options, "A", 1);
2144 		parse_options_usage(NULL, stat_options, "a", 1);
2145 		goto out;
2146 	}
2147 
2148 	if (add_default_attributes())
2149 		goto out;
2150 
2151 	target__validate(&target);
2152 
2153 	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
2154 		target.per_thread = true;
2155 
2156 	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
2157 		if (target__has_task(&target)) {
2158 			pr_err("Problems finding threads of monitor\n");
2159 			parse_options_usage(stat_usage, stat_options, "p", 1);
2160 			parse_options_usage(NULL, stat_options, "t", 1);
2161 		} else if (target__has_cpu(&target)) {
2162 			perror("failed to parse CPUs map");
2163 			parse_options_usage(stat_usage, stat_options, "C", 1);
2164 			parse_options_usage(NULL, stat_options, "a", 1);
2165 		}
2166 		goto out;
2167 	}
2168 
2169 	evlist__check_cpu_maps(evsel_list);
2170 
2171 	/*
2172 	 * Initialize thread_map with comm names,
2173 	 * so we could print it out on output.
2174 	 */
2175 	if (stat_config.aggr_mode == AGGR_THREAD) {
2176 		thread_map__read_comms(evsel_list->core.threads);
2177 		if (target.system_wide) {
2178 			if (runtime_stat_new(&stat_config,
2179 				perf_thread_map__nr(evsel_list->core.threads))) {
2180 				goto out;
2181 			}
2182 		}
2183 	}
2184 
2185 	if (stat_config.aggr_mode == AGGR_NODE)
2186 		cpu__setup_cpunode_map();
2187 
2188 	if (stat_config.times && interval)
2189 		interval_count = true;
2190 	else if (stat_config.times && !interval) {
2191 		pr_err("interval-count option should be used together with "
2192 				"interval-print.\n");
2193 		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2194 		parse_options_usage(stat_usage, stat_options, "I", 1);
2195 		goto out;
2196 	}
2197 
2198 	if (timeout && timeout < 100) {
2199 		if (timeout < 10) {
2200 			pr_err("timeout must be >= 10ms.\n");
2201 			parse_options_usage(stat_usage, stat_options, "timeout", 0);
2202 			goto out;
2203 		} else
2204 			pr_warning("timeout < 100ms. "
2205 				   "The overhead percentage could be high in some cases. "
2206 				   "Please proceed with caution.\n");
2207 	}
2208 	if (timeout && interval) {
2209 		pr_err("timeout option is not supported with interval-print.\n");
2210 		parse_options_usage(stat_usage, stat_options, "timeout", 0);
2211 		parse_options_usage(stat_usage, stat_options, "I", 1);
2212 		goto out;
2213 	}
2214 
2215 	if (perf_evlist__alloc_stats(evsel_list, interval))
2216 		goto out;
2217 
2218 	if (perf_stat_init_aggr_mode())
2219 		goto out;
2220 
2221 	/*
2222 	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
2223 	 * while avoiding that older tools show confusing messages.
2224 	 *
2225 	 * However for pipe sessions we need to keep it zero,
2226 	 * because script's perf_evsel__check_attr is triggered
2227 	 * by attr->sample_type != 0, and we can't run it on
2228 	 * stat sessions.
2229 	 */
2230 	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
2231 
2232 	/*
2233 	 * We dont want to block the signals - that would cause
2234 	 * child tasks to inherit that and Ctrl-C would not work.
2235 	 * What we want is for Ctrl-C to work in the exec()-ed
2236 	 * task, but being ignored by perf stat itself:
2237 	 */
2238 	atexit(sig_atexit);
2239 	if (!forever)
2240 		signal(SIGINT,  skip_signal);
2241 	signal(SIGCHLD, skip_signal);
2242 	signal(SIGALRM, skip_signal);
2243 	signal(SIGABRT, skip_signal);
2244 
2245 	status = 0;
2246 	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
2247 		if (stat_config.run_count != 1 && verbose > 0)
2248 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
2249 				run_idx + 1);
2250 
2251 		if (run_idx != 0)
2252 			perf_evlist__reset_prev_raw_counts(evsel_list);
2253 
2254 		status = run_perf_stat(argc, argv, run_idx);
2255 		if (forever && status != -1 && !interval) {
2256 			print_counters(NULL, argc, argv);
2257 			perf_stat__reset_stats();
2258 		}
2259 	}
2260 
2261 	if (!forever && status != -1 && (!interval || stat_config.summary))
2262 		print_counters(NULL, argc, argv);
2263 
2264 	if (STAT_RECORD) {
2265 		/*
2266 		 * We synthesize the kernel mmap record just so that older tools
2267 		 * don't emit warnings about not being able to resolve symbols
2268 		 * due to /proc/sys/kernel/kptr_restrict settings and instear provide
2269 		 * a saner message about no samples being in the perf.data file.
2270 		 *
2271 		 * This also serves to suppress a warning about f_header.data.size == 0
2272 		 * in header.c at the moment 'perf stat record' gets introduced, which
2273 		 * is not really needed once we start adding the stat specific PERF_RECORD_
2274 		 * records, but the need to suppress the kptr_restrict messages in older
2275 		 * tools remain  -acme
2276 		 */
2277 		int fd = perf_data__fd(&perf_stat.data);
2278 		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
2279 							     process_synthesized_event,
2280 							     &perf_stat.session->machines.host);
2281 		if (err) {
2282 			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
2283 				   "older tools may produce warnings about this file\n.");
2284 		}
2285 
2286 		if (!interval) {
2287 			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
2288 				pr_err("failed to write stat round event\n");
2289 		}
2290 
2291 		if (!perf_stat.data.is_pipe) {
2292 			perf_stat.session->header.data_size += perf_stat.bytes_written;
2293 			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
2294 		}
2295 
2296 		evlist__close(evsel_list);
2297 		perf_session__delete(perf_stat.session);
2298 	}
2299 
2300 	perf_stat__exit_aggr_mode();
2301 	perf_evlist__free_stats(evsel_list);
2302 out:
2303 	zfree(&stat_config.walltime_run);
2304 
2305 	if (smi_cost && smi_reset)
2306 		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
2307 
2308 	evlist__delete(evsel_list);
2309 
2310 	runtime_stat_delete(&stat_config);
2311 
2312 	return status;
2313 }
2314