xref: /openbmc/linux/tools/perf/builtin-stat.c (revision 4f3db074)
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8 
9    $ perf stat ./hackbench 10
10 
11   Time: 0.118
12 
13   Performance counter stats for './hackbench 10':
14 
15        1708.761321 task-clock                #   11.037 CPUs utilized
16             41,190 context-switches          #    0.024 M/sec
17              6,735 CPU-migrations            #    0.004 M/sec
18             17,318 page-faults               #    0.010 M/sec
19      5,205,202,243 cycles                    #    3.046 GHz
20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
22      2,603,501,247 instructions              #    0.50  insns per cycle
23                                              #    1.48  stalled cycles per insn
24        484,357,498 branches                  #  283.455 M/sec
25          6,388,934 branch-misses             #    1.32% of all branches
26 
27         0.154822978  seconds time elapsed
28 
29  *
30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31  *
32  * Improvements and fixes by:
33  *
34  *   Arjan van de Ven <arjan@linux.intel.com>
35  *   Yanmin Zhang <yanmin.zhang@intel.com>
36  *   Wu Fengguang <fengguang.wu@intel.com>
37  *   Mike Galbraith <efault@gmx.de>
38  *   Paul Mackerras <paulus@samba.org>
39  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40  *
41  * Released under the GPL v2. (and only v2, not any later version)
42  */
43 
44 #include "perf.h"
45 #include "builtin.h"
46 #include "util/cgroup.h"
47 #include "util/util.h"
48 #include "util/parse-options.h"
49 #include "util/parse-events.h"
50 #include "util/pmu.h"
51 #include "util/event.h"
52 #include "util/evlist.h"
53 #include "util/evsel.h"
54 #include "util/debug.h"
55 #include "util/color.h"
56 #include "util/stat.h"
57 #include "util/header.h"
58 #include "util/cpumap.h"
59 #include "util/thread.h"
60 #include "util/thread_map.h"
61 
62 #include <stdlib.h>
63 #include <sys/prctl.h>
64 #include <locale.h>
65 
/* Default -x field separator and placeholder strings for unreadable counters. */
#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

/* Forward declarations: the printing routines are defined later in this file. */
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr(char *prefix);
74 
/* Default events used for perf stat -T */
static const char * const transaction_attrs[] = {
	"task-clock",
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events. */
static const char * const transaction_limited_attrs[] = {
	"task-clock",
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

/*
 * Positional indices of the -T events in the evsel list, used with
 * nth_evsel(); must match transaction_attrs and the beginning of
 * transaction_limited_attrs above.
 */
enum {
	T_TASK_CLOCK,
	T_INSTRUCTIONS,
	T_CYCLES,
	T_CYCLES_IN_TX,
	T_TRANSACTION_START,
	T_ELISION_START,
	T_CYCLES_IN_TX_CP,
};
109 
/* All events being counted in this session. */
static struct perf_evlist	*evsel_list;

/* What to attach to (pid/tid/cpu/uid); UINT_MAX uid means "not set". */
static struct target target = {
	.uid	= UINT_MAX,
};

/* How counts are aggregated for display. */
enum aggr_mode {
	AGGR_NONE,		/* one line per CPU */
	AGGR_GLOBAL,		/* one line per event, summed over CPUs */
	AGGR_SOCKET,		/* one line per socket */
	AGGR_CORE,		/* one line per core */
};

/* Command-line option state, consumed throughout this file. */
static int			run_count			=  1;
static bool			no_inherit			= false;
static bool			scale				=  true;	/* read ena/run and scale counts */
static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
static volatile pid_t		child_pid			= -1;	/* forked workload, -1 if none */
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			transaction_run;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static FILE			*output				= NULL;
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
static unsigned int		interval			= 0;	/* periodic print interval, in ms */
static unsigned int		initial_delay			= 0;	/* ms to sleep before enabling counters */
static unsigned int		unit_width			= 4; /* strlen("unit") */
static bool			forever				= false;
static struct timespec		ref_time;	/* when counters were enabled; base for -I timestamps */
static struct cpu_map		*aggr_map;
static int			(*aggr_get_id)(struct cpu_map *m, int cpu);

/*
 * Loop-exit flag for attach mode; presumably set asynchronously
 * (signal handler not in this chunk — confirm against full file).
 */
static volatile int done = 0;

/* Per-event noise statistics kept across repeated (-r) runs. */
struct perf_stat {
	/* Mirrors the value/enabled/running triple read from the kernel. */
	struct stats	  res_stats[3];
};
153 
/*
 * Compute r = a - b for two timespecs, normalized so that
 * 0 <= r->tv_nsec < 1e9.  Assumes a >= b.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	long nsec_delta = a->tv_nsec - b->tv_nsec;

	r->tv_sec = a->tv_sec - b->tv_sec;
	r->tv_nsec = nsec_delta;
	/* Borrow one second when the nanosecond field underflows. */
	if (nsec_delta < 0) {
		r->tv_nsec += 1000000000L;
		r->tv_sec -= 1;
	}
}
165 
166 static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
167 {
168 	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
169 }
170 
/* Number of CPUs the event will be counted on. */
static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
	return perf_evsel__cpus(evsel)->nr;
}
175 
/* Reset the per-event noise stats kept across repeated (-r) runs. */
static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
	int i;
	struct perf_stat *ps = evsel->priv;

	/* res_stats has 3 entries, matching the val/ena/run triple. */
	for (i = 0; i < 3; i++)
		init_stats(&ps->res_stats[i]);
}
184 
185 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
186 {
187 	evsel->priv = zalloc(sizeof(struct perf_stat));
188 	if (evsel->priv == NULL)
189 		return -ENOMEM;
190 	perf_evsel__reset_stat_priv(evsel);
191 	return 0;
192 }
193 
/* Free the stats area and NULL the pointer (zfree handles both). */
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
}
198 
199 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
200 {
201 	void *addr;
202 	size_t sz;
203 
204 	sz = sizeof(*evsel->counts) +
205 	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
206 
207 	addr = zalloc(sz);
208 	if (!addr)
209 		return -ENOMEM;
210 
211 	evsel->prev_raw_counts =  addr;
212 
213 	return 0;
214 }
215 
/* Release the interval-mode snapshot buffer and NULL the pointer. */
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
	zfree(&evsel->prev_raw_counts);
}
220 
/* Free every per-event allocation made by perf_evlist__alloc_stats(). */
static void perf_evlist__free_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		perf_evsel__free_stat_priv(evsel);
		perf_evsel__free_counts(evsel);
		perf_evsel__free_prev_raw_counts(evsel);
	}
}
231 
/*
 * Allocate the stats area and counts buffers for every event;
 * alloc_raw additionally allocates the interval-mode snapshot.
 * On any failure everything already allocated is freed; returns -1.
 */
static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
		    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
		    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}
249 
/*
 * Bits describing which privilege contexts an event excludes; the
 * resulting bitmask (see evsel_context()) indexes the shadow-stat
 * arrays below so differently-scoped events do not mix their ratios.
 */
enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

/* Shadow stats fed by update_shadow_stats(), read by the printers. */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
276 
277 static int evsel_context(struct perf_evsel *evsel)
278 {
279 	int ctx = 0;
280 
281 	if (evsel->attr.exclude_kernel)
282 		ctx |= CTX_BIT_KERNEL;
283 	if (evsel->attr.exclude_user)
284 		ctx |= CTX_BIT_USER;
285 	if (evsel->attr.exclude_hv)
286 		ctx |= CTX_BIT_HV;
287 	if (evsel->attr.exclude_host)
288 		ctx |= CTX_BIT_HOST;
289 	if (evsel->attr.exclude_idle)
290 		ctx |= CTX_BIT_IDLE;
291 
292 	return ctx;
293 }
294 
/*
 * Zero all per-event and global shadow stats before a fresh run
 * (used by -r repeats and --forever mode).
 */
static void perf_stat__reset_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		perf_evsel__reset_stat_priv(evsel);
		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
	}

	/* Every shadow array declared above must be cleared here. */
	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(runtime_cycles_in_tx_stats, 0,
			sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0,
		sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}
322 
/*
 * Configure and open one counter, per-cpu or per-thread depending on
 * the target.  Returns the perf_evsel__open_* result (<0 on error,
 * with errno set by the failed perf_event_open).
 */
static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	/* Ask the kernel for enabled/running times so counts can be scaled. */
	if (scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	if (target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	/*
	 * Group leaders start disabled; without --delay they are enabled
	 * automatically by the kernel when the workload execs.
	 */
	if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;
		if (!initial_delay)
			attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
344 
/*
 * Does the counter have nsecs as a unit?
 * True for the cpu-clock and task-clock software events.
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}
356 
/*
 * Return the n-th event in evsel_list, or NULL when out of range.
 * Used with the T_* indices to locate the -T transaction events.
 * The lookup array is built once and cached for the process lifetime
 * (never freed — intentional for a short-lived tool).
 */
static struct perf_evsel *nth_evsel(int n)
{
	static struct perf_evsel **array;
	static int array_len;
	struct perf_evsel *ev;
	int j;

	/* Assumes this only called when evsel_list does not change anymore. */
	if (!array) {
		evlist__for_each(evsel_list, ev)
			array_len++;
		array = malloc(array_len * sizeof(void *));
		if (!array)
			exit(ENOMEM);
		j = 0;
		evlist__for_each(evsel_list, ev)
			array[j++] = ev;
	}
	/* NOTE(review): callers pass T_* constants, so n is never negative. */
	if (n < array_len)
		return array[n];
	return NULL;
}
379 
380 /*
381  * Update various tracking values we maintain to print
382  * more semantic information such as miss/hit ratios,
383  * instruction rates, etc:
384  */
385 static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
386 				int cpu)
387 {
388 	int ctx = evsel_context(counter);
389 
390 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
391 		update_stats(&runtime_nsecs_stats[cpu], count[0]);
392 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
393 		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
394 	else if (transaction_run &&
395 		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
396 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
397 	else if (transaction_run &&
398 		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
399 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
400 	else if (transaction_run &&
401 		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
402 		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
403 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
404 		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
405 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
406 		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
407 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
408 		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
409 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
410 		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
411 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
412 		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
413 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
414 		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
415 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
416 		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
417 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
418 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
419 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
420 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
421 }
422 
/* Clear the per-socket "already counted" mask before a fresh read pass. */
static void zero_per_pkg(struct perf_evsel *counter)
{
	if (counter->per_pkg_mask)
		memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}
428 
/*
 * For .per_pkg events, ensure each package (socket) contributes only
 * once per read pass: *skip is set when another CPU on the same socket
 * has already been counted.  Returns 0 on success, negative on
 * allocation or topology-lookup failure.
 */
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
	unsigned long *mask = counter->per_pkg_mask;
	struct cpu_map *cpus = perf_evsel__cpus(counter);
	int s;

	*skip = false;

	if (!counter->per_pkg)
		return 0;

	/* Per-thread (dummy cpu map) counting: nothing to deduplicate. */
	if (cpu_map__empty(cpus))
		return 0;

	/* Lazily allocate the socket bitmap on first use. */
	if (!mask) {
		mask = zalloc(MAX_NR_CPUS);
		if (!mask)
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	s = cpu_map__get_socket(cpus, cpu);
	if (s < 0)
		return -1;

	*skip = test_and_set_bit(s, mask) == 1;
	return 0;
}
458 
/*
 * Per-cpu/thread read callback: fold one raw counter value into the
 * per-cpu slot or the global aggregate, depending on aggr_mode.
 */
static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
		   struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, cpu, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	/* Duplicate socket for a per-pkg event: contribute zero. */
	if (skip)
		count = &zero;

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, count);
		perf_counts_values__scale(count, scale, NULL);
		evsel->counts->cpu[cpu] = *count;
		if (aggr_mode == AGGR_NONE)
			update_shadow_stats(evsel, count->values, cpu);
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		if (scale) {
			aggr->ena += count->ena;
			aggr->run += count->run;
		}
		/* fall through */
	default:
		break;
	}

	return 0;
}
497 
498 static int read_counter(struct perf_evsel *counter);
499 
/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	/* Restart the aggregate; read_cb() sums into it via read_counter(). */
	aggr->val = aggr->ena = aggr->run = 0;

	if (read_counter(counter))
		return -1;

	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, aggr);
	perf_counts_values__scale(aggr, scale, &counter->counts->scaled);

	/* Feed the val/ena/run triple into the -r noise statistics. */
	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count, 0);

	return 0;
}
535 
/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	if (!counter->supported)
		return -ENOENT;

	/* System-wide events have one fd per cpu, not per thread. */
	if (counter->system_wide)
		nthreads = 1;

	/* Reset socket dedup state so each pass counts each socket once. */
	if (counter->per_pkg)
		zero_per_pkg(counter);

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			if (perf_evsel__read_cb(counter, cpu, thread, read_cb))
				return -1;
		}
	}

	return 0;
}
564 
/*
 * Read all counters and print one block of interval (-I) output,
 * prefixed with the time elapsed since the counters were enabled.
 */
static void print_interval(void)
{
	static int num_print_interval;
	struct perf_evsel *counter;
	struct perf_stat *ps;
	struct timespec ts, rs;
	char prefix[64];

	/* Re-read every counter; noise stats are reset so each interval
	 * reflects only the current sample. */
	if (aggr_mode == AGGR_GLOBAL) {
		evlist__for_each(evsel_list, counter) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter_aggr(counter);
		}
	} else	{
		evlist__for_each(evsel_list, counter) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter(counter);
		}
	}

	/* Timestamp prefix: seconds.nanoseconds since ref_time. */
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	/* Column header, specific to the aggregation mode. */
	if (num_print_interval == 0 && !csv_output) {
		switch (aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
		}
	}

	/* Repeat the header every 25 interval blocks. */
	if (++num_print_interval == 25)
		num_print_interval = 0;

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_NONE:
		evlist__for_each(evsel_list, counter)
			print_counter(counter, prefix);
		break;
	case AGGR_GLOBAL:
	default:
		evlist__for_each(evsel_list, counter)
			print_counter_aggr(counter, prefix);
	}

	fflush(output);
}
628 
629 static void handle_initial_delay(void)
630 {
631 	struct perf_evsel *counter;
632 
633 	if (initial_delay) {
634 		const int ncpus = cpu_map__nr(evsel_list->cpus),
635 			nthreads = thread_map__nr(evsel_list->threads);
636 
637 		usleep(initial_delay * 1000);
638 		evlist__for_each(evsel_list, counter)
639 			perf_evsel__enable(counter, ncpus, nthreads);
640 	}
641 }
642 
/* Non-zero when the forked workload failed to exec; holds its errno. */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* The exec errno is carried in the signal's value payload. */
	workload_exec_errno = info->si_value.sival_int;
}
655 
/*
 * Open all counters, run (or attach alongside) the workload, and
 * collect the final counts.  Returns the workload's exit status on
 * success, -1 on setup or workload failure.
 */
static int __run_perf_stat(int argc, const char **argv)
{
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);	/* command given => fork a workload */

	/* Sleep granularity between interval prints (default 1s). */
	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each(evsel_list, counter) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;
				continue;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			/* Hard failure: reap the workload before bailing out. */
			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		/* Track the widest unit string for column alignment. */
		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			strerror_r(errno, msg, sizeof(msg)));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		handle_initial_delay();

		/* Interval mode: print while the child is still running. */
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				print_interval();
			}
		}
		wait(&status);

		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		/* Attach mode: loop until 'done' is set asynchronously. */
		handle_initial_delay();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				print_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	/* Final read-out, then close every counter fd. */
	if (aggr_mode == AGGR_GLOBAL) {
		evlist__for_each(evsel_list, counter) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	} else {
		evlist__for_each(evsel_list, counter) {
			read_counter(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
		}
	}

	return WEXITSTATUS(status);
}
779 
780 static int run_perf_stat(int argc, const char **argv)
781 {
782 	int ret;
783 
784 	if (pre_cmd) {
785 		ret = system(pre_cmd);
786 		if (ret)
787 			return ret;
788 	}
789 
790 	if (sync_run)
791 		sync();
792 
793 	ret = __run_perf_stat(argc, argv);
794 	if (ret)
795 		return ret;
796 
797 	if (post_cmd) {
798 		ret = system(post_cmd);
799 		if (ret)
800 			return ret;
801 	}
802 
803 	return ret;
804 }
805 
/*
 * Print how long the counter was actually scheduled: raw running time
 * plus run/enabled percentage in CSV mode, a parenthesized percentage
 * in human mode (only when the counter was multiplexed, run != ena).
 */
static void print_running(u64 run, u64 ena)
{
	if (csv_output) {
		fprintf(output, "%s%" PRIu64 "%s%.2f",
					csv_sep,
					run,
					csv_sep,
					ena ? 100.0 * run / ena : 100.0);
	} else if (run != ena) {
		/* NOTE(review): assumes ena != 0 whenever run != ena — confirm. */
		fprintf(output, "  (%.2f%%)", 100.0 * run / ena);
	}
}
818 
/* Print relative stddev (run-to-run noise) as a percentage of avg. */
static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(output, "  ( +-%6.2f%% )", pct);
}
828 
/* Print run-to-run noise for a counter; meaningless with a single run. */
static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	/* res_stats[0] is the counter value's stats across repeats. */
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
839 
/*
 * Print the per-line aggregation prefix: socket/core id and cpu count,
 * socket id and cpu count, or bare CPU number, depending on aggr_mode.
 * Column widths collapse to 0 in CSV mode.
 */
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (aggr_mode) {
	case AGGR_CORE:
		fprintf(output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
			break;
	case AGGR_NONE:
		/* id is an index into the event's cpu map here, not a cpu number. */
		fprintf(output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_GLOBAL:
	default:
		break;
	}
}
872 
/*
 * Print one line for a nanosecond-unit counter (cpu-clock/task-clock)
 * as milliseconds, with an optional "CPUs utilized" annotation
 * (task-clock / wall time) in non-CSV, non-interval mode.
 */
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	const char *fmt_v, *fmt_n;
	char name[25];

	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

	aggr_printout(evsel, id, nr);

	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	/* No trailing annotations in CSV or interval output. */
	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}
908 
909 /* used for get_ratio_color() */
910 enum grc_type {
911 	GRC_STALLED_CYCLES_FE,
912 	GRC_STALLED_CYCLES_BE,
913 	GRC_CACHE_MISSES,
914 	GRC_MAX_NR
915 };
916 
917 static const char *get_ratio_color(enum grc_type type, double ratio)
918 {
919 	static const double grc_table[GRC_MAX_NR][3] = {
920 		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
921 		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
922 		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
923 	};
924 	const char *color = PERF_COLOR_NORMAL;
925 
926 	if (ratio > grc_table[type][0])
927 		color = PERF_COLOR_RED;
928 	else if (ratio > grc_table[type][1])
929 		color = PERF_COLOR_MAGENTA;
930 	else if (ratio > grc_table[type][2])
931 		color = PERF_COLOR_YELLOW;
932 
933 	return color;
934 }
935 
/*
 * Annotate stalled frontend cycles as a percentage of all cycles in
 * the same context, colour-coded by severity.
 * NOTE(review): evsel is used (evsel_context), so __maybe_unused is stale.
 */
static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
}
955 
/*
 * Annotate stalled backend cycles as a percentage of all cycles in
 * the same context, colour-coded by severity.
 */
static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
}
975 
/* Annotate branch misses as a percentage of all branch instructions. */
static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_branches_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
}
995 
/* Annotate L1-dcache misses as a percentage of all L1-dcache accesses. */
static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
}
1015 
/*
 * Annotate L1-icache misses as a percentage of all L1-icache accesses.
 * Reads runtime_l1_icache_stats, which update_shadow_stats() must feed.
 */
static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
}
1035 
/* Annotate dTLB misses as a percentage of all dTLB accesses. */
static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
}
1055 
/* Annotate iTLB misses as a percentage of all iTLB accesses. */
static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
}
1075 
/* Annotate last-level-cache misses as a percentage of all LL accesses. */
static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
}
1095 
/*
 * Print one counter value plus a derived "shadow" metric (IPC, cache
 * miss ratio, GHz, M/sec, ...) computed from the runtime stats gathered
 * for related events on the same context/cpu.
 *
 * @id:    aggregation id (cpu id, socket/core id, or -1 for global)
 * @nr:    number of CPUs aggregated into this line (used by aggr_printout)
 * @evsel: the event whose value is being printed
 * @avg:   the scaled counter value to print
 */
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0, total2;
	double sc =  evsel->scale;
	const char *fmt;
	int cpu = cpu_map__id_to_cpu(id);
	int ctx = evsel_context(evsel);

	/*
	 * Pick the value format: fractional digits only for scaled events,
	 * thousands' separators (%') only for human-readable big_num output.
	 */
	if (csv_output) {
		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
	} else {
		if (big_num)
			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
		else
			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
	}

	aggr_printout(evsel, id, nr);

	/* In global aggregation the runtime stats are stored at index 0. */
	if (aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	/* Shadow metrics are only emitted in plain, non-interval output. */
	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		/* instructions / cycles = IPC */
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total) {
			ratio = avg / total;
			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
		} else {
			fprintf(output, "                                   ");
		}
		/* Use the worse (larger) of frontend/backend stall counts. */
		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n");
			if (aggr_mode == AGGR_NONE)
				fprintf(output, "        ");
			fprintf(output, "                                                  #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[ctx][cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[ctx][cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[ctx][cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[ctx][cpu].n != 0) {
		/* cache-misses as a fraction of cache-references */
		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		/* cycles / nanoseconds = GHz */
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total) {
			ratio = avg / total;
			fprintf(output, " # %8.3f GHz                    ", ratio);
		} else {
			fprintf(output, "                                   ");
		}
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total)
			fprintf(output,
				" #   %5.2f%% transactional cycles   ",
				100.0 * (avg / total));
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
		/* Clamp so the aborted-cycles fraction can't go negative. */
		if (total2 < avg)
			total2 = avg;
		if (total)
			fprintf(output,
				" #   %5.2f%% aborted cycles         ",
				100.0 * ((total2-avg) / total));
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
		   avg > 0 &&
		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (total)
			ratio = total / avg;

		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
		   avg > 0 &&
		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (total)
			ratio = total / avg;

		fprintf(output, " # %8.0f cycles / elision       ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		/* Fallback: event rate per second of task-clock time. */
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}
1266 
/*
 * Print one line per (aggregation id, event) pair for AGGR_SOCKET and
 * AGGR_CORE modes: counts from all CPUs that map to the same id are
 * summed before printing.
 *
 * @prefix: optional string (e.g. interval timestamp) printed before
 *          each line; may be NULL.
 */
static void print_aggr(char *prefix)
{
	struct perf_evsel *counter;
	int cpu, cpu2, s, s2, id, nr;
	double uval;
	u64 ena, run, val;

	if (!(aggr_map || aggr_get_id))
		return;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		evlist__for_each(evsel_list, counter) {
			val = ena = run = 0;
			nr = 0;
			/* Sum up all CPUs belonging to aggregation id. */
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				cpu2 = perf_evsel__cpus(counter)->map[cpu];
				s2 = aggr_get_id(evsel_list->cpus, cpu2);
				if (s2 != id)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			/* Counter never ran or was never enabled. */
			if (run == 0 || ena == 0) {
				aggr_printout(counter, id, nr);

				fprintf(output, "%*s%s",
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
					perf_evsel__name(counter));

				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				print_running(run, ena);
				fputc('\n', output);
				continue;
			}
			uval = val * counter->scale;

			if (nsec_counter(counter))
				nsec_printout(id, nr, counter, uval);
			else
				abs_printout(id, nr, counter, uval);

			if (!csv_output)
				print_noise(counter, 1.0);

			print_running(run, ena);
			fputc('\n', output);
		}
	}
}
1334 
1335 /*
1336  * Print out the results of a single counter:
1337  * aggregated counts in system-wide mode
1338  */
1339 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1340 {
1341 	struct perf_stat *ps = counter->priv;
1342 	double avg = avg_stats(&ps->res_stats[0]);
1343 	int scaled = counter->counts->scaled;
1344 	double uval;
1345 	double avg_enabled, avg_running;
1346 
1347 	avg_enabled = avg_stats(&ps->res_stats[1]);
1348 	avg_running = avg_stats(&ps->res_stats[2]);
1349 
1350 	if (prefix)
1351 		fprintf(output, "%s", prefix);
1352 
1353 	if (scaled == -1 || !counter->supported) {
1354 		fprintf(output, "%*s%s",
1355 			csv_output ? 0 : 18,
1356 			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1357 			csv_sep);
1358 		fprintf(output, "%-*s%s",
1359 			csv_output ? 0 : unit_width,
1360 			counter->unit, csv_sep);
1361 		fprintf(output, "%*s",
1362 			csv_output ? 0 : -25,
1363 			perf_evsel__name(counter));
1364 
1365 		if (counter->cgrp)
1366 			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
1367 
1368 		print_running(avg_running, avg_enabled);
1369 		fputc('\n', output);
1370 		return;
1371 	}
1372 
1373 	uval = avg * counter->scale;
1374 
1375 	if (nsec_counter(counter))
1376 		nsec_printout(-1, 0, counter, uval);
1377 	else
1378 		abs_printout(-1, 0, counter, uval);
1379 
1380 	print_noise(counter, avg);
1381 
1382 	print_running(avg_running, avg_enabled);
1383 	fprintf(output, "\n");
1384 }
1385 
1386 /*
1387  * Print out the results of a single counter:
1388  * does not use aggregated count in system-wide
1389  */
1390 static void print_counter(struct perf_evsel *counter, char *prefix)
1391 {
1392 	u64 ena, run, val;
1393 	double uval;
1394 	int cpu;
1395 
1396 	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1397 		val = counter->counts->cpu[cpu].val;
1398 		ena = counter->counts->cpu[cpu].ena;
1399 		run = counter->counts->cpu[cpu].run;
1400 
1401 		if (prefix)
1402 			fprintf(output, "%s", prefix);
1403 
1404 		if (run == 0 || ena == 0) {
1405 			fprintf(output, "CPU%*d%s%*s%s",
1406 				csv_output ? 0 : -4,
1407 				perf_evsel__cpus(counter)->map[cpu], csv_sep,
1408 				csv_output ? 0 : 18,
1409 				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1410 				csv_sep);
1411 
1412 				fprintf(output, "%-*s%s",
1413 					csv_output ? 0 : unit_width,
1414 					counter->unit, csv_sep);
1415 
1416 				fprintf(output, "%*s",
1417 					csv_output ? 0 : -25,
1418 					perf_evsel__name(counter));
1419 
1420 			if (counter->cgrp)
1421 				fprintf(output, "%s%s",
1422 					csv_sep, counter->cgrp->name);
1423 
1424 			print_running(run, ena);
1425 			fputc('\n', output);
1426 			continue;
1427 		}
1428 
1429 		uval = val * counter->scale;
1430 
1431 		if (nsec_counter(counter))
1432 			nsec_printout(cpu, 0, counter, uval);
1433 		else
1434 			abs_printout(cpu, 0, counter, uval);
1435 
1436 		if (!csv_output)
1437 			print_noise(counter, 1.0);
1438 		print_running(run, ena);
1439 
1440 		fputc('\n', output);
1441 	}
1442 }
1443 
/*
 * Print the final statistics report: a header describing the measured
 * target, the per-counter results according to the aggregation mode,
 * and the elapsed wall-clock time (with noise across repeated runs).
 */
static void print_stat(int argc, const char **argv)
{
	struct perf_evsel *counter;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		/* Describe what was measured: system, CPUs, command, pid or tid. */
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
		else if (!target__has_task(&target)) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(NULL);
		break;
	case AGGR_GLOBAL:
		evlist__for_each(evsel_list, counter)
			print_counter_aggr(counter, NULL);
		break;
	case AGGR_NONE:
		evlist__for_each(evsel_list, counter)
			print_counter(counter, NULL);
		break;
	default:
		break;
	}

	if (!csv_output) {
		if (!null_run)
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
				avg_stats(&walltime_nsecs_stats)/1e9);
		/* With -r, also show the relative stddev of the wall time. */
		if (run_count > 1) {
			fprintf(output, "                                        ");
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
		}
		fprintf(output, "\n\n");
	}
}
1503 
/* Signal number that interrupted us, re-raised later in sig_atexit(). */
static volatile int signr = -1;

/*
 * Signal handler for SIGINT/SIGCHLD/SIGALRM/SIGABRT: remember the
 * signal, mark the run as done when appropriate, and neutralize
 * child_pid so we never kill an unrelated process.
 */
static void skip_signal(int signo)
{
	/* No child to wait for (or interval mode): stop the main loop. */
	if ((child_pid == -1) || interval)
		done = 1;

	signr = signo;
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
}
1520 
/*
 * atexit() hook: terminate the forked workload (if still running) and
 * re-raise the signal that interrupted us so the caller sees the
 * correct exit status. SIGCHLD is blocked around the child_pid check
 * to avoid racing with skip_signal().
 */
static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid send SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	/* Re-raise with default disposition so our exit status reflects it. */
	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
1546 
1547 static int stat__set_big_num(const struct option *opt __maybe_unused,
1548 			     const char *s __maybe_unused, int unset)
1549 {
1550 	big_num_opt = unset ? 0 : 1;
1551 	return 0;
1552 }
1553 
1554 static int perf_stat_init_aggr_mode(void)
1555 {
1556 	switch (aggr_mode) {
1557 	case AGGR_SOCKET:
1558 		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1559 			perror("cannot build socket map");
1560 			return -1;
1561 		}
1562 		aggr_get_id = cpu_map__get_socket;
1563 		break;
1564 	case AGGR_CORE:
1565 		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1566 			perror("cannot build core map");
1567 			return -1;
1568 		}
1569 		aggr_get_id = cpu_map__get_core;
1570 		break;
1571 	case AGGR_NONE:
1572 	case AGGR_GLOBAL:
1573 	default:
1574 		break;
1575 	}
1576 	return 0;
1577 }
1578 
1579 static int setup_events(const char * const *attrs, unsigned len)
1580 {
1581 	unsigned i;
1582 
1583 	for (i = 0; i < len; i++) {
1584 		if (parse_events(evsel_list, attrs[i], NULL))
1585 			return -1;
1586 	}
1587 	return 0;
1588 }
1589 
1590 /*
1591  * Add default attributes, if there were no attributes specified or
1592  * if -d/--detailed, -d -d or -d -d -d is used:
1593  */
static int add_default_attributes(void)
{
	/* Baseline events used when the user specified none (-e absent). */
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	/*
	 * -T: use the transaction event set; prefer the full set when the
	 * PMU exposes checkpointed-cycles and elision events.
	 */
	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = setup_events(transaction_attrs,
					ARRAY_SIZE(transaction_attrs));
		else
			err = setup_events(transaction_limited_attrs,
				 ARRAY_SIZE(transaction_limited_attrs));
		if (err < 0) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
1750 
/*
 * Entry point for 'perf stat': parse options, set up the output stream,
 * build the event list and CPU/thread maps, run the workload (possibly
 * repeatedly with -r, or forever with -r 0) and print the results.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	bool append_file = false;
	int output_fd = 0;
	const char *output_name	= NULL;
	const struct option options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &interval,
		    "print counts at regular interval in ms (>= 100)"),
	OPT_SET_UINT(0, "per-socket", &aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
	};
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;

	/* Needed so %' in printf honors the locale's thousands separator. */
	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	/* Default to stderr; NULL means "open output_name below". */
	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	/* --output and --log-fd are mutually exclusive. */
	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, options, "o", 1);
		parse_options_usage(NULL, options, "log-fd", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
		parse_options_usage(stat_usage, options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			/*
			 * NOTE(review): returns without the 'out:' cleanup,
			 * so evsel_list is not freed here — harmless at
			 * process exit, but inconsistent with goto out.
			 */
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			/* NOTE(review): also bypasses 'out:' cleanup. */
			return -errno;
		}
	}

	/* -x implies CSV mode; allow "\t" to mean a literal tab. */
	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, options, "B", 1);
			parse_options_usage(NULL, options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	/* Need either a command to run or an existing target to attach to. */
	if (!argc && target__none(&target))
		usage_with_options(stat_usage, options);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		/* -r 0: repeat until interrupted. */
		forever = true;
		run_count = 1;
	}

	/* no_aggr, cgroup are for system-wide only */
	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, options, "G", 1);
		parse_options_usage(NULL, options, "A", 1);
		parse_options_usage(NULL, options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, options, "p", 1);
			parse_options_usage(NULL, options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, options, "C", 1);
			parse_options_usage(NULL, options, "a", 1);
		}
		goto out;
	}
	if (interval && interval < 100) {
		pr_err("print interval must be >= 100ms\n");
		parse_options_usage(stat_usage, options, "I", 1);
		goto out;
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT,  skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
		/* In forever mode, print and reset after every iteration. */
		if (forever && status != -1) {
			print_stat(argc, argv);
			perf_stat__reset_stats(evsel_list);
		}
	}

	/* Interval mode prints as it goes; nothing further to report. */
	if (!forever && status != -1 && !interval)
		print_stat(argc, argv);

	perf_evlist__free_stats(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}
1976