xref: /openbmc/linux/tools/perf/util/stat-shadow.c (revision 0a57b910)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <math.h>
3 #include <stdio.h>
4 #include "evsel.h"
5 #include "stat.h"
6 #include "color.h"
7 #include "debug.h"
8 #include "pmu.h"
9 #include "rblist.h"
10 #include "evlist.h"
11 #include "expr.h"
12 #include "metricgroup.h"
13 #include "cgroup.h"
14 #include "units.h"
15 #include <linux/zalloc.h>
16 #include "iostat.h"
17 #include "util/hashmap.h"
18 
19 /*
20  * AGGR_GLOBAL: Use CPU 0
21  * AGGR_SOCKET: Use first CPU of socket
22  * AGGR_DIE: Use first CPU of die
23  * AGGR_CORE: Use first CPU of core
24  * AGGR_NONE: Use matching CPU
25  * AGGR_THREAD: Not supported?
26  */
27 
28 struct stats walltime_nsecs_stats;
29 struct rusage_stats ru_stats;
30 
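/*
 * Global rbtree of saved_value nodes accumulating the shadow counts for the
 * current perf stat session (see saved_value_cmp() for the lookup key).
 */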
31 static struct runtime_stat {
32 	struct rblist value_list;
33 } rt_stat;
34 
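/*
 * Context bits keep shadow stats separate per combination of exclusion
 * modifiers, so that e.g. a kernel-excluded event is only matched against
 * other kernel-excluded events (see evsel_context() below).
 */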
35 enum {
36 	CTX_BIT_USER	= 1 << 0,
37 	CTX_BIT_KERNEL	= 1 << 1,
38 	CTX_BIT_HV	= 1 << 2,
39 	CTX_BIT_HOST	= 1 << 3,
40 	CTX_BIT_IDLE	= 1 << 4,
41 	CTX_BIT_MAX	= 1 << 5,
42 };
43 
44 enum stat_type {
45 	STAT_NONE = 0,
46 	STAT_NSECS,
47 	STAT_CYCLES,
48 	STAT_INSTRUCTIONS,
49 	STAT_STALLED_CYCLES_FRONT,
50 	STAT_STALLED_CYCLES_BACK,
51 	STAT_BRANCHES,
52 	STAT_BRANCH_MISS,
53 	STAT_CACHE_REFS,
54 	STAT_CACHE_MISSES,
55 	STAT_L1_DCACHE,
56 	STAT_L1_ICACHE,
57 	STAT_LL_CACHE,
58 	STAT_ITLB_CACHE,
59 	STAT_DTLB_CACHE,
60 	STAT_L1D_MISS,
61 	STAT_L1I_MISS,
62 	STAT_LL_MISS,
63 	STAT_DTLB_MISS,
64 	STAT_ITLB_MISS,
65 	STAT_MAX
66 };
67 
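/*
 * One rbtree node per (evsel or stat type, ctx, cgroup, map_idx) tuple;
 * "stats" accumulates the counter values seen for that key.
 */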
68 struct saved_value {
69 	struct rb_node rb_node;
70 	struct evsel *evsel;
71 	enum stat_type type;
72 	int ctx;
73 	int map_idx;  /* cpu or thread map index */
74 	struct cgroup *cgrp;
75 	struct stats stats;
76 	u64 metric_total;
77 	int metric_other;
78 };
79 
80 static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
81 {
82 	struct saved_value *a = container_of(rb_node,
83 					     struct saved_value,
84 					     rb_node);
85 	const struct saved_value *b = entry;
86 
87 	if (a->map_idx != b->map_idx)
88 		return a->map_idx - b->map_idx;
89 
90 	/*
91 	 * Previously the rbtree only linked generic metrics, keyed by
92 	 * evsel/map_idx. It has since been extended to support per-thread
93 	 * shadow stats. For the shadow stats case the keys are
94 	 * map_idx/type/ctx/cgrp (evsel is NULL); for the generic metrics
95 	 * case the keys are still evsel/map_idx (type/ctx/cgrp are 0/NULL).
96 	 */
97 	if (a->type != b->type)
98 		return a->type - b->type;
99 
100 	if (a->ctx != b->ctx)
101 		return a->ctx - b->ctx;
102 
103 	if (a->cgrp != b->cgrp)
104 		return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
105 
106 	if (a->evsel == b->evsel)
107 		return 0;
108 	if ((char *)a->evsel < (char *)b->evsel)
109 		return -1;
110 	return +1;
111 }
112 
113 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
114 				     const void *entry)
115 {
116 	struct saved_value *nd = malloc(sizeof(struct saved_value));
117 
118 	if (!nd)
119 		return NULL;
120 	memcpy(nd, entry, sizeof(struct saved_value));
121 	return &nd->rb_node;
122 }
123 
124 static void saved_value_delete(struct rblist *rblist __maybe_unused,
125 			       struct rb_node *rb_node)
126 {
127 	struct saved_value *v;
128 
129 	BUG_ON(!rb_node);
130 	v = container_of(rb_node, struct saved_value, rb_node);
131 	free(v);
132 }
133 
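/*
 * Find the saved_value slot for the given key, optionally creating it.
 * Shadow stats pass a NULL evsel and a stat type; generic metrics
 * historically passed the evsel with type/ctx of 0 (see the comment in
 * saved_value_cmp() above).
 */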
134 static struct saved_value *saved_value_lookup(struct evsel *evsel,
135 					      int map_idx,
136 					      bool create,
137 					      enum stat_type type,
138 					      int ctx,
139 					      struct cgroup *cgrp)
140 {
141 	struct rblist *rblist;
142 	struct rb_node *nd;
143 	struct saved_value dm = {
144 		.map_idx = map_idx,
145 		.evsel = evsel,
146 		.type = type,
147 		.ctx = ctx,
148 		.cgrp = cgrp,
149 	};
150 
151 	rblist = &rt_stat.value_list;
152 
153 	/* don't use context info for clock events */
154 	if (type == STAT_NSECS)
155 		dm.ctx = 0;
156 
157 	nd = rblist__find(rblist, &dm);
158 	if (nd)
159 		return container_of(nd, struct saved_value, rb_node);
160 	if (create) {
161 		rblist__add_node(rblist, &dm);
162 		nd = rblist__find(rblist, &dm);
163 		if (nd)
164 			return container_of(nd, struct saved_value, rb_node);
165 	}
166 	return NULL;
167 }
168 
169 void perf_stat__init_shadow_stats(void)
170 {
171 	struct rblist *rblist = &rt_stat.value_list;
172 
173 	rblist__init(rblist);
174 	rblist->node_cmp = saved_value_cmp;
175 	rblist->node_new = saved_value_new;
176 	rblist->node_delete = saved_value_delete;
177 }
178 
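/*
 * Derive the context bits from an event's exclusion flags.  As a sketch of
 * the usual case (assuming the standard modifier handling), an event opened
 * as "cycles:u" has exclude_kernel and exclude_hv set, which yields
 * CTX_BIT_KERNEL | CTX_BIT_HV here.
 */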
179 static int evsel_context(const struct evsel *evsel)
180 {
181 	int ctx = 0;
182 
183 	if (evsel->core.attr.exclude_kernel)
184 		ctx |= CTX_BIT_KERNEL;
185 	if (evsel->core.attr.exclude_user)
186 		ctx |= CTX_BIT_USER;
187 	if (evsel->core.attr.exclude_hv)
188 		ctx |= CTX_BIT_HV;
189 	if (evsel->core.attr.exclude_host)
190 		ctx |= CTX_BIT_HOST;
191 	if (evsel->core.attr.exclude_idle)
192 		ctx |= CTX_BIT_IDLE;
193 
194 	return ctx;
195 }
196 
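/*
 * Zero the accumulated stats of every node in the tree; the nodes
 * themselves stay allocated.
 */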
197 void perf_stat__reset_shadow_per_stat(void)
198 {
199 	struct rblist *rblist;
200 	struct rb_node *pos, *next;
201 
202 	rblist = &rt_stat.value_list;
203 	next = rb_first_cached(&rblist->entries);
204 	while (next) {
205 		pos = next;
206 		next = rb_next(pos);
207 		memset(&container_of(pos, struct saved_value, rb_node)->stats,
208 		       0,
209 		       sizeof(struct stats));
210 	}
211 }
212 
213 void perf_stat__reset_shadow_stats(void)
214 {
215 	perf_stat__reset_shadow_per_stat();
216 	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
217 	memset(&ru_stats, 0, sizeof(ru_stats));
218 }
219 
220 struct runtime_stat_data {
221 	int ctx;
222 	struct cgroup *cgrp;
223 };
224 
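/* Accumulate @count into the saved_value slot for this type/ctx/cgroup/index. */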
225 static void update_runtime_stat(enum stat_type type,
226 				int map_idx, u64 count,
227 				struct runtime_stat_data *rsd)
228 {
229 	struct saved_value *v = saved_value_lookup(NULL, map_idx, true, type,
230 						   rsd->ctx, rsd->cgrp);
231 
232 	if (v)
233 		update_stats(&v->stats, count);
234 }
235 
236 /*
237  * Update various tracking values we maintain to print
238  * more semantic information such as miss/hit ratios,
239  * instruction rates, etc:
240  */
241 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
242 				    int aggr_idx)
243 {
244 	u64 count_ns = count;
245 	struct runtime_stat_data rsd = {
246 		.ctx = evsel_context(counter),
247 		.cgrp = counter->cgrp,
248 	};
249 	count *= counter->scale;
250 
251 	if (evsel__is_clock(counter))
252 		update_runtime_stat(STAT_NSECS, aggr_idx, count_ns, &rsd);
253 	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
254 		update_runtime_stat(STAT_CYCLES, aggr_idx, count, &rsd);
255 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
256 		update_runtime_stat(STAT_STALLED_CYCLES_FRONT,
257 				    aggr_idx, count, &rsd);
258 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
259 		update_runtime_stat(STAT_STALLED_CYCLES_BACK,
260 				    aggr_idx, count, &rsd);
261 	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
262 		update_runtime_stat(STAT_BRANCHES, aggr_idx, count, &rsd);
263 	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
264 		update_runtime_stat(STAT_CACHE_REFS, aggr_idx, count, &rsd);
265 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
266 		update_runtime_stat(STAT_L1_DCACHE, aggr_idx, count, &rsd);
267 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
268 		update_runtime_stat(STAT_L1_ICACHE, aggr_idx, count, &rsd);
269 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
270 		update_runtime_stat(STAT_LL_CACHE, aggr_idx, count, &rsd);
271 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
272 		update_runtime_stat(STAT_DTLB_CACHE, aggr_idx, count, &rsd);
273 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
274 		update_runtime_stat(STAT_ITLB_CACHE, aggr_idx, count, &rsd);
275 }
276 
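/*
 * Map an evsel to the stat_type used by the hard-coded shadow metrics.
 * The "fake" values below exist because evsel__match() compares the event
 * config against PERF_COUNT_##x names; building full cache configs
 * (type | op << 8 | result << 16) as locals lets the read-miss variants be
 * matched the same way as the plain cache events.
 */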
277 static enum stat_type evsel__stat_type(const struct evsel *evsel)
278 {
279 	/* Fake perf_hw_cache config values (type | op << 8 | result << 16) for use with evsel__match. */
280 	u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
281 		((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
282 		((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
283 	u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I |
284 		((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
285 		((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
286 	u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL |
287 		((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
288 		((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
289 	u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB |
290 		((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
291 		((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
292 	u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB |
293 		((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
294 		((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
295 
296 	if (evsel__is_clock(evsel))
297 		return STAT_NSECS;
298 	else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
299 		return STAT_CYCLES;
300 	else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
301 		return STAT_INSTRUCTIONS;
302 	else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
303 		return STAT_STALLED_CYCLES_FRONT;
304 	else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
305 		return STAT_STALLED_CYCLES_BACK;
306 	else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
307 		return STAT_BRANCHES;
308 	else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
309 		return STAT_BRANCH_MISS;
310 	else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
311 		return STAT_CACHE_REFS;
312 	else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
313 		return STAT_CACHE_MISSES;
314 	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
315 		return STAT_L1_DCACHE;
316 	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
317 		return STAT_L1_ICACHE;
318 	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
319 		return STAT_LL_CACHE;
320 	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
321 		return STAT_DTLB_CACHE;
322 	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
323 		return STAT_ITLB_CACHE;
324 	else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
325 		return STAT_L1D_MISS;
326 	else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
327 		return STAT_L1I_MISS;
328 	else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
329 		return STAT_LL_MISS;
330 	else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
331 		return STAT_DTLB_MISS;
332 	else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
333 		return STAT_ITLB_MISS;
334 	return STAT_NONE;
335 }
336 
337 static const char *get_ratio_color(const double ratios[3], double val)
338 {
339 	const char *color = PERF_COLOR_NORMAL;
340 
341 	if (val > ratios[0])
342 		color = PERF_COLOR_RED;
343 	else if (val > ratios[1])
344 		color = PERF_COLOR_MAGENTA;
345 	else if (val > ratios[2])
346 		color = PERF_COLOR_YELLOW;
347 
348 	return color;
349 }
350 
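/*
 * Search the evlist for another counter of the given stat_type that has the
 * same modifiers, cgroup and (if grouped) group leader, and return its
 * aggregated, scaled value.  This is how e.g. "insn per cycle" pairs the
 * instructions event with a matching cycles event; 0.0 is returned when no
 * suitable event was counted.
 */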
351 static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type)
352 {
353 	const struct evsel *cur;
354 	int evsel_ctx = evsel_context(evsel);
355 
356 	evlist__for_each_entry(evsel->evlist, cur) {
357 		struct perf_stat_aggr *aggr;
358 
359 		/* Ignore the evsel that is being searched from. */
360 		if (evsel == cur)
361 			continue;
362 
363 		/* Ignore evsels that are part of different groups. */
364 		if (evsel->core.leader->nr_members &&
365 		    evsel->core.leader != cur->core.leader)
366 			continue;
367 		/* Ignore evsels with mismatched modifiers. */
368 		if (evsel_ctx != evsel_context(cur))
369 			continue;
370 		/* Ignore if not the cgroup we're looking for. */
371 		if (evsel->cgrp != cur->cgrp)
372 			continue;
373 		/* Ignore if not the stat we're looking for. */
374 		if (type != evsel__stat_type(cur))
375 			continue;
376 
377 		aggr = &cur->stats->aggr[aggr_idx];
378 		if (type == STAT_NSECS)
379 			return aggr->counts.val;
380 		return aggr->counts.val * cur->scale;
381 	}
382 	return 0.0;
383 }
384 
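/*
 * Print numerator/denominator as a percentage.  color_ratios[0..2] are the
 * thresholds above which the value is shown in red, magenta or yellow
 * respectively (see get_ratio_color()).
 */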
385 static void print_ratio(struct perf_stat_config *config,
386 			const struct evsel *evsel, int aggr_idx,
387 			double numerator, struct perf_stat_output_ctx *out,
388 			enum stat_type denominator_type,
389 			const double color_ratios[3], const char *unit)
390 {
391 	double denominator = find_stat(evsel, aggr_idx, denominator_type);
392 
393 	if (numerator && denominator) {
394 		double ratio = numerator / denominator * 100.0;
395 		const char *color = get_ratio_color(color_ratios, ratio);
396 
397 		out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio);
398 	} else
399 		out->print_metric(config, out->ctx, NULL, NULL, unit, 0);
400 }
401 
402 static void print_stalled_cycles_front(struct perf_stat_config *config,
403 				const struct evsel *evsel,
404 				int aggr_idx, double stalled,
405 				struct perf_stat_output_ctx *out)
406 {
407 	static const double color_ratios[3] = {50.0, 30.0, 10.0};
408 
409 	print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
410 		    "frontend cycles idle");
411 }
412 
413 static void print_stalled_cycles_back(struct perf_stat_config *config,
414 				const struct evsel *evsel,
415 				int aggr_idx, double stalled,
416 				struct perf_stat_output_ctx *out)
417 {
418 	static const double color_ratios[3] = {75.0, 50.0, 20.0};
419 
420 	print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
421 		    "backend cycles idle");
422 }
423 
424 static void print_branch_miss(struct perf_stat_config *config,
425 			const struct evsel *evsel,
426 			int aggr_idx, double misses,
427 			struct perf_stat_output_ctx *out)
428 {
429 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
430 
431 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios,
432 		    "of all branches");
433 }
434 
435 static void print_l1d_miss(struct perf_stat_config *config,
436 			const struct evsel *evsel,
437 			int aggr_idx, double misses,
438 			struct perf_stat_output_ctx *out)
439 {
440 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
441 
442 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios,
443 		    "of all L1-dcache accesses");
444 }
445 
446 static void print_l1i_miss(struct perf_stat_config *config,
447 			const struct evsel *evsel,
448 			int aggr_idx, double misses,
449 			struct perf_stat_output_ctx *out)
450 {
451 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
452 
453 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios,
454 		    "of all L1-icache accesses");
455 }
456 
457 static void print_ll_miss(struct perf_stat_config *config,
458 			const struct evsel *evsel,
459 			int aggr_idx, double misses,
460 			struct perf_stat_output_ctx *out)
461 {
462 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
463 
464 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios,
465 		    "of all LL-cache accesses");
466 }
467 
468 static void print_dtlb_miss(struct perf_stat_config *config,
469 			const struct evsel *evsel,
470 			int aggr_idx, double misses,
471 			struct perf_stat_output_ctx *out)
472 {
473 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
474 
475 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios,
476 		    "of all dTLB cache accesses");
477 }
478 
479 static void print_itlb_miss(struct perf_stat_config *config,
480 			const struct evsel *evsel,
481 			int aggr_idx, double misses,
482 			struct perf_stat_output_ctx *out)
483 {
484 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
485 
486 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios,
487 		    "of all iTLB cache accesses");
488 }
489 
490 static void print_cache_miss(struct perf_stat_config *config,
491 			const struct evsel *evsel,
492 			int aggr_idx, double misses,
493 			struct perf_stat_output_ctx *out)
494 {
495 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
496 
497 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios,
498 		    "of all cache refs");
499 }
500 
501 static void print_instructions(struct perf_stat_config *config,
502 			const struct evsel *evsel,
503 			int aggr_idx, double instructions,
504 			struct perf_stat_output_ctx *out)
505 {
506 	print_metric_t print_metric = out->print_metric;
507 	void *ctxp = out->ctx;
508 	double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES);
509 	double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT),
510 				find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK));
511 
512 	if (cycles) {
513 		print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle",
514 			instructions / cycles);
515 	} else
516 		print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
517 
518 	if (max_stalled && instructions) {
519 		out->new_line(config, ctxp);
520 		print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn",
521 			max_stalled / instructions);
522 	}
523 }
524 
525 static void print_cycles(struct perf_stat_config *config,
526 			const struct evsel *evsel,
527 			int aggr_idx, double cycles,
528 			struct perf_stat_output_ctx *out)
529 {
530 	double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
531 
532 	if (cycles && nsecs) {
533 		double ratio = cycles / nsecs;
534 
535 		out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio);
536 	} else
537 		out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0);
538 }
539 
540 static void print_nsecs(struct perf_stat_config *config,
541 			const struct evsel *evsel,
542 			int aggr_idx __maybe_unused, double nsecs,
543 			struct perf_stat_output_ctx *out)
544 {
545 	print_metric_t print_metric = out->print_metric;
546 	void *ctxp = out->ctx;
547 	double wall_time = avg_stats(&walltime_nsecs_stats);
548 
549 	if (wall_time) {
550 		print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
551 			nsecs / (wall_time * evsel->scale));
552 	} else
553 		print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
554 }
555 
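/*
 * Add each metric event's value to the expression parse context, keyed by
 * its metric id.  Tool events (duration_time, user_time, system_time) read
 * from the wall-clock/rusage stats and are converted to seconds; ordinary
 * counters read the per-aggregation value with any stat-time scaling
 * reversed.  Returns the number of events added or a negative error.
 */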
556 static int prepare_metric(struct evsel **metric_events,
557 			  struct metric_ref *metric_refs,
558 			  struct expr_parse_ctx *pctx,
559 			  int aggr_idx)
560 {
561 	int i;
562 
563 	for (i = 0; metric_events[i]; i++) {
564 		char *n;
565 		double val;
566 		int source_count = 0;
567 
568 		if (evsel__is_tool(metric_events[i])) {
569 			struct stats *stats;
570 			double scale;
571 
572 			switch (metric_events[i]->tool_event) {
573 			case PERF_TOOL_DURATION_TIME:
574 				stats = &walltime_nsecs_stats;
575 				scale = 1e-9;
576 				break;
577 			case PERF_TOOL_USER_TIME:
578 				stats = &ru_stats.ru_utime_usec_stat;
579 				scale = 1e-6;
580 				break;
581 			case PERF_TOOL_SYSTEM_TIME:
582 				stats = &ru_stats.ru_stime_usec_stat;
583 				scale = 1e-6;
584 				break;
585 			case PERF_TOOL_NONE:
586 				pr_err("Invalid tool event 'none'");
587 				abort();
588 			case PERF_TOOL_MAX:
589 				pr_err("Invalid tool event 'max'");
590 				abort();
591 			default:
592 				pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
593 				abort();
594 			}
595 			val = avg_stats(stats) * scale;
596 			source_count = 1;
597 		} else {
598 			struct perf_stat_evsel *ps = metric_events[i]->stats;
599 			struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
600 
601 			if (!aggr)
602 				break;
603 
604 			/*
605 			 * If an event was scaled during stat gathering, reverse
606 			 * the scale before computing the metric.
607 			 */
608 			val = aggr->counts.val * (1.0 / metric_events[i]->scale);
609 			source_count = evsel__source_count(metric_events[i]);
610 		}
611 		n = strdup(evsel__metric_id(metric_events[i]));
612 		if (!n)
613 			return -ENOMEM;
614 
615 		expr__add_id_val_source_count(pctx, n, val, source_count);
616 	}
617 
618 	for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
619 		int ret = expr__add_ref(pctx, &metric_refs[j]);
620 
621 		if (ret)
622 			return ret;
623 	}
624 
625 	return i;
626 }
627 
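/*
 * Evaluate and print a json/user-defined metric.  The expression is parsed
 * against the values collected by prepare_metric(); e.g. a metric defined
 * as "instructions / cycles" (an illustrative expression, not one taken
 * from this file) resolves those ids to the prepared counts.  If a
 * threshold expression is given, its result selects green (zero) or red
 * (non-zero) output, and a '?' in the expression marks a runtime-
 * parameterized metric whose printed name gets a "_<runtime>" suffix.
 */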
628 static void generic_metric(struct perf_stat_config *config,
629 			   const char *metric_expr,
630 			   const char *metric_threshold,
631 			   struct evsel **metric_events,
632 			   struct metric_ref *metric_refs,
633 			   char *name,
634 			   const char *metric_name,
635 			   const char *metric_unit,
636 			   int runtime,
637 			   int aggr_idx,
638 			   struct perf_stat_output_ctx *out)
639 {
640 	print_metric_t print_metric = out->print_metric;
641 	struct expr_parse_ctx *pctx;
642 	double ratio, scale, threshold;
643 	int i;
644 	void *ctxp = out->ctx;
645 	const char *color = NULL;
646 
647 	pctx = expr__ctx_new();
648 	if (!pctx)
649 		return;
650 
651 	if (config->user_requested_cpu_list)
652 		pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
653 	pctx->sctx.runtime = runtime;
654 	pctx->sctx.system_wide = config->system_wide;
655 	i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
656 	if (i < 0) {
657 		expr__ctx_free(pctx);
658 		return;
659 	}
660 	if (!metric_events[i]) {
661 		if (expr__parse(&ratio, pctx, metric_expr) == 0) {
662 			char *unit;
663 			char metric_bf[64];
664 
665 			if (metric_threshold &&
666 			    expr__parse(&threshold, pctx, metric_threshold) == 0 &&
667 			    !isnan(threshold)) {
668 				color = fpclassify(threshold) == FP_ZERO
669 					? PERF_COLOR_GREEN : PERF_COLOR_RED;
670 			}
671 
672 			if (metric_unit && metric_name) {
673 				if (perf_pmu__convert_scale(metric_unit,
674 					&unit, &scale) >= 0) {
675 					ratio *= scale;
676 				}
677 				if (strstr(metric_expr, "?"))
678 					scnprintf(metric_bf, sizeof(metric_bf),
679 					  "%s  %s_%d", unit, metric_name, runtime);
680 				else
681 					scnprintf(metric_bf, sizeof(metric_bf),
682 					  "%s  %s", unit, metric_name);
683 
684 				print_metric(config, ctxp, color, "%8.1f",
685 					     metric_bf, ratio);
686 			} else {
687 				print_metric(config, ctxp, color, "%8.2f",
688 					metric_name ?
689 					metric_name :
690 					out->force_header ?  name : "",
691 					ratio);
692 			}
693 		} else {
694 			print_metric(config, ctxp, color, /*unit=*/NULL,
695 				     out->force_header ?
696 				     (metric_name ? metric_name : name) : "", 0);
697 		}
698 	} else {
699 		print_metric(config, ctxp, color, /*unit=*/NULL,
700 			     out->force_header ?
701 			     (metric_name ? metric_name : name) : "", 0);
702 	}
703 
704 	expr__ctx_free(pctx);
705 }
706 
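/*
 * Evaluate a metric expression without printing it (intended for tests, as
 * the name suggests).  Returns NAN if the context cannot be allocated and
 * 0.0 if the events could not be prepared or the expression fails to parse.
 */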
707 double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
708 {
709 	struct expr_parse_ctx *pctx;
710 	double ratio = 0.0;
711 
712 	pctx = expr__ctx_new();
713 	if (!pctx)
714 		return NAN;
715 
716 	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
717 		goto out;
718 
719 	if (expr__parse(&ratio, pctx, mexp->metric_expr))
720 		ratio = 0.0;
721 
722 out:
723 	expr__ctx_free(pctx);
724 	return ratio;
725 }
726 
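/*
 * Print the shadow metric(s) for one counter: iostat output if that mode is
 * active, otherwise the hard-coded printer for the counter's stat type, or
 * a generic "<unit>/sec" rate when a clock event provides the elapsed
 * nanoseconds.  Any generic metrics attached to the evsel are printed
 * afterwards, one per line, and an empty metric is emitted when nothing
 * else was printed.
 */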
727 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
728 				   struct evsel *evsel,
729 				   double avg, int aggr_idx,
730 				   struct perf_stat_output_ctx *out,
731 				   struct rblist *metric_events)
732 {
733 	typedef void (*stat_print_function_t)(struct perf_stat_config *config,
734 					const struct evsel *evsel,
735 					int aggr_idx, double misses,
736 					struct perf_stat_output_ctx *out);
737 	static const stat_print_function_t stat_print_function[STAT_MAX] = {
738 		[STAT_INSTRUCTIONS] = print_instructions,
739 		[STAT_BRANCH_MISS] = print_branch_miss,
740 		[STAT_L1D_MISS] = print_l1d_miss,
741 		[STAT_L1I_MISS] = print_l1i_miss,
742 		[STAT_DTLB_MISS] = print_dtlb_miss,
743 		[STAT_ITLB_MISS] = print_itlb_miss,
744 		[STAT_LL_MISS] = print_ll_miss,
745 		[STAT_CACHE_MISSES] = print_cache_miss,
746 		[STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front,
747 		[STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back,
748 		[STAT_CYCLES] = print_cycles,
749 		[STAT_NSECS] = print_nsecs,
750 	};
751 	print_metric_t print_metric = out->print_metric;
752 	void *ctxp = out->ctx;
753 	struct metric_event *me;
754 	int num = 1;
755 
756 	if (config->iostat_run) {
757 		iostat_print_metric(config, evsel, out);
758 	} else {
759 		stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)];
760 
761 		if (fn)
762 			fn(config, evsel, aggr_idx, avg, out);
763 		else {
764 			double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
765 
766 			if (nsecs) {
767 				char unit = ' ';
768 				char unit_buf[10] = "/sec";
769 				double ratio = convert_unit_double(1000000000.0 * avg / nsecs,
770 								   &unit);
771 
772 				if (unit != ' ')
773 					snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
774 				print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
775 			} else
776 				num = 0;
777 		}
778 	}
779 
780 	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
781 		struct metric_expr *mexp;
782 
783 		list_for_each_entry (mexp, &me->head, nd) {
784 			if (num++ > 0)
785 				out->new_line(config, ctxp);
786 			generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
787 				       mexp->metric_events, mexp->metric_refs, evsel->name,
788 				       mexp->metric_name, mexp->metric_unit, mexp->runtime,
789 				       aggr_idx, out);
790 		}
791 	}
792 	if (num == 0)
793 		print_metric(config, ctxp, NULL, NULL, NULL, 0);
794 }
795