xref: /openbmc/linux/tools/perf/util/stat-shadow.c (revision b24413180f5600bcb3bb70fbed5cf186b60864bd)
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"

enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	int cpu;
	int ctx;
	struct stats stats;
};

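/*
 * Order saved_value nodes by (ctx, cpu, evsel pointer); this gives the
 * rblist a total order, so each event/CPU/context combination maps to
 * at most one node.
 */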
static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;
	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;
	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				     const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

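/*
 * Look up the saved_value for (evsel, cpu, ctx). When @create is set, a
 * missing node is built from the on-stack key: saved_value_new() copies
 * it (including the zero-initialized stats), hence the second
 * rblist__find() to return the inserted copy.
 */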
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu, int ctx,
					      bool create)
{
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.ctx = ctx,
		.evsel = evsel,
	};
	nd = rblist__find(&runtime_saved_values, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(&runtime_saved_values, &dm);
		nd = rblist__find(&runtime_saved_values, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	rblist__init(&runtime_saved_values);
	runtime_saved_values.node_cmp = saved_value_cmp;
	runtime_saved_values.node_new = saved_value_new;
	/* No delete for now */
}
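
/*
 * Typical call order, as driven by perf stat (a sketch; builtin-stat.c
 * is the authoritative caller):
 *
 *	perf_stat__init_shadow_stats();
 *	perf_stat__collect_metric_expr(evsel_list);
 *	for each measurement interval:
 *		perf_stat__update_shadow_stats(counter, count, cpu);
 *		perf_stat__print_shadow_stats(counter, avg, cpu, &out);
 *		perf_stat__reset_shadow_stats();
 */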

static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
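
/*
 * Example: "cycles:u" sets exclude_kernel and exclude_hv, so it maps to
 * ctx = CTX_BIT_KERNEL | CTX_BIT_HV = 6 and keeps its shadow stats apart
 * from a plain "cycles" event (ctx = 0).
 */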

void perf_stat__reset_shadow_stats(void)
{
	struct rb_node *pos, *next;

	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(runtime_cycles_in_tx_stats, 0,
			sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0,
		sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
	memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
	memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));

	next = rb_first(&runtime_saved_values.entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
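/*
 * Only count[0] (the raw counter value) feeds the shadow stats; the
 * enabled/running times that follow it in the values array are not
 * used here.
 */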
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
				    int cpu)
{
	int ctx = evsel_context(counter);

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
		update_stats(&runtime_nsecs_stats[cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, APERF))
		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
							   true);
		update_stats(&v->stats, count[0]);
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
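
/*
 * Example: a 12% cache-miss ratio lies between the 20.0 and 10.0
 * GRC_CACHE_MISSES thresholds and is printed in magenta; above 20% it
 * would be red, between 10% and 5% yellow, and at 5% or below it stays
 * in the normal color.
 */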

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name))
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link the events using them to those targets. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
						&metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i])) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}
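
/*
 * Example with hypothetical event names: an event carrying
 * "MetricExpr": "dcache_misses / instructions" gets the "instructions"
 * evsel recorded in its metric_events array, and collect_stat is set on
 * that evsel so perf_stat__print_shadow_stats() can evaluate the
 * expression later. If "instructions" was not also requested by the
 * user, the metric is dropped and a hint to add the event is printed.
 */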

static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_branches_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline, one for each unit of the pipeline
 * width (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */

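/*
 * Worked example with made-up numbers, assuming a 4-wide pipeline:
 * Cycles = 1000, so TotalSlots = 4000. With SlotsIssued = 2200,
 * SlotsRetired = 2000, RecoveryBubbles = 200 and FetchBubbles = 800:
 *
 *	BadSpeculation = ((2200 - 2000) + 200) / 4000 = 10%
 *	Retiring       = 2000 / 4000                  = 50%
 *	FrontendBound  = 800 / 4000                   = 20%
 *	BackendBound   = 1.0 - 0.1 - 0.5 - 0.2        = 20%
 */

/*
 * The td_* fractions below are computed from averaged counts, so rounding
 * can push a value slightly below zero; clamp anything above -2% back to 0.
 */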
550 {
551 	if (x < 0 && x >= -0.02)
552 		return 0.0;
553 	return x;
554 }
555 
556 static double td_total_slots(int ctx, int cpu)
557 {
558 	return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
559 }
560 
561 static double td_bad_spec(int ctx, int cpu)
562 {
563 	double bad_spec = 0;
564 	double total_slots;
565 	double total;
566 
567 	total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
568 		avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
569 		avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
570 	total_slots = td_total_slots(ctx, cpu);
571 	if (total_slots)
572 		bad_spec = total / total_slots;
573 	return sanitize_val(bad_spec);
574 }
575 
576 static double td_retiring(int ctx, int cpu)
577 {
578 	double retiring = 0;
579 	double total_slots = td_total_slots(ctx, cpu);
580 	double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
581 
582 	if (total_slots)
583 		retiring = ret_slots / total_slots;
584 	return retiring;
585 }
586 
587 static double td_fe_bound(int ctx, int cpu)
588 {
589 	double fe_bound = 0;
590 	double total_slots = td_total_slots(ctx, cpu);
591 	double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
592 
593 	if (total_slots)
594 		fe_bound = fetch_bub / total_slots;
595 	return fe_bound;
596 }
597 
598 static double td_be_bound(int ctx, int cpu)
599 {
600 	double sum = (td_fe_bound(ctx, cpu) +
601 		      td_bad_spec(ctx, cpu) +
602 		      td_retiring(ctx, cpu));
603 	if (sum == 0)
604 		return 0;
605 	return sanitize_val(1.0 - sum);
606 }
607 
608 static void print_smi_cost(int cpu, struct perf_evsel *evsel,
609 			   struct perf_stat_output_ctx *out)
610 {
611 	double smi_num, aperf, cycles, cost = 0.0;
612 	int ctx = evsel_context(evsel);
613 	const char *color = NULL;
614 
615 	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
616 	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
617 	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
618 
619 	if ((cycles == 0) || (aperf == 0))
620 		return;
621 
622 	if (smi_num)
623 		cost = (aperf - cycles) / aperf * 100.00;
624 
625 	if (cost > 10)
626 		color = PERF_COLOR_RED;
627 	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
628 	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
629 }
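
/*
 * Example with made-up counts, assuming freeze_on_smi is set (as
 * perf stat --smi-cost arranges): aperf = 1000000 and cycles = 900000
 * give an SMI cost of (1000000 - 900000) / 1000000 = 10% of all cycles
 * spent in SMM; costs above 10% are flagged in red.
 */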

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
		}
		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

		if (total && avg) {
			out->new_line(ctxp);
			ratio = total / avg;
			print_metric(ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		} else if (have_frontend_stalled) {
			print_metric(ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_branches_stats[ctx][cpu].n != 0)
			print_branch_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
			print_l1_icache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
			print_ll_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total)
			print_metric(ctxp, NULL,
					"%7.2f%%", "transactional cycles",
					100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
		   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / ratio);
		else
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu) > 0)
			print_metric(ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		struct parse_ctx pctx;
		int i;

		expr__ctx_init(&pctx);
		expr__add_id(&pctx, evsel->name, avg);
		for (i = 0; evsel->metric_events[i]; i++) {
			struct saved_value *v;

			v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
			if (!v)
				break;
			expr__add_id(&pctx, evsel->metric_events[i]->name,
					     avg_stats(&v->stats));
		}
		if (!evsel->metric_events[i]) {
			const char *p = evsel->metric_expr;

			if (expr__parse(&ratio, &pctx, &p) == 0)
				print_metric(ctxp, NULL, "%8.1f",
					evsel->metric_name ?
					evsel->metric_name :
					out->force_header ?  evsel->name : "",
					ratio);
			else
				print_metric(ctxp, NULL, NULL, "", 0);
		} else
			print_metric(ctxp, NULL, NULL, "", 0);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(cpu, evsel, out);
	} else {
		print_metric(ctxp, NULL, NULL, NULL, 0);
	}
}

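/*
 * For reference, these shadow metrics show up in perf stat output as the
 * annotation after each raw count, along these lines (numbers are
 * illustrative only):
 *
 *	1,234,567,890	cycles		#	3.100 GHz
 *	2,345,678,901	instructions	#	1.90  insn per cycle
 */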