xref: /openbmc/linux/tools/perf/util/stat-shadow.c (revision a8fe58ce)
1 #include <stdio.h>
2 #include "evsel.h"
3 #include "stat.h"
4 #include "color.h"
5 
/*
 * One bit per evsel->attr.exclude_* flag.  evsel_context() ORs these
 * together and the result is used as the first index into the
 * runtime_*_stats[NUM_CTX][MAX_NR_CPUS] tables.
 */
enum {
	CTX_BIT_USER	= 0x01,
	CTX_BIT_KERNEL	= 0x02,
	CTX_BIT_HV	= 0x04,
	CTX_BIT_HOST	= 0x08,
	CTX_BIT_IDLE	= 0x10,
	/* One past the largest OR-combination of the bits above. */
	CTX_BIT_MAX	= 0x20,
};
14 
15 #define NUM_CTX CTX_BIT_MAX
16 
17 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
18 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
19 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
20 static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
21 static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
22 static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
23 static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
24 static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
25 static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
26 static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
27 static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
28 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
29 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
30 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
31 
32 struct stats walltime_nsecs_stats;
33 
34 static int evsel_context(struct perf_evsel *evsel)
35 {
36 	int ctx = 0;
37 
38 	if (evsel->attr.exclude_kernel)
39 		ctx |= CTX_BIT_KERNEL;
40 	if (evsel->attr.exclude_user)
41 		ctx |= CTX_BIT_USER;
42 	if (evsel->attr.exclude_hv)
43 		ctx |= CTX_BIT_HV;
44 	if (evsel->attr.exclude_host)
45 		ctx |= CTX_BIT_HOST;
46 	if (evsel->attr.exclude_idle)
47 		ctx |= CTX_BIT_IDLE;
48 
49 	return ctx;
50 }
51 
52 void perf_stat__reset_shadow_stats(void)
53 {
54 	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
55 	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
56 	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
57 	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
58 	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
59 	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
60 	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
61 	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
62 	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
63 	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
64 	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
65 	memset(runtime_cycles_in_tx_stats, 0,
66 			sizeof(runtime_cycles_in_tx_stats));
67 	memset(runtime_transaction_stats, 0,
68 		sizeof(runtime_transaction_stats));
69 	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
70 	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
71 }
72 
73 /*
74  * Update various tracking values we maintain to print
75  * more semantic information such as miss/hit ratios,
76  * instruction rates, etc:
77  */
78 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
79 				    int cpu)
80 {
81 	int ctx = evsel_context(counter);
82 
83 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
84 		update_stats(&runtime_nsecs_stats[cpu], count[0]);
85 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
86 		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
87 	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
88 		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
89 	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
90 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
91 	else if (perf_stat_evsel__is(counter, ELISION_START))
92 		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
93 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
94 		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
95 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
96 		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
97 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
98 		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
99 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
100 		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
101 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
102 		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
103 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
104 		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
105 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
106 		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
107 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
108 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
109 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
110 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
111 }
112 
/* Selects the threshold row that get_ratio_color() compares against. */
enum grc_type {
	GRC_STALLED_CYCLES_FE = 0,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR,	/* number of rows; not a valid type */
};
120 
121 static const char *get_ratio_color(enum grc_type type, double ratio)
122 {
123 	static const double grc_table[GRC_MAX_NR][3] = {
124 		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
125 		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
126 		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
127 	};
128 	const char *color = PERF_COLOR_NORMAL;
129 
130 	if (ratio > grc_table[type][0])
131 		color = PERF_COLOR_RED;
132 	else if (ratio > grc_table[type][1])
133 		color = PERF_COLOR_MAGENTA;
134 	else if (ratio > grc_table[type][2])
135 		color = PERF_COLOR_YELLOW;
136 
137 	return color;
138 }
139 
140 static void print_stalled_cycles_frontend(FILE *out, int cpu,
141 					  struct perf_evsel *evsel
142 					  __maybe_unused, double avg)
143 {
144 	double total, ratio = 0.0;
145 	const char *color;
146 	int ctx = evsel_context(evsel);
147 
148 	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
149 
150 	if (total)
151 		ratio = avg / total * 100.0;
152 
153 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
154 
155 	fprintf(out, " #  ");
156 	color_fprintf(out, color, "%6.2f%%", ratio);
157 	fprintf(out, " frontend cycles idle   ");
158 }
159 
160 static void print_stalled_cycles_backend(FILE *out, int cpu,
161 					 struct perf_evsel *evsel
162 					 __maybe_unused, double avg)
163 {
164 	double total, ratio = 0.0;
165 	const char *color;
166 	int ctx = evsel_context(evsel);
167 
168 	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
169 
170 	if (total)
171 		ratio = avg / total * 100.0;
172 
173 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
174 
175 	fprintf(out, " #  ");
176 	color_fprintf(out, color, "%6.2f%%", ratio);
177 	fprintf(out, " backend  cycles idle   ");
178 }
179 
180 static void print_branch_misses(FILE *out, int cpu,
181 				struct perf_evsel *evsel __maybe_unused,
182 				double avg)
183 {
184 	double total, ratio = 0.0;
185 	const char *color;
186 	int ctx = evsel_context(evsel);
187 
188 	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
189 
190 	if (total)
191 		ratio = avg / total * 100.0;
192 
193 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
194 
195 	fprintf(out, " #  ");
196 	color_fprintf(out, color, "%6.2f%%", ratio);
197 	fprintf(out, " of all branches        ");
198 }
199 
200 static void print_l1_dcache_misses(FILE *out, int cpu,
201 				   struct perf_evsel *evsel __maybe_unused,
202 				   double avg)
203 {
204 	double total, ratio = 0.0;
205 	const char *color;
206 	int ctx = evsel_context(evsel);
207 
208 	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
209 
210 	if (total)
211 		ratio = avg / total * 100.0;
212 
213 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
214 
215 	fprintf(out, " #  ");
216 	color_fprintf(out, color, "%6.2f%%", ratio);
217 	fprintf(out, " of all L1-dcache hits  ");
218 }
219 
220 static void print_l1_icache_misses(FILE *out, int cpu,
221 				   struct perf_evsel *evsel __maybe_unused,
222 				   double avg)
223 {
224 	double total, ratio = 0.0;
225 	const char *color;
226 	int ctx = evsel_context(evsel);
227 
228 	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
229 
230 	if (total)
231 		ratio = avg / total * 100.0;
232 
233 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
234 
235 	fprintf(out, " #  ");
236 	color_fprintf(out, color, "%6.2f%%", ratio);
237 	fprintf(out, " of all L1-icache hits  ");
238 }
239 
240 static void print_dtlb_cache_misses(FILE *out, int cpu,
241 				    struct perf_evsel *evsel __maybe_unused,
242 				    double avg)
243 {
244 	double total, ratio = 0.0;
245 	const char *color;
246 	int ctx = evsel_context(evsel);
247 
248 	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
249 
250 	if (total)
251 		ratio = avg / total * 100.0;
252 
253 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
254 
255 	fprintf(out, " #  ");
256 	color_fprintf(out, color, "%6.2f%%", ratio);
257 	fprintf(out, " of all dTLB cache hits ");
258 }
259 
260 static void print_itlb_cache_misses(FILE *out, int cpu,
261 				    struct perf_evsel *evsel __maybe_unused,
262 				    double avg)
263 {
264 	double total, ratio = 0.0;
265 	const char *color;
266 	int ctx = evsel_context(evsel);
267 
268 	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
269 
270 	if (total)
271 		ratio = avg / total * 100.0;
272 
273 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
274 
275 	fprintf(out, " #  ");
276 	color_fprintf(out, color, "%6.2f%%", ratio);
277 	fprintf(out, " of all iTLB cache hits ");
278 }
279 
280 static void print_ll_cache_misses(FILE *out, int cpu,
281 				  struct perf_evsel *evsel __maybe_unused,
282 				  double avg)
283 {
284 	double total, ratio = 0.0;
285 	const char *color;
286 	int ctx = evsel_context(evsel);
287 
288 	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
289 
290 	if (total)
291 		ratio = avg / total * 100.0;
292 
293 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
294 
295 	fprintf(out, " #  ");
296 	color_fprintf(out, color, "%6.2f%%", ratio);
297 	fprintf(out, " of all LL-cache hits   ");
298 }
299 
300 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
301 				   double avg, int cpu, enum aggr_mode aggr)
302 {
303 	double total, ratio = 0.0, total2;
304 	int ctx = evsel_context(evsel);
305 
306 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
307 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
308 		if (total) {
309 			ratio = avg / total;
310 			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
311 		} else {
312 			fprintf(out, "                                   ");
313 		}
314 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
315 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
316 
317 		if (total && avg) {
318 			ratio = total / avg;
319 			fprintf(out, "\n");
320 			if (aggr == AGGR_NONE)
321 				fprintf(out, "        ");
322 			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
323 		}
324 
325 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
326 			runtime_branches_stats[ctx][cpu].n != 0) {
327 		print_branch_misses(out, cpu, evsel, avg);
328 	} else if (
329 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
330 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
331 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
332 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
333 			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
334 		print_l1_dcache_misses(out, cpu, evsel, avg);
335 	} else if (
336 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
337 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
338 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
339 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
340 			runtime_l1_icache_stats[ctx][cpu].n != 0) {
341 		print_l1_icache_misses(out, cpu, evsel, avg);
342 	} else if (
343 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
344 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
345 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
346 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
347 			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
348 		print_dtlb_cache_misses(out, cpu, evsel, avg);
349 	} else if (
350 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
351 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
352 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
353 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
354 			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
355 		print_itlb_cache_misses(out, cpu, evsel, avg);
356 	} else if (
357 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
358 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
359 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
360 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
361 			runtime_ll_cache_stats[ctx][cpu].n != 0) {
362 		print_ll_cache_misses(out, cpu, evsel, avg);
363 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
364 			runtime_cacherefs_stats[ctx][cpu].n != 0) {
365 		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
366 
367 		if (total)
368 			ratio = avg * 100 / total;
369 
370 		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
371 
372 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
373 		print_stalled_cycles_frontend(out, cpu, evsel, avg);
374 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
375 		print_stalled_cycles_backend(out, cpu, evsel, avg);
376 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
377 		total = avg_stats(&runtime_nsecs_stats[cpu]);
378 
379 		if (total) {
380 			ratio = avg / total;
381 			fprintf(out, " # %8.3f GHz                    ", ratio);
382 		} else {
383 			fprintf(out, "                                   ");
384 		}
385 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
386 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
387 		if (total)
388 			fprintf(out,
389 				" #   %5.2f%% transactional cycles   ",
390 				100.0 * (avg / total));
391 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
392 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
393 		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
394 		if (total2 < avg)
395 			total2 = avg;
396 		if (total)
397 			fprintf(out,
398 				" #   %5.2f%% aborted cycles         ",
399 				100.0 * ((total2-avg) / total));
400 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
401 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
402 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
403 
404 		if (avg)
405 			ratio = total / avg;
406 
407 		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
408 	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
409 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
410 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
411 
412 		if (avg)
413 			ratio = total / avg;
414 
415 		fprintf(out, " # %8.0f cycles / elision       ", ratio);
416 	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
417 		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
418 			fprintf(out, " # %8.3f CPUs utilized          ", avg / ratio);
419 		else
420 			fprintf(out, "                                   ");
421 	} else if (runtime_nsecs_stats[cpu].n != 0) {
422 		char unit = 'M';
423 
424 		total = avg_stats(&runtime_nsecs_stats[cpu]);
425 
426 		if (total)
427 			ratio = 1000.0 * avg / total;
428 		if (ratio < 0.001) {
429 			ratio *= 1000;
430 			unit = 'K';
431 		}
432 
433 		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
434 	} else {
435 		fprintf(out, "                                   ");
436 	}
437 }
438