xref: /openbmc/linux/tools/perf/util/mem-events.c (revision f16fe2d3)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <stddef.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <errno.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <unistd.h>
9 #include <api/fs/fs.h>
10 #include <linux/kernel.h>
11 #include "map_symbol.h"
12 #include "mem-events.h"
13 #include "debug.h"
14 #include "symbol.h"
15 #include "pmu.h"
16 #include "pmu-hybrid.h"
17 
18 unsigned int perf_mem_events__loads_ldlat = 30;
19 
20 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
21 
22 static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
23 	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
24 	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
25 	E(NULL,			NULL,				NULL),
26 };
27 #undef E
28 
29 static char mem_loads_name[100];
30 static bool mem_loads_name__init;
31 
32 struct perf_mem_event * __weak perf_mem_events__ptr(int i)
33 {
34 	if (i >= PERF_MEM_EVENTS__MAX)
35 		return NULL;
36 
37 	return &perf_mem_events[i];
38 }
39 
40 char * __weak perf_mem_events__name(int i, char *pmu_name  __maybe_unused)
41 {
42 	struct perf_mem_event *e = perf_mem_events__ptr(i);
43 
44 	if (!e)
45 		return NULL;
46 
47 	if (i == PERF_MEM_EVENTS__LOAD) {
48 		if (!mem_loads_name__init) {
49 			mem_loads_name__init = true;
50 			scnprintf(mem_loads_name, sizeof(mem_loads_name),
51 				  e->name, perf_mem_events__loads_ldlat);
52 		}
53 		return mem_loads_name;
54 	}
55 
56 	return (char *)e->name;
57 }
58 
59 __weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
60 {
61 	return false;
62 }
63 
64 int perf_mem_events__parse(const char *str)
65 {
66 	char *tok, *saveptr = NULL;
67 	bool found = false;
68 	char *buf;
69 	int j;
70 
71 	/* We need buffer that we know we can write to. */
72 	buf = malloc(strlen(str) + 1);
73 	if (!buf)
74 		return -ENOMEM;
75 
76 	strcpy(buf, str);
77 
78 	tok = strtok_r((char *)buf, ",", &saveptr);
79 
80 	while (tok) {
81 		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
82 			struct perf_mem_event *e = perf_mem_events__ptr(j);
83 
84 			if (!e->tag)
85 				continue;
86 
87 			if (strstr(e->tag, tok))
88 				e->record = found = true;
89 		}
90 
91 		tok = strtok_r(NULL, ",", &saveptr);
92 	}
93 
94 	free(buf);
95 
96 	if (found)
97 		return 0;
98 
99 	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
100 	return -1;
101 }
102 
103 static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
104 {
105 	char path[PATH_MAX];
106 	struct stat st;
107 
108 	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
109 	return !stat(path, &st);
110 }
111 
112 int perf_mem_events__init(void)
113 {
114 	const char *mnt = sysfs__mount();
115 	bool found = false;
116 	int j;
117 
118 	if (!mnt)
119 		return -ENOENT;
120 
121 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
122 		struct perf_mem_event *e = perf_mem_events__ptr(j);
123 		struct perf_pmu *pmu;
124 		char sysfs_name[100];
125 
126 		/*
127 		 * If the event entry isn't valid, skip initialization
128 		 * and "e->supported" will keep false.
129 		 */
130 		if (!e->tag)
131 			continue;
132 
133 		if (!perf_pmu__has_hybrid()) {
134 			scnprintf(sysfs_name, sizeof(sysfs_name),
135 				  e->sysfs_name, "cpu");
136 			e->supported = perf_mem_event__supported(mnt, sysfs_name);
137 		} else {
138 			perf_pmu__for_each_hybrid_pmu(pmu) {
139 				scnprintf(sysfs_name, sizeof(sysfs_name),
140 					  e->sysfs_name, pmu->name);
141 				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
142 			}
143 		}
144 
145 		if (e->supported)
146 			found = true;
147 	}
148 
149 	return found ? 0 : -ENOENT;
150 }
151 
152 void perf_mem_events__list(void)
153 {
154 	int j;
155 
156 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
157 		struct perf_mem_event *e = perf_mem_events__ptr(j);
158 
159 		fprintf(stderr, "%-13s%-*s%s\n",
160 			e->tag ?: "",
161 			verbose > 0 ? 25 : 0,
162 			verbose > 0 ? perf_mem_events__name(j, NULL) : "",
163 			e->supported ? ": available" : "");
164 	}
165 }
166 
167 static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
168 						    int idx)
169 {
170 	const char *mnt = sysfs__mount();
171 	char sysfs_name[100];
172 	struct perf_pmu *pmu;
173 
174 	perf_pmu__for_each_hybrid_pmu(pmu) {
175 		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
176 			  pmu->name);
177 		if (!perf_mem_event__supported(mnt, sysfs_name)) {
178 			pr_err("failed: event '%s' not supported\n",
179 			       perf_mem_events__name(idx, pmu->name));
180 		}
181 	}
182 }
183 
184 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
185 				 char **rec_tmp, int *tmp_nr)
186 {
187 	int i = *argv_nr, k = 0;
188 	struct perf_mem_event *e;
189 	struct perf_pmu *pmu;
190 	char *s;
191 
192 	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
193 		e = perf_mem_events__ptr(j);
194 		if (!e->record)
195 			continue;
196 
197 		if (!perf_pmu__has_hybrid()) {
198 			if (!e->supported) {
199 				pr_err("failed: event '%s' not supported\n",
200 				       perf_mem_events__name(j, NULL));
201 				return -1;
202 			}
203 
204 			rec_argv[i++] = "-e";
205 			rec_argv[i++] = perf_mem_events__name(j, NULL);
206 		} else {
207 			if (!e->supported) {
208 				perf_mem_events__print_unsupport_hybrid(e, j);
209 				return -1;
210 			}
211 
212 			perf_pmu__for_each_hybrid_pmu(pmu) {
213 				rec_argv[i++] = "-e";
214 				s = perf_mem_events__name(j, pmu->name);
215 				if (s) {
216 					s = strdup(s);
217 					if (!s)
218 						return -1;
219 
220 					rec_argv[i++] = s;
221 					rec_tmp[k++] = s;
222 				}
223 			}
224 		}
225 	}
226 
227 	*argv_nr = i;
228 	*tmp_nr = k;
229 	return 0;
230 }
231 
/*
 * TLB access descriptions. perf_mem__tlb_scnprintf() walks the bits of
 * data_src.mem_dtlb and uses the bit position as index into this table.
 */
static const char * const tlb_access[] = {
	[0] = "N/A",
	[1] = "HIT",
	[2] = "MISS",
	[3] = "L1",
	[4] = "L2",
	[5] = "Walker",
	[6] = "Fault",
};
241 
242 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
243 {
244 	size_t l = 0, i;
245 	u64 m = PERF_MEM_TLB_NA;
246 	u64 hit, miss;
247 
248 	sz -= 1; /* -1 for null termination */
249 	out[0] = '\0';
250 
251 	if (mem_info)
252 		m = mem_info->data_src.mem_dtlb;
253 
254 	hit = m & PERF_MEM_TLB_HIT;
255 	miss = m & PERF_MEM_TLB_MISS;
256 
257 	/* already taken care of */
258 	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
259 
260 	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
261 		if (!(m & 0x1))
262 			continue;
263 		if (l) {
264 			strcat(out, " or ");
265 			l += 4;
266 		}
267 		l += scnprintf(out + l, sz - l, tlb_access[i]);
268 	}
269 	if (*out == '\0')
270 		l += scnprintf(out, sz - l, "N/A");
271 	if (hit)
272 		l += scnprintf(out + l, sz - l, " hit");
273 	if (miss)
274 		l += scnprintf(out + l, sz - l, " miss");
275 
276 	return l;
277 }
278 
279 static const char * const mem_lvl[] = {
280 	"N/A",
281 	"HIT",
282 	"MISS",
283 	"L1",
284 	"LFB",
285 	"L2",
286 	"L3",
287 	"Local RAM",
288 	"Remote RAM (1 hop)",
289 	"Remote RAM (2 hops)",
290 	"Remote Cache (1 hop)",
291 	"Remote Cache (2 hops)",
292 	"I/O",
293 	"Uncached",
294 };
295 
296 static const char * const mem_lvlnum[] = {
297 	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
298 	[PERF_MEM_LVLNUM_LFB] = "LFB",
299 	[PERF_MEM_LVLNUM_RAM] = "RAM",
300 	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
301 	[PERF_MEM_LVLNUM_NA] = "N/A",
302 };
303 
304 static const char * const mem_hops[] = {
305 	"N/A",
306 	/*
307 	 * While printing, 'Remote' will be added to represent
308 	 * 'Remote core, same node' accesses as remote field need
309 	 * to be set with mem_hops field.
310 	 */
311 	"core, same node",
312 	"node, same socket",
313 	"socket, same board",
314 	"board",
315 };
316 
317 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
318 {
319 	size_t i, l = 0;
320 	u64 m =  PERF_MEM_LVL_NA;
321 	u64 hit, miss;
322 	int printed = 0;
323 
324 	if (mem_info)
325 		m  = mem_info->data_src.mem_lvl;
326 
327 	sz -= 1; /* -1 for null termination */
328 	out[0] = '\0';
329 
330 	hit = m & PERF_MEM_LVL_HIT;
331 	miss = m & PERF_MEM_LVL_MISS;
332 
333 	/* already taken care of */
334 	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
335 
336 	if (mem_info && mem_info->data_src.mem_remote) {
337 		strcat(out, "Remote ");
338 		l += 7;
339 	}
340 
341 	/*
342 	 * Incase mem_hops field is set, we can skip printing data source via
343 	 * PERF_MEM_LVL namespace.
344 	 */
345 	if (mem_info && mem_info->data_src.mem_hops) {
346 		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
347 	} else {
348 		for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
349 			if (!(m & 0x1))
350 				continue;
351 			if (printed++) {
352 				strcat(out, " or ");
353 				l += 4;
354 			}
355 			l += scnprintf(out + l, sz - l, mem_lvl[i]);
356 		}
357 	}
358 
359 	if (mem_info && mem_info->data_src.mem_lvl_num) {
360 		int lvl = mem_info->data_src.mem_lvl_num;
361 		if (printed++) {
362 			strcat(out, " or ");
363 			l += 4;
364 		}
365 		if (mem_lvlnum[lvl])
366 			l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
367 		else
368 			l += scnprintf(out + l, sz - l, "L%d", lvl);
369 	}
370 
371 	if (l == 0)
372 		l += scnprintf(out + l, sz - l, "N/A");
373 	if (hit)
374 		l += scnprintf(out + l, sz - l, " hit");
375 	if (miss)
376 		l += scnprintf(out + l, sz - l, " miss");
377 
378 	return l;
379 }
380 
/*
 * Snoop result descriptions. perf_mem__snp_scnprintf() walks the bits
 * of data_src.mem_snoop and uses the bit position as index into this
 * table.
 */
static const char * const snoop_access[] = {
	[0] = "N/A",
	[1] = "None",
	[2] = "Hit",
	[3] = "Miss",
	[4] = "HitM",
};
388 
389 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
390 {
391 	size_t i, l = 0;
392 	u64 m = PERF_MEM_SNOOP_NA;
393 
394 	sz -= 1; /* -1 for null termination */
395 	out[0] = '\0';
396 
397 	if (mem_info)
398 		m = mem_info->data_src.mem_snoop;
399 
400 	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
401 		if (!(m & 0x1))
402 			continue;
403 		if (l) {
404 			strcat(out, " or ");
405 			l += 4;
406 		}
407 		l += scnprintf(out + l, sz - l, snoop_access[i]);
408 	}
409 	if (mem_info &&
410 	     (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
411 		if (l) {
412 			strcat(out, " or ");
413 			l += 4;
414 		}
415 		l += scnprintf(out + l, sz - l, "Fwd");
416 	}
417 
418 	if (*out == '\0')
419 		l += scnprintf(out, sz - l, "N/A");
420 
421 	return l;
422 }
423 
424 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
425 {
426 	u64 mask = PERF_MEM_LOCK_NA;
427 	int l;
428 
429 	if (mem_info)
430 		mask = mem_info->data_src.mem_lock;
431 
432 	if (mask & PERF_MEM_LOCK_NA)
433 		l = scnprintf(out, sz, "N/A");
434 	else if (mask & PERF_MEM_LOCK_LOCKED)
435 		l = scnprintf(out, sz, "Yes");
436 	else
437 		l = scnprintf(out, sz, "No");
438 
439 	return l;
440 }
441 
442 int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
443 {
444 	size_t l = 0;
445 	u64 mask = PERF_MEM_BLK_NA;
446 
447 	sz -= 1; /* -1 for null termination */
448 	out[0] = '\0';
449 
450 	if (mem_info)
451 		mask = mem_info->data_src.mem_blk;
452 
453 	if (!mask || (mask & PERF_MEM_BLK_NA)) {
454 		l += scnprintf(out + l, sz - l, " N/A");
455 		return l;
456 	}
457 	if (mask & PERF_MEM_BLK_DATA)
458 		l += scnprintf(out + l, sz - l, " Data");
459 	if (mask & PERF_MEM_BLK_ADDR)
460 		l += scnprintf(out + l, sz - l, " Addr");
461 
462 	return l;
463 }
464 
465 int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
466 {
467 	int i = 0;
468 
469 	i += perf_mem__lvl_scnprintf(out, sz, mem_info);
470 	i += scnprintf(out + i, sz - i, "|SNP ");
471 	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
472 	i += scnprintf(out + i, sz - i, "|TLB ");
473 	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
474 	i += scnprintf(out + i, sz - i, "|LCK ");
475 	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
476 	i += scnprintf(out + i, sz - i, "|BLK ");
477 	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
478 
479 	return i;
480 }
481 
482 int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
483 {
484 	union perf_mem_data_src *data_src = &mi->data_src;
485 	u64 daddr  = mi->daddr.addr;
486 	u64 op     = data_src->mem_op;
487 	u64 lvl    = data_src->mem_lvl;
488 	u64 snoop  = data_src->mem_snoop;
489 	u64 lock   = data_src->mem_lock;
490 	u64 blk    = data_src->mem_blk;
491 	/*
492 	 * Skylake might report unknown remote level via this
493 	 * bit, consider it when evaluating remote HITMs.
494 	 *
495 	 * Incase of power, remote field can also be used to denote cache
496 	 * accesses from the another core of same node. Hence, setting
497 	 * mrem only when HOPS is zero along with set remote field.
498 	 */
499 	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
500 	int err = 0;
501 
502 #define HITM_INC(__f)		\
503 do {				\
504 	stats->__f++;		\
505 	stats->tot_hitm++;	\
506 } while (0)
507 
508 #define P(a, b) PERF_MEM_##a##_##b
509 
510 	stats->nr_entries++;
511 
512 	if (lock & P(LOCK, LOCKED)) stats->locks++;
513 
514 	if (blk & P(BLK, DATA)) stats->blk_data++;
515 	if (blk & P(BLK, ADDR)) stats->blk_addr++;
516 
517 	if (op & P(OP, LOAD)) {
518 		/* load */
519 		stats->load++;
520 
521 		if (!daddr) {
522 			stats->ld_noadrs++;
523 			return -1;
524 		}
525 
526 		if (lvl & P(LVL, HIT)) {
527 			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
528 			if (lvl & P(LVL, IO))  stats->ld_io++;
529 			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
530 			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
531 			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
532 			if (lvl & P(LVL, L3 )) {
533 				if (snoop & P(SNOOP, HITM))
534 					HITM_INC(lcl_hitm);
535 				else
536 					stats->ld_llchit++;
537 			}
538 
539 			if (lvl & P(LVL, LOC_RAM)) {
540 				stats->lcl_dram++;
541 				if (snoop & P(SNOOP, HIT))
542 					stats->ld_shared++;
543 				else
544 					stats->ld_excl++;
545 			}
546 
547 			if ((lvl & P(LVL, REM_RAM1)) ||
548 			    (lvl & P(LVL, REM_RAM2)) ||
549 			     mrem) {
550 				stats->rmt_dram++;
551 				if (snoop & P(SNOOP, HIT))
552 					stats->ld_shared++;
553 				else
554 					stats->ld_excl++;
555 			}
556 		}
557 
558 		if ((lvl & P(LVL, REM_CCE1)) ||
559 		    (lvl & P(LVL, REM_CCE2)) ||
560 		     mrem) {
561 			if (snoop & P(SNOOP, HIT))
562 				stats->rmt_hit++;
563 			else if (snoop & P(SNOOP, HITM))
564 				HITM_INC(rmt_hitm);
565 		}
566 
567 		if ((lvl & P(LVL, MISS)))
568 			stats->ld_miss++;
569 
570 	} else if (op & P(OP, STORE)) {
571 		/* store */
572 		stats->store++;
573 
574 		if (!daddr) {
575 			stats->st_noadrs++;
576 			return -1;
577 		}
578 
579 		if (lvl & P(LVL, HIT)) {
580 			if (lvl & P(LVL, UNC)) stats->st_uncache++;
581 			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
582 		}
583 		if (lvl & P(LVL, MISS))
584 			if (lvl & P(LVL, L1)) stats->st_l1miss++;
585 	} else {
586 		/* unparsable data_src? */
587 		stats->noparse++;
588 		return -1;
589 	}
590 
591 	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
592 		stats->nomap++;
593 		return -1;
594 	}
595 
596 #undef P
597 #undef HITM_INC
598 	return err;
599 }
600 
601 void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
602 {
603 	stats->nr_entries	+= add->nr_entries;
604 
605 	stats->locks		+= add->locks;
606 	stats->store		+= add->store;
607 	stats->st_uncache	+= add->st_uncache;
608 	stats->st_noadrs	+= add->st_noadrs;
609 	stats->st_l1hit		+= add->st_l1hit;
610 	stats->st_l1miss	+= add->st_l1miss;
611 	stats->load		+= add->load;
612 	stats->ld_excl		+= add->ld_excl;
613 	stats->ld_shared	+= add->ld_shared;
614 	stats->ld_uncache	+= add->ld_uncache;
615 	stats->ld_io		+= add->ld_io;
616 	stats->ld_miss		+= add->ld_miss;
617 	stats->ld_noadrs	+= add->ld_noadrs;
618 	stats->ld_fbhit		+= add->ld_fbhit;
619 	stats->ld_l1hit		+= add->ld_l1hit;
620 	stats->ld_l2hit		+= add->ld_l2hit;
621 	stats->ld_llchit	+= add->ld_llchit;
622 	stats->lcl_hitm		+= add->lcl_hitm;
623 	stats->rmt_hitm		+= add->rmt_hitm;
624 	stats->tot_hitm		+= add->tot_hitm;
625 	stats->rmt_hit		+= add->rmt_hit;
626 	stats->lcl_dram		+= add->lcl_dram;
627 	stats->rmt_dram		+= add->rmt_dram;
628 	stats->blk_data		+= add->blk_data;
629 	stats->blk_addr		+= add->blk_addr;
630 	stats->nomap		+= add->nomap;
631 	stats->noparse		+= add->noparse;
632 }
633