xref: /openbmc/linux/tools/perf/util/mem-events.c (revision 53f9cd5c)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <stddef.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <errno.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <unistd.h>
9 #include <api/fs/fs.h>
10 #include <linux/kernel.h>
11 #include "map_symbol.h"
12 #include "mem-events.h"
13 #include "debug.h"
14 #include "symbol.h"
15 #include "pmu.h"
16 #include "pmu-hybrid.h"
17 
18 unsigned int perf_mem_events__loads_ldlat = 30;
19 
20 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
21 
22 static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
23 	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
24 	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
25 	E(NULL,			NULL,				NULL),
26 };
27 #undef E
28 
29 static char mem_loads_name[100];
30 static bool mem_loads_name__init;
31 
32 struct perf_mem_event * __weak perf_mem_events__ptr(int i)
33 {
34 	if (i >= PERF_MEM_EVENTS__MAX)
35 		return NULL;
36 
37 	return &perf_mem_events[i];
38 }
39 
40 char * __weak perf_mem_events__name(int i, char *pmu_name  __maybe_unused)
41 {
42 	struct perf_mem_event *e = perf_mem_events__ptr(i);
43 
44 	if (!e)
45 		return NULL;
46 
47 	if (i == PERF_MEM_EVENTS__LOAD) {
48 		if (!mem_loads_name__init) {
49 			mem_loads_name__init = true;
50 			scnprintf(mem_loads_name, sizeof(mem_loads_name),
51 				  e->name, perf_mem_events__loads_ldlat);
52 		}
53 		return mem_loads_name;
54 	}
55 
56 	return (char *)e->name;
57 }
58 
59 __weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
60 {
61 	return false;
62 }
63 
64 int perf_mem_events__parse(const char *str)
65 {
66 	char *tok, *saveptr = NULL;
67 	bool found = false;
68 	char *buf;
69 	int j;
70 
71 	/* We need buffer that we know we can write to. */
72 	buf = malloc(strlen(str) + 1);
73 	if (!buf)
74 		return -ENOMEM;
75 
76 	strcpy(buf, str);
77 
78 	tok = strtok_r((char *)buf, ",", &saveptr);
79 
80 	while (tok) {
81 		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
82 			struct perf_mem_event *e = perf_mem_events__ptr(j);
83 
84 			if (!e->tag)
85 				continue;
86 
87 			if (strstr(e->tag, tok))
88 				e->record = found = true;
89 		}
90 
91 		tok = strtok_r(NULL, ",", &saveptr);
92 	}
93 
94 	free(buf);
95 
96 	if (found)
97 		return 0;
98 
99 	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
100 	return -1;
101 }
102 
103 static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
104 {
105 	char path[PATH_MAX];
106 	struct stat st;
107 
108 	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
109 	return !stat(path, &st);
110 }
111 
112 int perf_mem_events__init(void)
113 {
114 	const char *mnt = sysfs__mount();
115 	bool found = false;
116 	int j;
117 
118 	if (!mnt)
119 		return -ENOENT;
120 
121 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
122 		struct perf_mem_event *e = perf_mem_events__ptr(j);
123 		struct perf_pmu *pmu;
124 		char sysfs_name[100];
125 
126 		/*
127 		 * If the event entry isn't valid, skip initialization
128 		 * and "e->supported" will keep false.
129 		 */
130 		if (!e->tag)
131 			continue;
132 
133 		if (!perf_pmu__has_hybrid()) {
134 			scnprintf(sysfs_name, sizeof(sysfs_name),
135 				  e->sysfs_name, "cpu");
136 			e->supported = perf_mem_event__supported(mnt, sysfs_name);
137 		} else {
138 			perf_pmu__for_each_hybrid_pmu(pmu) {
139 				scnprintf(sysfs_name, sizeof(sysfs_name),
140 					  e->sysfs_name, pmu->name);
141 				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
142 			}
143 		}
144 
145 		if (e->supported)
146 			found = true;
147 	}
148 
149 	return found ? 0 : -ENOENT;
150 }
151 
152 void perf_mem_events__list(void)
153 {
154 	int j;
155 
156 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
157 		struct perf_mem_event *e = perf_mem_events__ptr(j);
158 
159 		fprintf(stderr, "%-13s%-*s%s\n",
160 			e->tag ?: "",
161 			verbose > 0 ? 25 : 0,
162 			verbose > 0 ? perf_mem_events__name(j, NULL) : "",
163 			e->supported ? ": available" : "");
164 	}
165 }
166 
167 static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
168 						    int idx)
169 {
170 	const char *mnt = sysfs__mount();
171 	char sysfs_name[100];
172 	struct perf_pmu *pmu;
173 
174 	perf_pmu__for_each_hybrid_pmu(pmu) {
175 		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
176 			  pmu->name);
177 		if (!perf_mem_event__supported(mnt, sysfs_name)) {
178 			pr_err("failed: event '%s' not supported\n",
179 			       perf_mem_events__name(idx, pmu->name));
180 		}
181 	}
182 }
183 
184 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
185 				 char **rec_tmp, int *tmp_nr)
186 {
187 	int i = *argv_nr, k = 0;
188 	struct perf_mem_event *e;
189 	struct perf_pmu *pmu;
190 	char *s;
191 
192 	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
193 		e = perf_mem_events__ptr(j);
194 		if (!e->record)
195 			continue;
196 
197 		if (!perf_pmu__has_hybrid()) {
198 			if (!e->supported) {
199 				pr_err("failed: event '%s' not supported\n",
200 				       perf_mem_events__name(j, NULL));
201 				return -1;
202 			}
203 
204 			rec_argv[i++] = "-e";
205 			rec_argv[i++] = perf_mem_events__name(j, NULL);
206 		} else {
207 			if (!e->supported) {
208 				perf_mem_events__print_unsupport_hybrid(e, j);
209 				return -1;
210 			}
211 
212 			perf_pmu__for_each_hybrid_pmu(pmu) {
213 				rec_argv[i++] = "-e";
214 				s = perf_mem_events__name(j, pmu->name);
215 				if (s) {
216 					s = strdup(s);
217 					if (!s)
218 						return -1;
219 
220 					rec_argv[i++] = s;
221 					rec_tmp[k++] = s;
222 				}
223 			}
224 		}
225 	}
226 
227 	*argv_nr = i;
228 	*tmp_nr = k;
229 	return 0;
230 }
231 
232 static const char * const tlb_access[] = {
233 	"N/A",
234 	"HIT",
235 	"MISS",
236 	"L1",
237 	"L2",
238 	"Walker",
239 	"Fault",
240 };
241 
242 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
243 {
244 	size_t l = 0, i;
245 	u64 m = PERF_MEM_TLB_NA;
246 	u64 hit, miss;
247 
248 	sz -= 1; /* -1 for null termination */
249 	out[0] = '\0';
250 
251 	if (mem_info)
252 		m = mem_info->data_src.mem_dtlb;
253 
254 	hit = m & PERF_MEM_TLB_HIT;
255 	miss = m & PERF_MEM_TLB_MISS;
256 
257 	/* already taken care of */
258 	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
259 
260 	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
261 		if (!(m & 0x1))
262 			continue;
263 		if (l) {
264 			strcat(out, " or ");
265 			l += 4;
266 		}
267 		l += scnprintf(out + l, sz - l, tlb_access[i]);
268 	}
269 	if (*out == '\0')
270 		l += scnprintf(out, sz - l, "N/A");
271 	if (hit)
272 		l += scnprintf(out + l, sz - l, " hit");
273 	if (miss)
274 		l += scnprintf(out + l, sz - l, " miss");
275 
276 	return l;
277 }
278 
279 static const char * const mem_lvl[] = {
280 	"N/A",
281 	"HIT",
282 	"MISS",
283 	"L1",
284 	"LFB",
285 	"L2",
286 	"L3",
287 	"Local RAM",
288 	"Remote RAM (1 hop)",
289 	"Remote RAM (2 hops)",
290 	"Remote Cache (1 hop)",
291 	"Remote Cache (2 hops)",
292 	"I/O",
293 	"Uncached",
294 };
295 
296 static const char * const mem_lvlnum[] = {
297 	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
298 	[PERF_MEM_LVLNUM_LFB] = "LFB",
299 	[PERF_MEM_LVLNUM_RAM] = "RAM",
300 	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
301 	[PERF_MEM_LVLNUM_NA] = "N/A",
302 };
303 
304 static const char * const mem_hops[] = {
305 	"N/A",
306 	/*
307 	 * While printing, 'Remote' will be added to represent
308 	 * 'Remote core, same node' accesses as remote field need
309 	 * to be set with mem_hops field.
310 	 */
311 	"core, same node",
312 	"node, same socket",
313 	"socket, same board",
314 	"board",
315 };
316 
317 static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
318 {
319 	u64 op = PERF_MEM_LOCK_NA;
320 	int l;
321 
322 	if (mem_info)
323 		op = mem_info->data_src.mem_op;
324 
325 	if (op & PERF_MEM_OP_NA)
326 		l = scnprintf(out, sz, "N/A");
327 	else if (op & PERF_MEM_OP_LOAD)
328 		l = scnprintf(out, sz, "LOAD");
329 	else if (op & PERF_MEM_OP_STORE)
330 		l = scnprintf(out, sz, "STORE");
331 	else if (op & PERF_MEM_OP_PFETCH)
332 		l = scnprintf(out, sz, "PFETCH");
333 	else if (op & PERF_MEM_OP_EXEC)
334 		l = scnprintf(out, sz, "EXEC");
335 	else
336 		l = scnprintf(out, sz, "No");
337 
338 	return l;
339 }
340 
341 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
342 {
343 	size_t i, l = 0;
344 	u64 m =  PERF_MEM_LVL_NA;
345 	u64 hit, miss;
346 	int printed = 0;
347 
348 	if (mem_info)
349 		m  = mem_info->data_src.mem_lvl;
350 
351 	sz -= 1; /* -1 for null termination */
352 	out[0] = '\0';
353 
354 	hit = m & PERF_MEM_LVL_HIT;
355 	miss = m & PERF_MEM_LVL_MISS;
356 
357 	/* already taken care of */
358 	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
359 
360 	if (mem_info && mem_info->data_src.mem_remote) {
361 		strcat(out, "Remote ");
362 		l += 7;
363 	}
364 
365 	/*
366 	 * Incase mem_hops field is set, we can skip printing data source via
367 	 * PERF_MEM_LVL namespace.
368 	 */
369 	if (mem_info && mem_info->data_src.mem_hops) {
370 		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
371 	} else {
372 		for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
373 			if (!(m & 0x1))
374 				continue;
375 			if (printed++) {
376 				strcat(out, " or ");
377 				l += 4;
378 			}
379 			l += scnprintf(out + l, sz - l, mem_lvl[i]);
380 		}
381 	}
382 
383 	if (mem_info && mem_info->data_src.mem_lvl_num) {
384 		int lvl = mem_info->data_src.mem_lvl_num;
385 		if (printed++) {
386 			strcat(out, " or ");
387 			l += 4;
388 		}
389 		if (mem_lvlnum[lvl])
390 			l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
391 		else
392 			l += scnprintf(out + l, sz - l, "L%d", lvl);
393 	}
394 
395 	if (l == 0)
396 		l += scnprintf(out + l, sz - l, "N/A");
397 	if (hit)
398 		l += scnprintf(out + l, sz - l, " hit");
399 	if (miss)
400 		l += scnprintf(out + l, sz - l, " miss");
401 
402 	return l;
403 }
404 
405 static const char * const snoop_access[] = {
406 	"N/A",
407 	"None",
408 	"Hit",
409 	"Miss",
410 	"HitM",
411 };
412 
413 static const char * const snoopx_access[] = {
414 	"Fwd",
415 	"Peer",
416 };
417 
418 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
419 {
420 	size_t i, l = 0;
421 	u64 m = PERF_MEM_SNOOP_NA;
422 
423 	sz -= 1; /* -1 for null termination */
424 	out[0] = '\0';
425 
426 	if (mem_info)
427 		m = mem_info->data_src.mem_snoop;
428 
429 	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
430 		if (!(m & 0x1))
431 			continue;
432 		if (l) {
433 			strcat(out, " or ");
434 			l += 4;
435 		}
436 		l += scnprintf(out + l, sz - l, snoop_access[i]);
437 	}
438 
439 	m = 0;
440 	if (mem_info)
441 		m = mem_info->data_src.mem_snoopx;
442 
443 	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
444 		if (!(m & 0x1))
445 			continue;
446 
447 		if (l) {
448 			strcat(out, " or ");
449 			l += 4;
450 		}
451 		l += scnprintf(out + l, sz - l, snoopx_access[i]);
452 	}
453 
454 	if (*out == '\0')
455 		l += scnprintf(out, sz - l, "N/A");
456 
457 	return l;
458 }
459 
460 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
461 {
462 	u64 mask = PERF_MEM_LOCK_NA;
463 	int l;
464 
465 	if (mem_info)
466 		mask = mem_info->data_src.mem_lock;
467 
468 	if (mask & PERF_MEM_LOCK_NA)
469 		l = scnprintf(out, sz, "N/A");
470 	else if (mask & PERF_MEM_LOCK_LOCKED)
471 		l = scnprintf(out, sz, "Yes");
472 	else
473 		l = scnprintf(out, sz, "No");
474 
475 	return l;
476 }
477 
478 int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
479 {
480 	size_t l = 0;
481 	u64 mask = PERF_MEM_BLK_NA;
482 
483 	sz -= 1; /* -1 for null termination */
484 	out[0] = '\0';
485 
486 	if (mem_info)
487 		mask = mem_info->data_src.mem_blk;
488 
489 	if (!mask || (mask & PERF_MEM_BLK_NA)) {
490 		l += scnprintf(out + l, sz - l, " N/A");
491 		return l;
492 	}
493 	if (mask & PERF_MEM_BLK_DATA)
494 		l += scnprintf(out + l, sz - l, " Data");
495 	if (mask & PERF_MEM_BLK_ADDR)
496 		l += scnprintf(out + l, sz - l, " Addr");
497 
498 	return l;
499 }
500 
/*
 * Format all decoded data source fields of @mem_info into @out as
 * "|OP ...|LVL ...|SNP ...|TLB ...|LCK ...|BLK ...".
 *
 * @out: destination buffer.
 * @sz:  size of @out in bytes.
 *
 * Every piece is given only the space that is left after the pieces
 * before it (sz - i), so the combined text cannot run past @out.
 *
 * Returns the number of characters written, not counting the terminator.
 */
int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	int i = 0;

	i += scnprintf(out, sz, "|OP ");
	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LVL ");
	i += perf_mem__lvl_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}
520 
521 int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
522 {
523 	union perf_mem_data_src *data_src = &mi->data_src;
524 	u64 daddr  = mi->daddr.addr;
525 	u64 op     = data_src->mem_op;
526 	u64 lvl    = data_src->mem_lvl;
527 	u64 snoop  = data_src->mem_snoop;
528 	u64 snoopx = data_src->mem_snoopx;
529 	u64 lock   = data_src->mem_lock;
530 	u64 blk    = data_src->mem_blk;
531 	/*
532 	 * Skylake might report unknown remote level via this
533 	 * bit, consider it when evaluating remote HITMs.
534 	 *
535 	 * Incase of power, remote field can also be used to denote cache
536 	 * accesses from the another core of same node. Hence, setting
537 	 * mrem only when HOPS is zero along with set remote field.
538 	 */
539 	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
540 	int err = 0;
541 
542 #define HITM_INC(__f)		\
543 do {				\
544 	stats->__f++;		\
545 	stats->tot_hitm++;	\
546 } while (0)
547 
548 #define PEER_INC(__f)		\
549 do {				\
550 	stats->__f++;		\
551 	stats->tot_peer++;	\
552 } while (0)
553 
554 #define P(a, b) PERF_MEM_##a##_##b
555 
556 	stats->nr_entries++;
557 
558 	if (lock & P(LOCK, LOCKED)) stats->locks++;
559 
560 	if (blk & P(BLK, DATA)) stats->blk_data++;
561 	if (blk & P(BLK, ADDR)) stats->blk_addr++;
562 
563 	if (op & P(OP, LOAD)) {
564 		/* load */
565 		stats->load++;
566 
567 		if (!daddr) {
568 			stats->ld_noadrs++;
569 			return -1;
570 		}
571 
572 		if (lvl & P(LVL, HIT)) {
573 			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
574 			if (lvl & P(LVL, IO))  stats->ld_io++;
575 			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
576 			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
577 			if (lvl & P(LVL, L2)) {
578 				stats->ld_l2hit++;
579 
580 				if (snoopx & P(SNOOPX, PEER))
581 					PEER_INC(lcl_peer);
582 			}
583 			if (lvl & P(LVL, L3 )) {
584 				if (snoop & P(SNOOP, HITM))
585 					HITM_INC(lcl_hitm);
586 				else
587 					stats->ld_llchit++;
588 
589 				if (snoopx & P(SNOOPX, PEER))
590 					PEER_INC(lcl_peer);
591 			}
592 
593 			if (lvl & P(LVL, LOC_RAM)) {
594 				stats->lcl_dram++;
595 				if (snoop & P(SNOOP, HIT))
596 					stats->ld_shared++;
597 				else
598 					stats->ld_excl++;
599 			}
600 
601 			if ((lvl & P(LVL, REM_RAM1)) ||
602 			    (lvl & P(LVL, REM_RAM2)) ||
603 			     mrem) {
604 				stats->rmt_dram++;
605 				if (snoop & P(SNOOP, HIT))
606 					stats->ld_shared++;
607 				else
608 					stats->ld_excl++;
609 			}
610 		}
611 
612 		if ((lvl & P(LVL, REM_CCE1)) ||
613 		    (lvl & P(LVL, REM_CCE2)) ||
614 		     mrem) {
615 			if (snoop & P(SNOOP, HIT)) {
616 				stats->rmt_hit++;
617 			} else if (snoop & P(SNOOP, HITM)) {
618 				HITM_INC(rmt_hitm);
619 			} else if (snoopx & P(SNOOPX, PEER)) {
620 				stats->rmt_hit++;
621 				PEER_INC(rmt_peer);
622 			}
623 		}
624 
625 		if ((lvl & P(LVL, MISS)))
626 			stats->ld_miss++;
627 
628 	} else if (op & P(OP, STORE)) {
629 		/* store */
630 		stats->store++;
631 
632 		if (!daddr) {
633 			stats->st_noadrs++;
634 			return -1;
635 		}
636 
637 		if (lvl & P(LVL, HIT)) {
638 			if (lvl & P(LVL, UNC)) stats->st_uncache++;
639 			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
640 		}
641 		if (lvl & P(LVL, MISS))
642 			if (lvl & P(LVL, L1)) stats->st_l1miss++;
643 		if (lvl & P(LVL, NA))
644 			stats->st_na++;
645 	} else {
646 		/* unparsable data_src? */
647 		stats->noparse++;
648 		return -1;
649 	}
650 
651 	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
652 		stats->nomap++;
653 		return -1;
654 	}
655 
656 #undef P
657 #undef HITM_INC
658 	return err;
659 }
660 
661 void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
662 {
663 	stats->nr_entries	+= add->nr_entries;
664 
665 	stats->locks		+= add->locks;
666 	stats->store		+= add->store;
667 	stats->st_uncache	+= add->st_uncache;
668 	stats->st_noadrs	+= add->st_noadrs;
669 	stats->st_l1hit		+= add->st_l1hit;
670 	stats->st_l1miss	+= add->st_l1miss;
671 	stats->st_na		+= add->st_na;
672 	stats->load		+= add->load;
673 	stats->ld_excl		+= add->ld_excl;
674 	stats->ld_shared	+= add->ld_shared;
675 	stats->ld_uncache	+= add->ld_uncache;
676 	stats->ld_io		+= add->ld_io;
677 	stats->ld_miss		+= add->ld_miss;
678 	stats->ld_noadrs	+= add->ld_noadrs;
679 	stats->ld_fbhit		+= add->ld_fbhit;
680 	stats->ld_l1hit		+= add->ld_l1hit;
681 	stats->ld_l2hit		+= add->ld_l2hit;
682 	stats->ld_llchit	+= add->ld_llchit;
683 	stats->lcl_hitm		+= add->lcl_hitm;
684 	stats->rmt_hitm		+= add->rmt_hitm;
685 	stats->tot_hitm		+= add->tot_hitm;
686 	stats->lcl_peer		+= add->lcl_peer;
687 	stats->rmt_peer		+= add->rmt_peer;
688 	stats->tot_peer		+= add->tot_peer;
689 	stats->rmt_hit		+= add->rmt_hit;
690 	stats->lcl_dram		+= add->lcl_dram;
691 	stats->rmt_dram		+= add->rmt_dram;
692 	stats->blk_data		+= add->blk_data;
693 	stats->blk_addr		+= add->blk_addr;
694 	stats->nomap		+= add->nomap;
695 	stats->noparse		+= add->noparse;
696 }
697