xref: /openbmc/linux/tools/perf/util/mem-events.c (revision 240e6d25)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <stddef.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <errno.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <unistd.h>
9 #include <api/fs/fs.h>
10 #include <linux/kernel.h>
11 #include "map_symbol.h"
12 #include "mem-events.h"
13 #include "debug.h"
14 #include "symbol.h"
15 #include "pmu.h"
16 #include "pmu-hybrid.h"
17 
18 unsigned int perf_mem_events__loads_ldlat = 30;
19 
20 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
21 
22 static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
23 	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
24 	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
25 	E(NULL,			NULL,				NULL),
26 };
27 #undef E
28 
29 static char mem_loads_name[100];
30 static bool mem_loads_name__init;
31 
32 struct perf_mem_event * __weak perf_mem_events__ptr(int i)
33 {
34 	if (i >= PERF_MEM_EVENTS__MAX)
35 		return NULL;
36 
37 	return &perf_mem_events[i];
38 }
39 
40 char * __weak perf_mem_events__name(int i, char *pmu_name  __maybe_unused)
41 {
42 	struct perf_mem_event *e = perf_mem_events__ptr(i);
43 
44 	if (!e)
45 		return NULL;
46 
47 	if (i == PERF_MEM_EVENTS__LOAD) {
48 		if (!mem_loads_name__init) {
49 			mem_loads_name__init = true;
50 			scnprintf(mem_loads_name, sizeof(mem_loads_name),
51 				  e->name, perf_mem_events__loads_ldlat);
52 		}
53 		return mem_loads_name;
54 	}
55 
56 	return (char *)e->name;
57 }
58 
59 __weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
60 {
61 	return false;
62 }
63 
64 int perf_mem_events__parse(const char *str)
65 {
66 	char *tok, *saveptr = NULL;
67 	bool found = false;
68 	char *buf;
69 	int j;
70 
71 	/* We need buffer that we know we can write to. */
72 	buf = malloc(strlen(str) + 1);
73 	if (!buf)
74 		return -ENOMEM;
75 
76 	strcpy(buf, str);
77 
78 	tok = strtok_r((char *)buf, ",", &saveptr);
79 
80 	while (tok) {
81 		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
82 			struct perf_mem_event *e = perf_mem_events__ptr(j);
83 
84 			if (!e->tag)
85 				continue;
86 
87 			if (strstr(e->tag, tok))
88 				e->record = found = true;
89 		}
90 
91 		tok = strtok_r(NULL, ",", &saveptr);
92 	}
93 
94 	free(buf);
95 
96 	if (found)
97 		return 0;
98 
99 	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
100 	return -1;
101 }
102 
103 static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
104 {
105 	char path[PATH_MAX];
106 	struct stat st;
107 
108 	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
109 	return !stat(path, &st);
110 }
111 
112 int perf_mem_events__init(void)
113 {
114 	const char *mnt = sysfs__mount();
115 	bool found = false;
116 	int j;
117 
118 	if (!mnt)
119 		return -ENOENT;
120 
121 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
122 		struct perf_mem_event *e = perf_mem_events__ptr(j);
123 		struct perf_pmu *pmu;
124 		char sysfs_name[100];
125 
126 		/*
127 		 * If the event entry isn't valid, skip initialization
128 		 * and "e->supported" will keep false.
129 		 */
130 		if (!e->tag)
131 			continue;
132 
133 		if (!perf_pmu__has_hybrid()) {
134 			scnprintf(sysfs_name, sizeof(sysfs_name),
135 				  e->sysfs_name, "cpu");
136 			e->supported = perf_mem_event__supported(mnt, sysfs_name);
137 		} else {
138 			perf_pmu__for_each_hybrid_pmu(pmu) {
139 				scnprintf(sysfs_name, sizeof(sysfs_name),
140 					  e->sysfs_name, pmu->name);
141 				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
142 			}
143 		}
144 
145 		if (e->supported)
146 			found = true;
147 	}
148 
149 	return found ? 0 : -ENOENT;
150 }
151 
152 void perf_mem_events__list(void)
153 {
154 	int j;
155 
156 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
157 		struct perf_mem_event *e = perf_mem_events__ptr(j);
158 
159 		fprintf(stderr, "%-13s%-*s%s\n",
160 			e->tag ?: "",
161 			verbose > 0 ? 25 : 0,
162 			verbose > 0 ? perf_mem_events__name(j, NULL) : "",
163 			e->supported ? ": available" : "");
164 	}
165 }
166 
167 static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
168 						    int idx)
169 {
170 	const char *mnt = sysfs__mount();
171 	char sysfs_name[100];
172 	struct perf_pmu *pmu;
173 
174 	perf_pmu__for_each_hybrid_pmu(pmu) {
175 		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
176 			  pmu->name);
177 		if (!perf_mem_event__supported(mnt, sysfs_name)) {
178 			pr_err("failed: event '%s' not supported\n",
179 			       perf_mem_events__name(idx, pmu->name));
180 		}
181 	}
182 }
183 
184 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
185 				 char **rec_tmp, int *tmp_nr)
186 {
187 	int i = *argv_nr, k = 0;
188 	struct perf_mem_event *e;
189 	struct perf_pmu *pmu;
190 	char *s;
191 
192 	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
193 		e = perf_mem_events__ptr(j);
194 		if (!e->record)
195 			continue;
196 
197 		if (!perf_pmu__has_hybrid()) {
198 			if (!e->supported) {
199 				pr_err("failed: event '%s' not supported\n",
200 				       perf_mem_events__name(j, NULL));
201 				return -1;
202 			}
203 
204 			rec_argv[i++] = "-e";
205 			rec_argv[i++] = perf_mem_events__name(j, NULL);
206 		} else {
207 			if (!e->supported) {
208 				perf_mem_events__print_unsupport_hybrid(e, j);
209 				return -1;
210 			}
211 
212 			perf_pmu__for_each_hybrid_pmu(pmu) {
213 				rec_argv[i++] = "-e";
214 				s = perf_mem_events__name(j, pmu->name);
215 				if (s) {
216 					s = strdup(s);
217 					if (!s)
218 						return -1;
219 
220 					rec_argv[i++] = s;
221 					rec_tmp[k++] = s;
222 				}
223 			}
224 		}
225 	}
226 
227 	*argv_nr = i;
228 	*tmp_nr = k;
229 	return 0;
230 }
231 
232 static const char * const tlb_access[] = {
233 	"N/A",
234 	"HIT",
235 	"MISS",
236 	"L1",
237 	"L2",
238 	"Walker",
239 	"Fault",
240 };
241 
242 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
243 {
244 	size_t l = 0, i;
245 	u64 m = PERF_MEM_TLB_NA;
246 	u64 hit, miss;
247 
248 	sz -= 1; /* -1 for null termination */
249 	out[0] = '\0';
250 
251 	if (mem_info)
252 		m = mem_info->data_src.mem_dtlb;
253 
254 	hit = m & PERF_MEM_TLB_HIT;
255 	miss = m & PERF_MEM_TLB_MISS;
256 
257 	/* already taken care of */
258 	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
259 
260 	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
261 		if (!(m & 0x1))
262 			continue;
263 		if (l) {
264 			strcat(out, " or ");
265 			l += 4;
266 		}
267 		l += scnprintf(out + l, sz - l, tlb_access[i]);
268 	}
269 	if (*out == '\0')
270 		l += scnprintf(out, sz - l, "N/A");
271 	if (hit)
272 		l += scnprintf(out + l, sz - l, " hit");
273 	if (miss)
274 		l += scnprintf(out + l, sz - l, " miss");
275 
276 	return l;
277 }
278 
279 static const char * const mem_lvl[] = {
280 	"N/A",
281 	"HIT",
282 	"MISS",
283 	"L1",
284 	"LFB",
285 	"L2",
286 	"L3",
287 	"Local RAM",
288 	"Remote RAM (1 hop)",
289 	"Remote RAM (2 hops)",
290 	"Remote Cache (1 hop)",
291 	"Remote Cache (2 hops)",
292 	"I/O",
293 	"Uncached",
294 };
295 
296 static const char * const mem_lvlnum[] = {
297 	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
298 	[PERF_MEM_LVLNUM_LFB] = "LFB",
299 	[PERF_MEM_LVLNUM_RAM] = "RAM",
300 	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
301 	[PERF_MEM_LVLNUM_NA] = "N/A",
302 };
303 
304 static const char * const mem_hops[] = {
305 	"N/A",
306 	/*
307 	 * While printing, 'Remote' will be added to represent
308 	 * 'Remote core, same node' accesses as remote field need
309 	 * to be set with mem_hops field.
310 	 */
311 	"core, same node",
312 };
313 
314 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
315 {
316 	size_t i, l = 0;
317 	u64 m =  PERF_MEM_LVL_NA;
318 	u64 hit, miss;
319 	int printed;
320 
321 	if (mem_info)
322 		m  = mem_info->data_src.mem_lvl;
323 
324 	sz -= 1; /* -1 for null termination */
325 	out[0] = '\0';
326 
327 	hit = m & PERF_MEM_LVL_HIT;
328 	miss = m & PERF_MEM_LVL_MISS;
329 
330 	/* already taken care of */
331 	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
332 
333 	if (mem_info && mem_info->data_src.mem_remote) {
334 		strcat(out, "Remote ");
335 		l += 7;
336 	}
337 
338 	if (mem_info && mem_info->data_src.mem_hops)
339 		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
340 
341 	printed = 0;
342 	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
343 		if (!(m & 0x1))
344 			continue;
345 		if (printed++) {
346 			strcat(out, " or ");
347 			l += 4;
348 		}
349 		l += scnprintf(out + l, sz - l, mem_lvl[i]);
350 	}
351 
352 	if (mem_info && mem_info->data_src.mem_lvl_num) {
353 		int lvl = mem_info->data_src.mem_lvl_num;
354 		if (printed++) {
355 			strcat(out, " or ");
356 			l += 4;
357 		}
358 		if (mem_lvlnum[lvl])
359 			l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
360 		else
361 			l += scnprintf(out + l, sz - l, "L%d", lvl);
362 	}
363 
364 	if (l == 0)
365 		l += scnprintf(out + l, sz - l, "N/A");
366 	if (hit)
367 		l += scnprintf(out + l, sz - l, " hit");
368 	if (miss)
369 		l += scnprintf(out + l, sz - l, " miss");
370 
371 	return l;
372 }
373 
374 static const char * const snoop_access[] = {
375 	"N/A",
376 	"None",
377 	"Hit",
378 	"Miss",
379 	"HitM",
380 };
381 
382 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
383 {
384 	size_t i, l = 0;
385 	u64 m = PERF_MEM_SNOOP_NA;
386 
387 	sz -= 1; /* -1 for null termination */
388 	out[0] = '\0';
389 
390 	if (mem_info)
391 		m = mem_info->data_src.mem_snoop;
392 
393 	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
394 		if (!(m & 0x1))
395 			continue;
396 		if (l) {
397 			strcat(out, " or ");
398 			l += 4;
399 		}
400 		l += scnprintf(out + l, sz - l, snoop_access[i]);
401 	}
402 	if (mem_info &&
403 	     (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
404 		if (l) {
405 			strcat(out, " or ");
406 			l += 4;
407 		}
408 		l += scnprintf(out + l, sz - l, "Fwd");
409 	}
410 
411 	if (*out == '\0')
412 		l += scnprintf(out, sz - l, "N/A");
413 
414 	return l;
415 }
416 
417 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
418 {
419 	u64 mask = PERF_MEM_LOCK_NA;
420 	int l;
421 
422 	if (mem_info)
423 		mask = mem_info->data_src.mem_lock;
424 
425 	if (mask & PERF_MEM_LOCK_NA)
426 		l = scnprintf(out, sz, "N/A");
427 	else if (mask & PERF_MEM_LOCK_LOCKED)
428 		l = scnprintf(out, sz, "Yes");
429 	else
430 		l = scnprintf(out, sz, "No");
431 
432 	return l;
433 }
434 
435 int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
436 {
437 	size_t l = 0;
438 	u64 mask = PERF_MEM_BLK_NA;
439 
440 	sz -= 1; /* -1 for null termination */
441 	out[0] = '\0';
442 
443 	if (mem_info)
444 		mask = mem_info->data_src.mem_blk;
445 
446 	if (!mask || (mask & PERF_MEM_BLK_NA)) {
447 		l += scnprintf(out + l, sz - l, " N/A");
448 		return l;
449 	}
450 	if (mask & PERF_MEM_BLK_DATA)
451 		l += scnprintf(out + l, sz - l, " Data");
452 	if (mask & PERF_MEM_BLK_ADDR)
453 		l += scnprintf(out + l, sz - l, " Addr");
454 
455 	return l;
456 }
457 
/*
 * Build the combined memory description for @mem_info in @out:
 * "<level>|SNP <snoop>|TLB <tlb>|LCK <lock>|BLK <blocked>".
 *
 * Each part is appended after the previous one using the remaining space.
 * Returns the sum of the lengths reported by the individual formatters.
 */
int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	int len;

	len = perf_mem__lvl_scnprintf(out, sz, mem_info);

	len += scnprintf(out + len, sz - len, "|SNP ");
	len += perf_mem__snp_scnprintf(out + len, sz - len, mem_info);

	len += scnprintf(out + len, sz - len, "|TLB ");
	len += perf_mem__tlb_scnprintf(out + len, sz - len, mem_info);

	len += scnprintf(out + len, sz - len, "|LCK ");
	len += perf_mem__lck_scnprintf(out + len, sz - len, mem_info);

	len += scnprintf(out + len, sz - len, "|BLK ");
	len += perf_mem__blk_scnprintf(out + len, sz - len, mem_info);

	return len;
}
474 
475 int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
476 {
477 	union perf_mem_data_src *data_src = &mi->data_src;
478 	u64 daddr  = mi->daddr.addr;
479 	u64 op     = data_src->mem_op;
480 	u64 lvl    = data_src->mem_lvl;
481 	u64 snoop  = data_src->mem_snoop;
482 	u64 lock   = data_src->mem_lock;
483 	u64 blk    = data_src->mem_blk;
484 	/*
485 	 * Skylake might report unknown remote level via this
486 	 * bit, consider it when evaluating remote HITMs.
487 	 *
488 	 * Incase of power, remote field can also be used to denote cache
489 	 * accesses from the another core of same node. Hence, setting
490 	 * mrem only when HOPS is zero along with set remote field.
491 	 */
492 	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
493 	int err = 0;
494 
495 #define HITM_INC(__f)		\
496 do {				\
497 	stats->__f++;		\
498 	stats->tot_hitm++;	\
499 } while (0)
500 
501 #define P(a, b) PERF_MEM_##a##_##b
502 
503 	stats->nr_entries++;
504 
505 	if (lock & P(LOCK, LOCKED)) stats->locks++;
506 
507 	if (blk & P(BLK, DATA)) stats->blk_data++;
508 	if (blk & P(BLK, ADDR)) stats->blk_addr++;
509 
510 	if (op & P(OP, LOAD)) {
511 		/* load */
512 		stats->load++;
513 
514 		if (!daddr) {
515 			stats->ld_noadrs++;
516 			return -1;
517 		}
518 
519 		if (lvl & P(LVL, HIT)) {
520 			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
521 			if (lvl & P(LVL, IO))  stats->ld_io++;
522 			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
523 			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
524 			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
525 			if (lvl & P(LVL, L3 )) {
526 				if (snoop & P(SNOOP, HITM))
527 					HITM_INC(lcl_hitm);
528 				else
529 					stats->ld_llchit++;
530 			}
531 
532 			if (lvl & P(LVL, LOC_RAM)) {
533 				stats->lcl_dram++;
534 				if (snoop & P(SNOOP, HIT))
535 					stats->ld_shared++;
536 				else
537 					stats->ld_excl++;
538 			}
539 
540 			if ((lvl & P(LVL, REM_RAM1)) ||
541 			    (lvl & P(LVL, REM_RAM2)) ||
542 			     mrem) {
543 				stats->rmt_dram++;
544 				if (snoop & P(SNOOP, HIT))
545 					stats->ld_shared++;
546 				else
547 					stats->ld_excl++;
548 			}
549 		}
550 
551 		if ((lvl & P(LVL, REM_CCE1)) ||
552 		    (lvl & P(LVL, REM_CCE2)) ||
553 		     mrem) {
554 			if (snoop & P(SNOOP, HIT))
555 				stats->rmt_hit++;
556 			else if (snoop & P(SNOOP, HITM))
557 				HITM_INC(rmt_hitm);
558 		}
559 
560 		if ((lvl & P(LVL, MISS)))
561 			stats->ld_miss++;
562 
563 	} else if (op & P(OP, STORE)) {
564 		/* store */
565 		stats->store++;
566 
567 		if (!daddr) {
568 			stats->st_noadrs++;
569 			return -1;
570 		}
571 
572 		if (lvl & P(LVL, HIT)) {
573 			if (lvl & P(LVL, UNC)) stats->st_uncache++;
574 			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
575 		}
576 		if (lvl & P(LVL, MISS))
577 			if (lvl & P(LVL, L1)) stats->st_l1miss++;
578 	} else {
579 		/* unparsable data_src? */
580 		stats->noparse++;
581 		return -1;
582 	}
583 
584 	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
585 		stats->nomap++;
586 		return -1;
587 	}
588 
589 #undef P
590 #undef HITM_INC
591 	return err;
592 }
593 
594 void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
595 {
596 	stats->nr_entries	+= add->nr_entries;
597 
598 	stats->locks		+= add->locks;
599 	stats->store		+= add->store;
600 	stats->st_uncache	+= add->st_uncache;
601 	stats->st_noadrs	+= add->st_noadrs;
602 	stats->st_l1hit		+= add->st_l1hit;
603 	stats->st_l1miss	+= add->st_l1miss;
604 	stats->load		+= add->load;
605 	stats->ld_excl		+= add->ld_excl;
606 	stats->ld_shared	+= add->ld_shared;
607 	stats->ld_uncache	+= add->ld_uncache;
608 	stats->ld_io		+= add->ld_io;
609 	stats->ld_miss		+= add->ld_miss;
610 	stats->ld_noadrs	+= add->ld_noadrs;
611 	stats->ld_fbhit		+= add->ld_fbhit;
612 	stats->ld_l1hit		+= add->ld_l1hit;
613 	stats->ld_l2hit		+= add->ld_l2hit;
614 	stats->ld_llchit	+= add->ld_llchit;
615 	stats->lcl_hitm		+= add->lcl_hitm;
616 	stats->rmt_hitm		+= add->rmt_hitm;
617 	stats->tot_hitm		+= add->tot_hitm;
618 	stats->rmt_hit		+= add->rmt_hit;
619 	stats->lcl_dram		+= add->lcl_dram;
620 	stats->rmt_dram		+= add->rmt_dram;
621 	stats->blk_data		+= add->blk_data;
622 	stats->blk_addr		+= add->blk_addr;
623 	stats->nomap		+= add->nomap;
624 	stats->noparse		+= add->noparse;
625 }
626