1 // SPDX-License-Identifier: GPL-2.0 2 #include <stddef.h> 3 #include <stdlib.h> 4 #include <string.h> 5 #include <errno.h> 6 #include <sys/types.h> 7 #include <sys/stat.h> 8 #include <unistd.h> 9 #include <api/fs/fs.h> 10 #include <linux/kernel.h> 11 #include "map_symbol.h" 12 #include "mem-events.h" 13 #include "debug.h" 14 #include "symbol.h" 15 #include "pmu.h" 16 #include "pmu-hybrid.h" 17 18 unsigned int perf_mem_events__loads_ldlat = 30; 19 20 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } 21 22 static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { 23 E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), 24 E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), 25 E(NULL, NULL, NULL), 26 }; 27 #undef E 28 29 static char mem_loads_name[100]; 30 static bool mem_loads_name__init; 31 32 struct perf_mem_event * __weak perf_mem_events__ptr(int i) 33 { 34 if (i >= PERF_MEM_EVENTS__MAX) 35 return NULL; 36 37 return &perf_mem_events[i]; 38 } 39 40 char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused) 41 { 42 struct perf_mem_event *e = perf_mem_events__ptr(i); 43 44 if (!e) 45 return NULL; 46 47 if (i == PERF_MEM_EVENTS__LOAD) { 48 if (!mem_loads_name__init) { 49 mem_loads_name__init = true; 50 scnprintf(mem_loads_name, sizeof(mem_loads_name), 51 e->name, perf_mem_events__loads_ldlat); 52 } 53 return mem_loads_name; 54 } 55 56 return (char *)e->name; 57 } 58 59 __weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused) 60 { 61 return false; 62 } 63 64 int perf_mem_events__parse(const char *str) 65 { 66 char *tok, *saveptr = NULL; 67 bool found = false; 68 char *buf; 69 int j; 70 71 /* We need buffer that we know we can write to. */ 72 buf = malloc(strlen(str) + 1); 73 if (!buf) 74 return -ENOMEM; 75 76 strcpy(buf, str); 77 78 tok = strtok_r((char *)buf, ",", &saveptr); 79 80 while (tok) { 81 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 82 struct perf_mem_event *e = perf_mem_events__ptr(j); 83 84 if (!e->tag) 85 continue; 86 87 if (strstr(e->tag, tok)) 88 e->record = found = true; 89 } 90 91 tok = strtok_r(NULL, ",", &saveptr); 92 } 93 94 free(buf); 95 96 if (found) 97 return 0; 98 99 pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str); 100 return -1; 101 } 102 103 static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) 104 { 105 char path[PATH_MAX]; 106 struct stat st; 107 108 scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); 109 return !stat(path, &st); 110 } 111 112 int perf_mem_events__init(void) 113 { 114 const char *mnt = sysfs__mount(); 115 bool found = false; 116 int j; 117 118 if (!mnt) 119 return -ENOENT; 120 121 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 122 struct perf_mem_event *e = perf_mem_events__ptr(j); 123 struct perf_pmu *pmu; 124 char sysfs_name[100]; 125 126 /* 127 * If the event entry isn't valid, skip initialization 128 * and "e->supported" will keep false. 129 */ 130 if (!e->tag) 131 continue; 132 133 if (!perf_pmu__has_hybrid()) { 134 scnprintf(sysfs_name, sizeof(sysfs_name), 135 e->sysfs_name, "cpu"); 136 e->supported = perf_mem_event__supported(mnt, sysfs_name); 137 } else { 138 perf_pmu__for_each_hybrid_pmu(pmu) { 139 scnprintf(sysfs_name, sizeof(sysfs_name), 140 e->sysfs_name, pmu->name); 141 e->supported |= perf_mem_event__supported(mnt, sysfs_name); 142 } 143 } 144 145 if (e->supported) 146 found = true; 147 } 148 149 return found ? 0 : -ENOENT; 150 } 151 152 void perf_mem_events__list(void) 153 { 154 int j; 155 156 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 157 struct perf_mem_event *e = perf_mem_events__ptr(j); 158 159 fprintf(stderr, "%-13s%-*s%s\n", 160 e->tag ?: "", 161 verbose > 0 ? 25 : 0, 162 verbose > 0 ? perf_mem_events__name(j, NULL) : "", 163 e->supported ? ": available" : ""); 164 } 165 } 166 167 static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, 168 int idx) 169 { 170 const char *mnt = sysfs__mount(); 171 char sysfs_name[100]; 172 struct perf_pmu *pmu; 173 174 perf_pmu__for_each_hybrid_pmu(pmu) { 175 scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, 176 pmu->name); 177 if (!perf_mem_event__supported(mnt, sysfs_name)) { 178 pr_err("failed: event '%s' not supported\n", 179 perf_mem_events__name(idx, pmu->name)); 180 } 181 } 182 } 183 184 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, 185 char **rec_tmp, int *tmp_nr) 186 { 187 int i = *argv_nr, k = 0; 188 struct perf_mem_event *e; 189 struct perf_pmu *pmu; 190 char *s; 191 192 for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 193 e = perf_mem_events__ptr(j); 194 if (!e->record) 195 continue; 196 197 if (!perf_pmu__has_hybrid()) { 198 if (!e->supported) { 199 pr_err("failed: event '%s' not supported\n", 200 perf_mem_events__name(j, NULL)); 201 return -1; 202 } 203 204 rec_argv[i++] = "-e"; 205 rec_argv[i++] = perf_mem_events__name(j, NULL); 206 } else { 207 if (!e->supported) { 208 perf_mem_events__print_unsupport_hybrid(e, j); 209 return -1; 210 } 211 212 perf_pmu__for_each_hybrid_pmu(pmu) { 213 rec_argv[i++] = "-e"; 214 s = perf_mem_events__name(j, pmu->name); 215 if (s) { 216 s = strdup(s); 217 if (!s) 218 return -1; 219 220 rec_argv[i++] = s; 221 rec_tmp[k++] = s; 222 } 223 } 224 } 225 } 226 227 *argv_nr = i; 228 *tmp_nr = k; 229 return 0; 230 } 231 232 static const char * const tlb_access[] = { 233 "N/A", 234 "HIT", 235 "MISS", 236 "L1", 237 "L2", 238 "Walker", 239 "Fault", 240 }; 241 242 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 243 { 244 size_t l = 0, i; 245 u64 m = PERF_MEM_TLB_NA; 246 u64 hit, miss; 247 248 sz -= 1; /* -1 for null termination */ 249 out[0] = '\0'; 250 251 if (mem_info) 252 m = mem_info->data_src.mem_dtlb; 253 254 hit = m & PERF_MEM_TLB_HIT; 255 miss = m & PERF_MEM_TLB_MISS; 256 257 /* already taken care of */ 258 m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); 259 260 for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { 261 if (!(m & 0x1)) 262 continue; 263 if (l) { 264 strcat(out, " or "); 265 l += 4; 266 } 267 l += scnprintf(out + l, sz - l, tlb_access[i]); 268 } 269 if (*out == '\0') 270 l += scnprintf(out, sz - l, "N/A"); 271 if (hit) 272 l += scnprintf(out + l, sz - l, " hit"); 273 if (miss) 274 l += scnprintf(out + l, sz - l, " miss"); 275 276 return l; 277 } 278 279 static const char * const mem_lvl[] = { 280 "N/A", 281 "HIT", 282 "MISS", 283 "L1", 284 "LFB", 285 "L2", 286 "L3", 287 "Local RAM", 288 "Remote RAM (1 hop)", 289 "Remote RAM (2 hops)", 290 "Remote Cache (1 hop)", 291 "Remote Cache (2 hops)", 292 "I/O", 293 "Uncached", 294 }; 295 296 static const char * const mem_lvlnum[] = { 297 [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache", 298 [PERF_MEM_LVLNUM_LFB] = "LFB", 299 [PERF_MEM_LVLNUM_RAM] = "RAM", 300 [PERF_MEM_LVLNUM_PMEM] = "PMEM", 301 [PERF_MEM_LVLNUM_NA] = "N/A", 302 }; 303 304 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 305 { 306 size_t i, l = 0; 307 u64 m = PERF_MEM_LVL_NA; 308 u64 hit, miss; 309 int printed; 310 311 if (mem_info) 312 m = mem_info->data_src.mem_lvl; 313 314 sz -= 1; /* -1 for null termination */ 315 out[0] = '\0'; 316 317 hit = m & PERF_MEM_LVL_HIT; 318 miss = m & PERF_MEM_LVL_MISS; 319 320 /* already taken care of */ 321 m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); 322 323 324 if (mem_info && mem_info->data_src.mem_remote) { 325 strcat(out, "Remote "); 326 l += 7; 327 } 328 329 printed = 0; 330 for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { 331 if (!(m & 0x1)) 332 continue; 333 if (printed++) { 334 strcat(out, " or "); 335 l += 4; 336 } 337 l += scnprintf(out + l, sz - l, mem_lvl[i]); 338 } 339 340 if (mem_info && mem_info->data_src.mem_lvl_num) { 341 int lvl = mem_info->data_src.mem_lvl_num; 342 if (printed++) { 343 strcat(out, " or "); 344 l += 4; 345 } 346 if (mem_lvlnum[lvl]) 347 l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); 348 else 349 l += scnprintf(out + l, sz - l, "L%d", lvl); 350 } 351 352 if (l == 0) 353 l += scnprintf(out + l, sz - l, "N/A"); 354 if (hit) 355 l += scnprintf(out + l, sz - l, " hit"); 356 if (miss) 357 l += scnprintf(out + l, sz - l, " miss"); 358 359 return l; 360 } 361 362 static const char * const snoop_access[] = { 363 "N/A", 364 "None", 365 "Hit", 366 "Miss", 367 "HitM", 368 }; 369 370 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 371 { 372 size_t i, l = 0; 373 u64 m = PERF_MEM_SNOOP_NA; 374 375 sz -= 1; /* -1 for null termination */ 376 out[0] = '\0'; 377 378 if (mem_info) 379 m = mem_info->data_src.mem_snoop; 380 381 for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { 382 if (!(m & 0x1)) 383 continue; 384 if (l) { 385 strcat(out, " or "); 386 l += 4; 387 } 388 l += scnprintf(out + l, sz - l, snoop_access[i]); 389 } 390 if (mem_info && 391 (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) { 392 if (l) { 393 strcat(out, " or "); 394 l += 4; 395 } 396 l += scnprintf(out + l, sz - l, "Fwd"); 397 } 398 399 if (*out == '\0') 400 l += scnprintf(out, sz - l, "N/A"); 401 402 return l; 403 } 404 405 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 406 { 407 u64 mask = PERF_MEM_LOCK_NA; 408 int l; 409 410 if (mem_info) 411 mask = mem_info->data_src.mem_lock; 412 413 if (mask & PERF_MEM_LOCK_NA) 414 l = scnprintf(out, sz, "N/A"); 415 else if (mask & PERF_MEM_LOCK_LOCKED) 416 l = scnprintf(out, sz, "Yes"); 417 else 418 l = scnprintf(out, sz, "No"); 419 420 return l; 421 } 422 423 int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 424 { 425 size_t l = 0; 426 u64 mask = PERF_MEM_BLK_NA; 427 428 sz -= 1; /* -1 for null termination */ 429 out[0] = '\0'; 430 431 if (mem_info) 432 mask = mem_info->data_src.mem_blk; 433 434 if (!mask || (mask & PERF_MEM_BLK_NA)) { 435 l += scnprintf(out + l, sz - l, " N/A"); 436 return l; 437 } 438 if (mask & PERF_MEM_BLK_DATA) 439 l += scnprintf(out + l, sz - l, " Data"); 440 if (mask & PERF_MEM_BLK_ADDR) 441 l += scnprintf(out + l, sz - l, " Addr"); 442 443 return l; 444 } 445 446 int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 447 { 448 int i = 0; 449 450 i += perf_mem__lvl_scnprintf(out, sz, mem_info); 451 i += scnprintf(out + i, sz - i, "|SNP "); 452 i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); 453 i += scnprintf(out + i, sz - i, "|TLB "); 454 i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); 455 i += scnprintf(out + i, sz - i, "|LCK "); 456 i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); 457 i += scnprintf(out + i, sz - i, "|BLK "); 458 i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info); 459 460 return i; 461 } 462 463 int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) 464 { 465 union perf_mem_data_src *data_src = &mi->data_src; 466 u64 daddr = mi->daddr.addr; 467 u64 op = data_src->mem_op; 468 u64 lvl = data_src->mem_lvl; 469 u64 snoop = data_src->mem_snoop; 470 u64 lock = data_src->mem_lock; 471 u64 blk = data_src->mem_blk; 472 /* 473 * Skylake might report unknown remote level via this 474 * bit, consider it when evaluating remote HITMs. 475 */ 476 bool mrem = data_src->mem_remote; 477 int err = 0; 478 479 #define HITM_INC(__f) \ 480 do { \ 481 stats->__f++; \ 482 stats->tot_hitm++; \ 483 } while (0) 484 485 #define P(a, b) PERF_MEM_##a##_##b 486 487 stats->nr_entries++; 488 489 if (lock & P(LOCK, LOCKED)) stats->locks++; 490 491 if (blk & P(BLK, DATA)) stats->blk_data++; 492 if (blk & P(BLK, ADDR)) stats->blk_addr++; 493 494 if (op & P(OP, LOAD)) { 495 /* load */ 496 stats->load++; 497 498 if (!daddr) { 499 stats->ld_noadrs++; 500 return -1; 501 } 502 503 if (lvl & P(LVL, HIT)) { 504 if (lvl & P(LVL, UNC)) stats->ld_uncache++; 505 if (lvl & P(LVL, IO)) stats->ld_io++; 506 if (lvl & P(LVL, LFB)) stats->ld_fbhit++; 507 if (lvl & P(LVL, L1 )) stats->ld_l1hit++; 508 if (lvl & P(LVL, L2 )) stats->ld_l2hit++; 509 if (lvl & P(LVL, L3 )) { 510 if (snoop & P(SNOOP, HITM)) 511 HITM_INC(lcl_hitm); 512 else 513 stats->ld_llchit++; 514 } 515 516 if (lvl & P(LVL, LOC_RAM)) { 517 stats->lcl_dram++; 518 if (snoop & P(SNOOP, HIT)) 519 stats->ld_shared++; 520 else 521 stats->ld_excl++; 522 } 523 524 if ((lvl & P(LVL, REM_RAM1)) || 525 (lvl & P(LVL, REM_RAM2)) || 526 mrem) { 527 stats->rmt_dram++; 528 if (snoop & P(SNOOP, HIT)) 529 stats->ld_shared++; 530 else 531 stats->ld_excl++; 532 } 533 } 534 535 if ((lvl & P(LVL, REM_CCE1)) || 536 (lvl & P(LVL, REM_CCE2)) || 537 mrem) { 538 if (snoop & P(SNOOP, HIT)) 539 stats->rmt_hit++; 540 else if (snoop & P(SNOOP, HITM)) 541 HITM_INC(rmt_hitm); 542 } 543 544 if ((lvl & P(LVL, MISS))) 545 stats->ld_miss++; 546 547 } else if (op & P(OP, STORE)) { 548 /* store */ 549 stats->store++; 550 551 if (!daddr) { 552 stats->st_noadrs++; 553 return -1; 554 } 555 556 if (lvl & P(LVL, HIT)) { 557 if (lvl & P(LVL, UNC)) stats->st_uncache++; 558 if (lvl & P(LVL, L1 )) stats->st_l1hit++; 559 } 560 if (lvl & P(LVL, MISS)) 561 if (lvl & P(LVL, L1)) stats->st_l1miss++; 562 } else { 563 /* unparsable data_src? */ 564 stats->noparse++; 565 return -1; 566 } 567 568 if (!mi->daddr.ms.map || !mi->iaddr.ms.map) { 569 stats->nomap++; 570 return -1; 571 } 572 573 #undef P 574 #undef HITM_INC 575 return err; 576 } 577 578 void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) 579 { 580 stats->nr_entries += add->nr_entries; 581 582 stats->locks += add->locks; 583 stats->store += add->store; 584 stats->st_uncache += add->st_uncache; 585 stats->st_noadrs += add->st_noadrs; 586 stats->st_l1hit += add->st_l1hit; 587 stats->st_l1miss += add->st_l1miss; 588 stats->load += add->load; 589 stats->ld_excl += add->ld_excl; 590 stats->ld_shared += add->ld_shared; 591 stats->ld_uncache += add->ld_uncache; 592 stats->ld_io += add->ld_io; 593 stats->ld_miss += add->ld_miss; 594 stats->ld_noadrs += add->ld_noadrs; 595 stats->ld_fbhit += add->ld_fbhit; 596 stats->ld_l1hit += add->ld_l1hit; 597 stats->ld_l2hit += add->ld_l2hit; 598 stats->ld_llchit += add->ld_llchit; 599 stats->lcl_hitm += add->lcl_hitm; 600 stats->rmt_hitm += add->rmt_hitm; 601 stats->tot_hitm += add->tot_hitm; 602 stats->rmt_hit += add->rmt_hit; 603 stats->lcl_dram += add->lcl_dram; 604 stats->rmt_dram += add->rmt_dram; 605 stats->blk_data += add->blk_data; 606 stats->blk_addr += add->blk_addr; 607 stats->nomap += add->nomap; 608 stats->noparse += add->noparse; 609 } 610