1 // SPDX-License-Identifier: GPL-2.0 2 #include <stddef.h> 3 #include <stdlib.h> 4 #include <string.h> 5 #include <errno.h> 6 #include <sys/types.h> 7 #include <sys/stat.h> 8 #include <unistd.h> 9 #include <api/fs/fs.h> 10 #include <linux/kernel.h> 11 #include "map_symbol.h" 12 #include "mem-events.h" 13 #include "debug.h" 14 #include "symbol.h" 15 #include "pmu.h" 16 17 unsigned int perf_mem_events__loads_ldlat = 30; 18 19 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } 20 21 static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { 22 E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), 23 E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), 24 E(NULL, NULL, NULL), 25 }; 26 #undef E 27 28 static char mem_loads_name[100]; 29 static bool mem_loads_name__init; 30 31 struct perf_mem_event * __weak perf_mem_events__ptr(int i) 32 { 33 if (i >= PERF_MEM_EVENTS__MAX) 34 return NULL; 35 36 return &perf_mem_events[i]; 37 } 38 39 char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused) 40 { 41 struct perf_mem_event *e = perf_mem_events__ptr(i); 42 43 if (!e) 44 return NULL; 45 46 if (i == PERF_MEM_EVENTS__LOAD) { 47 if (!mem_loads_name__init) { 48 mem_loads_name__init = true; 49 scnprintf(mem_loads_name, sizeof(mem_loads_name), 50 e->name, perf_mem_events__loads_ldlat); 51 } 52 return mem_loads_name; 53 } 54 55 return (char *)e->name; 56 } 57 58 __weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused) 59 { 60 return false; 61 } 62 63 int perf_mem_events__parse(const char *str) 64 { 65 char *tok, *saveptr = NULL; 66 bool found = false; 67 char *buf; 68 int j; 69 70 /* We need buffer that we know we can write to. */ 71 buf = malloc(strlen(str) + 1); 72 if (!buf) 73 return -ENOMEM; 74 75 strcpy(buf, str); 76 77 tok = strtok_r((char *)buf, ",", &saveptr); 78 79 while (tok) { 80 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 81 struct perf_mem_event *e = perf_mem_events__ptr(j); 82 83 if (!e->tag) 84 continue; 85 86 if (strstr(e->tag, tok)) 87 e->record = found = true; 88 } 89 90 tok = strtok_r(NULL, ",", &saveptr); 91 } 92 93 free(buf); 94 95 if (found) 96 return 0; 97 98 pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str); 99 return -1; 100 } 101 102 static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) 103 { 104 char path[PATH_MAX]; 105 struct stat st; 106 107 scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); 108 return !stat(path, &st); 109 } 110 111 int perf_mem_events__init(void) 112 { 113 const char *mnt = sysfs__mount(); 114 bool found = false; 115 int j; 116 117 if (!mnt) 118 return -ENOENT; 119 120 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 121 struct perf_mem_event *e = perf_mem_events__ptr(j); 122 char sysfs_name[100]; 123 124 /* 125 * If the event entry isn't valid, skip initialization 126 * and "e->supported" will keep false. 127 */ 128 if (!e->tag) 129 continue; 130 131 if (!perf_pmu__has_hybrid()) { 132 scnprintf(sysfs_name, sizeof(sysfs_name), 133 e->sysfs_name, "cpu"); 134 e->supported = perf_mem_event__supported(mnt, sysfs_name); 135 } else { 136 struct perf_pmu *pmu = NULL; 137 138 while ((pmu = perf_pmu__scan(pmu)) != NULL) { 139 if (!pmu->is_core) 140 continue; 141 142 scnprintf(sysfs_name, sizeof(sysfs_name), 143 e->sysfs_name, pmu->name); 144 e->supported |= perf_mem_event__supported(mnt, sysfs_name); 145 } 146 } 147 148 if (e->supported) 149 found = true; 150 } 151 152 return found ? 0 : -ENOENT; 153 } 154 155 void perf_mem_events__list(void) 156 { 157 int j; 158 159 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 160 struct perf_mem_event *e = perf_mem_events__ptr(j); 161 162 fprintf(stderr, "%-*s%-*s%s", 163 e->tag ? 13 : 0, 164 e->tag ? : "", 165 e->tag && verbose > 0 ? 25 : 0, 166 e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "", 167 e->supported ? ": available\n" : ""); 168 } 169 } 170 171 static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, 172 int idx) 173 { 174 const char *mnt = sysfs__mount(); 175 char sysfs_name[100]; 176 struct perf_pmu *pmu = NULL; 177 178 while ((pmu = perf_pmu__scan(pmu)) != NULL) { 179 if (!pmu->is_core) 180 continue; 181 182 scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, 183 pmu->name); 184 if (!perf_mem_event__supported(mnt, sysfs_name)) { 185 pr_err("failed: event '%s' not supported\n", 186 perf_mem_events__name(idx, pmu->name)); 187 } 188 } 189 } 190 191 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, 192 char **rec_tmp, int *tmp_nr) 193 { 194 int i = *argv_nr, k = 0; 195 struct perf_mem_event *e; 196 struct perf_pmu *pmu; 197 char *s; 198 199 for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 200 e = perf_mem_events__ptr(j); 201 if (!e->record) 202 continue; 203 204 if (!perf_pmu__has_hybrid()) { 205 if (!e->supported) { 206 pr_err("failed: event '%s' not supported\n", 207 perf_mem_events__name(j, NULL)); 208 return -1; 209 } 210 211 rec_argv[i++] = "-e"; 212 rec_argv[i++] = perf_mem_events__name(j, NULL); 213 } else { 214 if (!e->supported) { 215 perf_mem_events__print_unsupport_hybrid(e, j); 216 return -1; 217 } 218 219 while ((pmu = perf_pmu__scan(pmu)) != NULL) { 220 if (!pmu->is_core) 221 continue; 222 rec_argv[i++] = "-e"; 223 s = perf_mem_events__name(j, pmu->name); 224 if (s) { 225 s = strdup(s); 226 if (!s) 227 return -1; 228 229 rec_argv[i++] = s; 230 rec_tmp[k++] = s; 231 } 232 } 233 } 234 } 235 236 *argv_nr = i; 237 *tmp_nr = k; 238 return 0; 239 } 240 241 static const char * const tlb_access[] = { 242 "N/A", 243 "HIT", 244 "MISS", 245 "L1", 246 "L2", 247 "Walker", 248 "Fault", 249 }; 250 251 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 252 { 253 size_t l = 0, i; 254 u64 m = PERF_MEM_TLB_NA; 255 u64 hit, miss; 256 257 sz -= 1; /* -1 for null termination */ 258 out[0] = '\0'; 259 260 if (mem_info) 261 m = mem_info->data_src.mem_dtlb; 262 263 hit = m & PERF_MEM_TLB_HIT; 264 miss = m & PERF_MEM_TLB_MISS; 265 266 /* already taken care of */ 267 m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); 268 269 for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { 270 if (!(m & 0x1)) 271 continue; 272 if (l) { 273 strcat(out, " or "); 274 l += 4; 275 } 276 l += scnprintf(out + l, sz - l, tlb_access[i]); 277 } 278 if (*out == '\0') 279 l += scnprintf(out, sz - l, "N/A"); 280 if (hit) 281 l += scnprintf(out + l, sz - l, " hit"); 282 if (miss) 283 l += scnprintf(out + l, sz - l, " miss"); 284 285 return l; 286 } 287 288 static const char * const mem_lvl[] = { 289 "N/A", 290 "HIT", 291 "MISS", 292 "L1", 293 "LFB/MAB", 294 "L2", 295 "L3", 296 "Local RAM", 297 "Remote RAM (1 hop)", 298 "Remote RAM (2 hops)", 299 "Remote Cache (1 hop)", 300 "Remote Cache (2 hops)", 301 "I/O", 302 "Uncached", 303 }; 304 305 static const char * const mem_lvlnum[] = { 306 [PERF_MEM_LVLNUM_UNC] = "Uncached", 307 [PERF_MEM_LVLNUM_CXL] = "CXL", 308 [PERF_MEM_LVLNUM_IO] = "I/O", 309 [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache", 310 [PERF_MEM_LVLNUM_LFB] = "LFB/MAB", 311 [PERF_MEM_LVLNUM_RAM] = "RAM", 312 [PERF_MEM_LVLNUM_PMEM] = "PMEM", 313 [PERF_MEM_LVLNUM_NA] = "N/A", 314 }; 315 316 static const char * const mem_hops[] = { 317 "N/A", 318 /* 319 * While printing, 'Remote' will be added to represent 320 * 'Remote core, same node' accesses as remote field need 321 * to be set with mem_hops field. 322 */ 323 "core, same node", 324 "node, same socket", 325 "socket, same board", 326 "board", 327 }; 328 329 static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 330 { 331 u64 op = PERF_MEM_LOCK_NA; 332 int l; 333 334 if (mem_info) 335 op = mem_info->data_src.mem_op; 336 337 if (op & PERF_MEM_OP_NA) 338 l = scnprintf(out, sz, "N/A"); 339 else if (op & PERF_MEM_OP_LOAD) 340 l = scnprintf(out, sz, "LOAD"); 341 else if (op & PERF_MEM_OP_STORE) 342 l = scnprintf(out, sz, "STORE"); 343 else if (op & PERF_MEM_OP_PFETCH) 344 l = scnprintf(out, sz, "PFETCH"); 345 else if (op & PERF_MEM_OP_EXEC) 346 l = scnprintf(out, sz, "EXEC"); 347 else 348 l = scnprintf(out, sz, "No"); 349 350 return l; 351 } 352 353 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 354 { 355 union perf_mem_data_src data_src; 356 int printed = 0; 357 size_t l = 0; 358 size_t i; 359 int lvl; 360 char hit_miss[5] = {0}; 361 362 sz -= 1; /* -1 for null termination */ 363 out[0] = '\0'; 364 365 if (!mem_info) 366 goto na; 367 368 data_src = mem_info->data_src; 369 370 if (data_src.mem_lvl & PERF_MEM_LVL_HIT) 371 memcpy(hit_miss, "hit", 3); 372 else if (data_src.mem_lvl & PERF_MEM_LVL_MISS) 373 memcpy(hit_miss, "miss", 4); 374 375 lvl = data_src.mem_lvl_num; 376 if (lvl && lvl != PERF_MEM_LVLNUM_NA) { 377 if (data_src.mem_remote) { 378 strcat(out, "Remote "); 379 l += 7; 380 } 381 382 if (data_src.mem_hops) 383 l += scnprintf(out + l, sz - l, "%s ", mem_hops[data_src.mem_hops]); 384 385 if (mem_lvlnum[lvl]) 386 l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); 387 else 388 l += scnprintf(out + l, sz - l, "L%d", lvl); 389 390 l += scnprintf(out + l, sz - l, " %s", hit_miss); 391 return l; 392 } 393 394 lvl = data_src.mem_lvl; 395 if (!lvl) 396 goto na; 397 398 lvl &= ~(PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT | PERF_MEM_LVL_MISS); 399 if (!lvl) 400 goto na; 401 402 for (i = 0; lvl && i < ARRAY_SIZE(mem_lvl); i++, lvl >>= 1) { 403 if (!(lvl & 0x1)) 404 continue; 405 if (printed++) { 406 strcat(out, " or "); 407 l += 4; 408 } 409 l += scnprintf(out + l, sz - l, mem_lvl[i]); 410 } 411 412 if (printed) { 413 l += scnprintf(out + l, sz - l, " %s", hit_miss); 414 return l; 415 } 416 417 na: 418 strcat(out, "N/A"); 419 return 3; 420 } 421 422 static const char * const snoop_access[] = { 423 "N/A", 424 "None", 425 "Hit", 426 "Miss", 427 "HitM", 428 }; 429 430 static const char * const snoopx_access[] = { 431 "Fwd", 432 "Peer", 433 }; 434 435 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 436 { 437 size_t i, l = 0; 438 u64 m = PERF_MEM_SNOOP_NA; 439 440 sz -= 1; /* -1 for null termination */ 441 out[0] = '\0'; 442 443 if (mem_info) 444 m = mem_info->data_src.mem_snoop; 445 446 for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { 447 if (!(m & 0x1)) 448 continue; 449 if (l) { 450 strcat(out, " or "); 451 l += 4; 452 } 453 l += scnprintf(out + l, sz - l, snoop_access[i]); 454 } 455 456 m = 0; 457 if (mem_info) 458 m = mem_info->data_src.mem_snoopx; 459 460 for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) { 461 if (!(m & 0x1)) 462 continue; 463 464 if (l) { 465 strcat(out, " or "); 466 l += 4; 467 } 468 l += scnprintf(out + l, sz - l, snoopx_access[i]); 469 } 470 471 if (*out == '\0') 472 l += scnprintf(out, sz - l, "N/A"); 473 474 return l; 475 } 476 477 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 478 { 479 u64 mask = PERF_MEM_LOCK_NA; 480 int l; 481 482 if (mem_info) 483 mask = mem_info->data_src.mem_lock; 484 485 if (mask & PERF_MEM_LOCK_NA) 486 l = scnprintf(out, sz, "N/A"); 487 else if (mask & PERF_MEM_LOCK_LOCKED) 488 l = scnprintf(out, sz, "Yes"); 489 else 490 l = scnprintf(out, sz, "No"); 491 492 return l; 493 } 494 495 int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 496 { 497 size_t l = 0; 498 u64 mask = PERF_MEM_BLK_NA; 499 500 sz -= 1; /* -1 for null termination */ 501 out[0] = '\0'; 502 503 if (mem_info) 504 mask = mem_info->data_src.mem_blk; 505 506 if (!mask || (mask & PERF_MEM_BLK_NA)) { 507 l += scnprintf(out + l, sz - l, " N/A"); 508 return l; 509 } 510 if (mask & PERF_MEM_BLK_DATA) 511 l += scnprintf(out + l, sz - l, " Data"); 512 if (mask & PERF_MEM_BLK_ADDR) 513 l += scnprintf(out + l, sz - l, " Addr"); 514 515 return l; 516 } 517 518 int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 519 { 520 int i = 0; 521 522 i += scnprintf(out, sz, "|OP "); 523 i += perf_mem__op_scnprintf(out + i, sz - i, mem_info); 524 i += scnprintf(out + i, sz - i, "|LVL "); 525 i += perf_mem__lvl_scnprintf(out + i, sz, mem_info); 526 i += scnprintf(out + i, sz - i, "|SNP "); 527 i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); 528 i += scnprintf(out + i, sz - i, "|TLB "); 529 i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); 530 i += scnprintf(out + i, sz - i, "|LCK "); 531 i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); 532 i += scnprintf(out + i, sz - i, "|BLK "); 533 i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info); 534 535 return i; 536 } 537 538 int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) 539 { 540 union perf_mem_data_src *data_src = &mi->data_src; 541 u64 daddr = mi->daddr.addr; 542 u64 op = data_src->mem_op; 543 u64 lvl = data_src->mem_lvl; 544 u64 snoop = data_src->mem_snoop; 545 u64 snoopx = data_src->mem_snoopx; 546 u64 lock = data_src->mem_lock; 547 u64 blk = data_src->mem_blk; 548 /* 549 * Skylake might report unknown remote level via this 550 * bit, consider it when evaluating remote HITMs. 551 * 552 * Incase of power, remote field can also be used to denote cache 553 * accesses from the another core of same node. Hence, setting 554 * mrem only when HOPS is zero along with set remote field. 555 */ 556 bool mrem = (data_src->mem_remote && !data_src->mem_hops); 557 int err = 0; 558 559 #define HITM_INC(__f) \ 560 do { \ 561 stats->__f++; \ 562 stats->tot_hitm++; \ 563 } while (0) 564 565 #define PEER_INC(__f) \ 566 do { \ 567 stats->__f++; \ 568 stats->tot_peer++; \ 569 } while (0) 570 571 #define P(a, b) PERF_MEM_##a##_##b 572 573 stats->nr_entries++; 574 575 if (lock & P(LOCK, LOCKED)) stats->locks++; 576 577 if (blk & P(BLK, DATA)) stats->blk_data++; 578 if (blk & P(BLK, ADDR)) stats->blk_addr++; 579 580 if (op & P(OP, LOAD)) { 581 /* load */ 582 stats->load++; 583 584 if (!daddr) { 585 stats->ld_noadrs++; 586 return -1; 587 } 588 589 if (lvl & P(LVL, HIT)) { 590 if (lvl & P(LVL, UNC)) stats->ld_uncache++; 591 if (lvl & P(LVL, IO)) stats->ld_io++; 592 if (lvl & P(LVL, LFB)) stats->ld_fbhit++; 593 if (lvl & P(LVL, L1 )) stats->ld_l1hit++; 594 if (lvl & P(LVL, L2)) { 595 stats->ld_l2hit++; 596 597 if (snoopx & P(SNOOPX, PEER)) 598 PEER_INC(lcl_peer); 599 } 600 if (lvl & P(LVL, L3 )) { 601 if (snoop & P(SNOOP, HITM)) 602 HITM_INC(lcl_hitm); 603 else 604 stats->ld_llchit++; 605 606 if (snoopx & P(SNOOPX, PEER)) 607 PEER_INC(lcl_peer); 608 } 609 610 if (lvl & P(LVL, LOC_RAM)) { 611 stats->lcl_dram++; 612 if (snoop & P(SNOOP, HIT)) 613 stats->ld_shared++; 614 else 615 stats->ld_excl++; 616 } 617 618 if ((lvl & P(LVL, REM_RAM1)) || 619 (lvl & P(LVL, REM_RAM2)) || 620 mrem) { 621 stats->rmt_dram++; 622 if (snoop & P(SNOOP, HIT)) 623 stats->ld_shared++; 624 else 625 stats->ld_excl++; 626 } 627 } 628 629 if ((lvl & P(LVL, REM_CCE1)) || 630 (lvl & P(LVL, REM_CCE2)) || 631 mrem) { 632 if (snoop & P(SNOOP, HIT)) { 633 stats->rmt_hit++; 634 } else if (snoop & P(SNOOP, HITM)) { 635 HITM_INC(rmt_hitm); 636 } else if (snoopx & P(SNOOPX, PEER)) { 637 stats->rmt_hit++; 638 PEER_INC(rmt_peer); 639 } 640 } 641 642 if ((lvl & P(LVL, MISS))) 643 stats->ld_miss++; 644 645 } else if (op & P(OP, STORE)) { 646 /* store */ 647 stats->store++; 648 649 if (!daddr) { 650 stats->st_noadrs++; 651 return -1; 652 } 653 654 if (lvl & P(LVL, HIT)) { 655 if (lvl & P(LVL, UNC)) stats->st_uncache++; 656 if (lvl & P(LVL, L1 )) stats->st_l1hit++; 657 } 658 if (lvl & P(LVL, MISS)) 659 if (lvl & P(LVL, L1)) stats->st_l1miss++; 660 if (lvl & P(LVL, NA)) 661 stats->st_na++; 662 } else { 663 /* unparsable data_src? */ 664 stats->noparse++; 665 return -1; 666 } 667 668 if (!mi->daddr.ms.map || !mi->iaddr.ms.map) { 669 stats->nomap++; 670 return -1; 671 } 672 673 #undef P 674 #undef HITM_INC 675 return err; 676 } 677 678 void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) 679 { 680 stats->nr_entries += add->nr_entries; 681 682 stats->locks += add->locks; 683 stats->store += add->store; 684 stats->st_uncache += add->st_uncache; 685 stats->st_noadrs += add->st_noadrs; 686 stats->st_l1hit += add->st_l1hit; 687 stats->st_l1miss += add->st_l1miss; 688 stats->st_na += add->st_na; 689 stats->load += add->load; 690 stats->ld_excl += add->ld_excl; 691 stats->ld_shared += add->ld_shared; 692 stats->ld_uncache += add->ld_uncache; 693 stats->ld_io += add->ld_io; 694 stats->ld_miss += add->ld_miss; 695 stats->ld_noadrs += add->ld_noadrs; 696 stats->ld_fbhit += add->ld_fbhit; 697 stats->ld_l1hit += add->ld_l1hit; 698 stats->ld_l2hit += add->ld_l2hit; 699 stats->ld_llchit += add->ld_llchit; 700 stats->lcl_hitm += add->lcl_hitm; 701 stats->rmt_hitm += add->rmt_hitm; 702 stats->tot_hitm += add->tot_hitm; 703 stats->lcl_peer += add->lcl_peer; 704 stats->rmt_peer += add->rmt_peer; 705 stats->tot_peer += add->tot_peer; 706 stats->rmt_hit += add->rmt_hit; 707 stats->lcl_dram += add->lcl_dram; 708 stats->rmt_dram += add->rmt_dram; 709 stats->blk_data += add->blk_data; 710 stats->blk_addr += add->blk_addr; 711 stats->nomap += add->nomap; 712 stats->noparse += add->noparse; 713 } 714