// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <api/fs/fs.h>
#include <linux/kernel.h>
#include "map_symbol.h"
#include "mem-events.h"
#include "debug.h"
#include "symbol.h"
#include "pmu.h"
#include "pmu-hybrid.h"

unsigned int perf_mem_events__loads_ldlat = 30;

#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }

static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
	E(NULL,			NULL,				NULL),
};
#undef E

static char mem_loads_name[100];
static bool mem_loads_name__init;

struct perf_mem_event * __weak perf_mem_events__ptr(int i)
{
	if (i >= PERF_MEM_EVENTS__MAX)
		return NULL;

	return &perf_mem_events[i];
}

char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused)
{
	struct perf_mem_event *e = perf_mem_events__ptr(i);

	if (!e)
		return NULL;

	if (i == PERF_MEM_EVENTS__LOAD) {
		/*
		 * Format the load event name once, plugging the configured
		 * ldlat threshold (30 by default) into the "%u" slot.
		 */
		if (!mem_loads_name__init) {
			mem_loads_name__init = true;
			scnprintf(mem_loads_name, sizeof(mem_loads_name),
				  e->name, perf_mem_events__loads_ldlat);
		}
		return mem_loads_name;
	}

	return (char *)e->name;
}

__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
{
	return false;
}

/*
 * Parse a comma separated list of event tags, e.g. "ldlat-loads" or
 * "ldlat-loads,ldlat-stores", and mark the matching events for recording.
 */
int perf_mem_events__parse(const char *str)
{
	char *tok, *saveptr = NULL;
	bool found = false;
	char *buf;
	int j;

	/* We need a buffer we know we can write to. */
	buf = malloc(strlen(str) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, str);

	tok = strtok_r(buf, ",", &saveptr);

	while (tok) {
		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			struct perf_mem_event *e = perf_mem_events__ptr(j);

			if (!e->tag)
				continue;

			if (strstr(e->tag, tok))
				e->record = found = true;
		}

		tok = strtok_r(NULL, ",", &saveptr);
	}

	free(buf);

	if (found)
		return 0;

	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
	return -1;
}

static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
{
	char path[PATH_MAX];
	struct stat st;

	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
	return !stat(path, &st);
}

int perf_mem_events__init(void)
{
	const char *mnt = sysfs__mount();
	bool found = false;
	int j;

	if (!mnt)
		return -ENOENT;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);
		struct perf_pmu *pmu;
		char sysfs_name[100];

		/*
		 * If the event entry isn't valid, skip initialization
		 * and "e->supported" will stay false.
		 */
		if (!e->tag)
			continue;

		if (!perf_pmu__has_hybrid()) {
			scnprintf(sysfs_name, sizeof(sysfs_name),
				  e->sysfs_name, "cpu");
			e->supported = perf_mem_event__supported(mnt, sysfs_name);
		} else {
			perf_pmu__for_each_hybrid_pmu(pmu) {
				scnprintf(sysfs_name, sizeof(sysfs_name),
					  e->sysfs_name, pmu->name);
				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
			}
		}

		if (e->supported)
			found = true;
	}

	return found ? 0 : -ENOENT;
}
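/*
 * List the known memory events on stderr. With -v the resolved event
 * string (e.g. "cpu/mem-loads,ldlat=30/P") is printed alongside the tag,
 * and events found in sysfs are marked ": available".
 */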
void perf_mem_events__list(void)
{
	int j;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);

		fprintf(stderr, "%-13s%-*s%s\n",
			e->tag ?: "",
			verbose > 0 ? 25 : 0,
			verbose > 0 ? perf_mem_events__name(j, NULL) : "",
			e->supported ? ": available" : "");
	}
}

static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
						    int idx)
{
	const char *mnt = sysfs__mount();
	char sysfs_name[100];
	struct perf_pmu *pmu;

	perf_pmu__for_each_hybrid_pmu(pmu) {
		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
			  pmu->name);
		if (!perf_mem_event__supported(mnt, sysfs_name)) {
			pr_err("failed: event '%s' not supported\n",
			       perf_mem_events__name(idx, pmu->name));
		}
	}
}

/*
 * Build the "-e <event>" arguments for recording. On hybrid systems one
 * event is emitted per hybrid PMU; the strdup()ed names are also collected
 * in rec_tmp so the caller can free them later.
 */
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
				 char **rec_tmp, int *tmp_nr)
{
	int i = *argv_nr, k = 0;
	struct perf_mem_event *e;
	struct perf_pmu *pmu;
	char *s;

	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		e = perf_mem_events__ptr(j);
		if (!e->record)
			continue;

		if (!perf_pmu__has_hybrid()) {
			if (!e->supported) {
				pr_err("failed: event '%s' not supported\n",
				       perf_mem_events__name(j, NULL));
				return -1;
			}

			rec_argv[i++] = "-e";
			rec_argv[i++] = perf_mem_events__name(j, NULL);
		} else {
			if (!e->supported) {
				perf_mem_events__print_unsupport_hybrid(e, j);
				return -1;
			}

			perf_pmu__for_each_hybrid_pmu(pmu) {
				rec_argv[i++] = "-e";
				s = perf_mem_events__name(j, pmu->name);
				if (s) {
					s = strdup(s);
					if (!s)
						return -1;

					rec_argv[i++] = s;
					rec_tmp[k++] = s;
				}
			}
		}
	}

	*argv_nr = i;
	*tmp_nr = k;
	return 0;
}

static const char * const tlb_access[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"L2",
	"Walker",
	"Fault",
};

int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t l = 0, i;
	u64 m = PERF_MEM_TLB_NA;
	u64 hit, miss;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info->data_src.mem_dtlb;

	hit = m & PERF_MEM_TLB_HIT;
	miss = m & PERF_MEM_TLB_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);

	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
	}
	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const mem_lvl[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"LFB",
	"L2",
	"L3",
	"Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O",
	"Uncached",
};

static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
	[PERF_MEM_LVLNUM_LFB] = "LFB",
	[PERF_MEM_LVLNUM_RAM] = "RAM",
	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
	[PERF_MEM_LVLNUM_NA] = "N/A",
};

static const char * const mem_hops[] = {
	"N/A",
	/*
	 * While printing, 'Remote' will be added to represent
	 * 'Remote core, same node' accesses, as the remote field
	 * needs to be set along with the mem_hops field.
	 */
	"core, same node",
	"node, same socket",
	"socket, same board",
	"board",
};
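/*
 * Decode the memory operation (load, store, prefetch, execute) from
 * data_src. Like the other *_scnprintf() helpers below, it returns the
 * number of characters written to 'out'.
 */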
static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	u64 op = PERF_MEM_OP_NA;
	int l;

	if (mem_info)
		op = mem_info->data_src.mem_op;

	if (op & PERF_MEM_OP_NA)
		l = scnprintf(out, sz, "N/A");
	else if (op & PERF_MEM_OP_LOAD)
		l = scnprintf(out, sz, "LOAD");
	else if (op & PERF_MEM_OP_STORE)
		l = scnprintf(out, sz, "STORE");
	else if (op & PERF_MEM_OP_PFETCH)
		l = scnprintf(out, sz, "PFETCH");
	else if (op & PERF_MEM_OP_EXEC)
		l = scnprintf(out, sz, "EXEC");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_LVL_NA;
	u64 hit, miss;
	int printed = 0;

	if (mem_info)
		m = mem_info->data_src.mem_lvl;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	hit = m & PERF_MEM_LVL_HIT;
	miss = m & PERF_MEM_LVL_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);

	if (mem_info && mem_info->data_src.mem_remote) {
		strcat(out, "Remote ");
		l += 7;
	}

	/*
	 * If the mem_hops field is set, we can skip printing the data
	 * source via the PERF_MEM_LVL namespace.
	 */
	if (mem_info && mem_info->data_src.mem_hops) {
		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
	} else {
		for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
			if (!(m & 0x1))
				continue;
			if (printed++) {
				strcat(out, " or ");
				l += 4;
			}
			l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
		}
	}

	if (mem_info && mem_info->data_src.mem_lvl_num) {
		int lvl = mem_info->data_src.mem_lvl_num;

		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		if (mem_lvlnum[lvl])
			l += scnprintf(out + l, sz - l, "%s", mem_lvlnum[lvl]);
		else
			l += scnprintf(out + l, sz - l, "L%d", lvl);
	}

	if (l == 0)
		l += scnprintf(out + l, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const snoop_access[] = {
	"N/A",
	"None",
	"Hit",
	"Miss",
	"HitM",
};

static const char * const snoopx_access[] = {
	"Fwd",
	"Peer",
};

int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_SNOOP_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info->data_src.mem_snoop;

	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
	}

	/* The SNOOPX bits live in a separate, extended namespace. */
	m = 0;
	if (mem_info)
		m = mem_info->data_src.mem_snoopx;

	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;

		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", snoopx_access[i]);
	}

	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");

	return l;
}
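/* Decode whether the sampled access was part of a locked transaction. */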
int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	u64 mask = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		mask = mem_info->data_src.mem_lock;

	if (mask & PERF_MEM_LOCK_NA)
		l = scnprintf(out, sz, "N/A");
	else if (mask & PERF_MEM_LOCK_LOCKED)
		l = scnprintf(out, sz, "Yes");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t l = 0;
	u64 mask = PERF_MEM_BLK_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		mask = mem_info->data_src.mem_blk;

	if (!mask || (mask & PERF_MEM_BLK_NA)) {
		l += scnprintf(out + l, sz - l, " N/A");
		return l;
	}
	if (mask & PERF_MEM_BLK_DATA)
		l += scnprintf(out + l, sz - l, " Data");
	if (mask & PERF_MEM_BLK_ADDR)
		l += scnprintf(out + l, sz - l, " Addr");

	return l;
}

int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	int i = 0;

	i += scnprintf(out, sz, "|OP ");
	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LVL ");
	i += perf_mem__lvl_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}
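/*
 * Decode one sample's data_src into the c2c_stats counters: classify the
 * access by operation, level, snoop result and lock state, counting HITM
 * and peer accesses along the way. Returns -1 when the sample cannot be
 * attributed (missing address, map or operation).
 */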
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
{
	union perf_mem_data_src *data_src = &mi->data_src;
	u64 daddr = mi->daddr.addr;
	u64 op = data_src->mem_op;
	u64 lvl = data_src->mem_lvl;
	u64 snoop = data_src->mem_snoop;
	u64 snoopx = data_src->mem_snoopx;
	u64 lock = data_src->mem_lock;
	u64 blk = data_src->mem_blk;
	/*
	 * Skylake might report an unknown remote level via this bit;
	 * consider it when evaluating remote HITMs.
	 *
	 * On Power, the remote field can also denote cache accesses
	 * from another core on the same node. Hence, set mrem only
	 * when mem_hops is zero and the remote field is set.
	 */
	bool mrem = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_hitm++;	\
} while (0)

#define PEER_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_peer++;	\
} while (0)

#define P(a, b) PERF_MEM_##a##_##b

	stats->nr_entries++;

	if (lock & P(LOCK, LOCKED)) stats->locks++;

	if (blk & P(BLK, DATA)) stats->blk_data++;
	if (blk & P(BLK, ADDR)) stats->blk_addr++;

	if (op & P(OP, LOAD)) {
		/* load */
		stats->load++;

		if (!daddr) {
			stats->ld_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
			if (lvl & P(LVL, IO))  stats->ld_io++;
			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
			if (lvl & P(LVL, L1))  stats->ld_l1hit++;
			if (lvl & P(LVL, L2)) {
				stats->ld_l2hit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}
			if (lvl & P(LVL, L3)) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_llchit++;

				if (snoopx & P(SNOOPX, PEER))
					PEER_INC(lcl_peer);
			}

			if (lvl & P(LVL, LOC_RAM)) {
				stats->lcl_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}

			if ((lvl & P(LVL, REM_RAM1)) ||
			    (lvl & P(LVL, REM_RAM2)) ||
			     mrem) {
				stats->rmt_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}
		}

		if ((lvl & P(LVL, REM_CCE1)) ||
		    (lvl & P(LVL, REM_CCE2)) ||
		     mrem) {
			if (snoop & P(SNOOP, HIT)) {
				stats->rmt_hit++;
			} else if (snoop & P(SNOOP, HITM)) {
				HITM_INC(rmt_hitm);
			} else if (snoopx & P(SNOOPX, PEER)) {
				stats->rmt_hit++;
				PEER_INC(rmt_peer);
			}
		}

		if (lvl & P(LVL, MISS))
			stats->ld_miss++;

	} else if (op & P(OP, STORE)) {
		/* store */
		stats->store++;

		if (!daddr) {
			stats->st_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->st_uncache++;
			if (lvl & P(LVL, L1))  stats->st_l1hit++;
		}
		if (lvl & P(LVL, MISS))
			if (lvl & P(LVL, L1)) stats->st_l1miss++;
		if (lvl & P(LVL, NA))
			stats->st_na++;
	} else {
		/* unparsable data_src? */
		stats->noparse++;
		return -1;
	}

	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
		stats->nomap++;
		return -1;
	}

#undef P
#undef HITM_INC
#undef PEER_INC
	return err;
}
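/* Accumulate every counter from 'add' into 'stats'. */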
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
{
	stats->nr_entries += add->nr_entries;

	stats->locks += add->locks;
	stats->store += add->store;
	stats->st_uncache += add->st_uncache;
	stats->st_noadrs += add->st_noadrs;
	stats->st_l1hit += add->st_l1hit;
	stats->st_l1miss += add->st_l1miss;
	stats->st_na += add->st_na;
	stats->load += add->load;
	stats->ld_excl += add->ld_excl;
	stats->ld_shared += add->ld_shared;
	stats->ld_uncache += add->ld_uncache;
	stats->ld_io += add->ld_io;
	stats->ld_miss += add->ld_miss;
	stats->ld_noadrs += add->ld_noadrs;
	stats->ld_fbhit += add->ld_fbhit;
	stats->ld_l1hit += add->ld_l1hit;
	stats->ld_l2hit += add->ld_l2hit;
	stats->ld_llchit += add->ld_llchit;
	stats->lcl_hitm += add->lcl_hitm;
	stats->rmt_hitm += add->rmt_hitm;
	stats->tot_hitm += add->tot_hitm;
	stats->lcl_peer += add->lcl_peer;
	stats->rmt_peer += add->rmt_peer;
	stats->tot_peer += add->tot_peer;
	stats->rmt_hit += add->rmt_hit;
	stats->lcl_dram += add->lcl_dram;
	stats->rmt_dram += add->rmt_dram;
	stats->blk_data += add->blk_data;
	stats->blk_addr += add->blk_addr;
	stats->nomap += add->nomap;
	stats->noparse += add->noparse;
}