/*
 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 */

#include <inttypes.h>
#include <stdio.h>
#include <glib.h>

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

static GHashTable *miss_ht;

static GMutex hashtable_lock;
static GRand *rng;

static int limit;
static bool sys;

enum EvictionPolicy {
    LRU,
    FIFO,
    RAND,
};

enum EvictionPolicy policy;

/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can
 * be put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not a
 * functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address, and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: the block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match.
 * If a match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */

typedef struct {
    uint64_t tag;
    bool valid;
} CacheBlock;

typedef struct {
    CacheBlock *blocks;
    uint64_t *lru_priorities;
    uint64_t lru_gen_counter;
    GQueue *fifo_queue;
} CacheSet;

typedef struct {
    CacheSet *sets;
    int num_sets;
    int cachesize;
    int assoc;
    int blksize_shift;
    uint64_t set_mask;
    uint64_t tag_mask;
    uint64_t accesses;
    uint64_t misses;
} Cache;

typedef struct {
    char *disas_str;
    const char *symbol;
    uint64_t addr;
    uint64_t dmisses;
    uint64_t imisses;
} InsnData;

void (*update_hit)(Cache *cache, int set, int blk);
void (*update_miss)(Cache *cache, int set, int blk);

void (*metadata_init)(Cache *cache);
void (*metadata_destroy)(Cache *cache);

static int cores;
static Cache **dcaches, **icaches;

static GMutex *dcache_locks;
static GMutex *icache_locks;

static uint64_t all_dmem_accesses;
static uint64_t all_imem_accesses;
static uint64_t all_imisses;
static uint64_t all_dmisses;

/* returns log2(num), asserting that num is a power of two */
static int pow_of_two(int num)
{
    g_assert((num & (num - 1)) == 0);
    int ret = 0;
    while (num /= 2) {
        ret++;
    }
    return ret;
}

/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the lowest priority is selected and
 * replaced with the newly-cached block, whose priority is set to the current
 * generation number.
 */
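/*
 * Illustrative trace (a hypothetical two-way set, starting from
 * lru_gen_counter == 0): accessing blocks A, B, then A again leaves
 * priorities A = 2 and B = 1 with the counter at 3, so the next miss
 * in this set evicts B, the least recently used block.
 */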
static void lru_priorities_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
        cache->sets[i].lru_gen_counter = 0;
    }
}

static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *set = &cache->sets[set_idx];
    set->lru_priorities[blk_idx] = set->lru_gen_counter;
    set->lru_gen_counter++;
}

static int lru_get_lru_block(Cache *cache, int set_idx)
{
    int i, min_idx;
    uint64_t min_priority;

    min_priority = cache->sets[set_idx].lru_priorities[0];
    min_idx = 0;

    for (i = 1; i < cache->assoc; i++) {
        if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
            min_priority = cache->sets[set_idx].lru_priorities[i];
            min_idx = i;
        }
    }
    return min_idx;
}

static void lru_priorities_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].lru_priorities);
    }
}

/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * stores the indices of the cached blocks in insertion order.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the latest cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache and the
 * new block is put in its place and enqueued to the FIFO queue.
 */

static void fifo_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].fifo_queue = g_queue_new();
    }
}

static int fifo_get_first_block(Cache *cache, int set)
{
    GQueue *q = cache->sets[set].fifo_queue;
    return GPOINTER_TO_INT(g_queue_pop_tail(q));
}

static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    GQueue *q = cache->sets[set].fifo_queue;
    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
}

static void fifo_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_queue_free(cache->sets[i].fifo_queue);
    }
}

static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    return addr & cache->tag_mask;
}

static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    return (addr & cache->set_mask) >> cache->blksize_shift;
}
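/*
 * Worked example (illustrative numbers, assuming the default geometry set
 * up in qemu_plugin_install() below: 64-byte blocks, 8-way, 16 KiB, i.e.
 * 32 sets): blksize_shift = 6, set_mask = 0x7c0, tag_mask = ~0x7ff.
 * For addr = 0x7f3a91c4:
 *   block offset = addr & 0x3f         = 0x04
 *   set          = (addr & 0x7c0) >> 6 = 0x07
 *   tag          = addr & ~0x7ff       = 0x7f3a9000
 */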
static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % (blksize * assoc) != 0) {
        return "cache size must be divisible by set size (assoc * block size)";
    } else {
        return NULL;
    }
}

static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
}

static Cache *cache_init(int blksize, int assoc, int cachesize)
{
    Cache *cache;
    int i;
    uint64_t blk_mask;

    /*
     * This function shall not be called directly, and hence expects suitable
     * parameters.
     */
    g_assert(!bad_cache_params(blksize, assoc, cachesize));

    cache = g_new(Cache, 1);
    cache->assoc = assoc;
    cache->cachesize = cachesize;
    cache->num_sets = cachesize / (blksize * assoc);
    cache->sets = g_new(CacheSet, cache->num_sets);
    cache->blksize_shift = pow_of_two(blksize);
    cache->accesses = 0;
    cache->misses = 0;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].blocks = g_new0(CacheBlock, assoc);
    }

    blk_mask = blksize - 1;
    cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
    cache->tag_mask = ~(cache->set_mask | blk_mask);

    if (metadata_init) {
        metadata_init(cache);
    }

    return cache;
}

static Cache **caches_init(int blksize, int assoc, int cachesize)
{
    Cache **caches;
    int i;

    if (bad_cache_params(blksize, assoc, cachesize)) {
        return NULL;
    }

    caches = g_new(Cache *, cores);

    for (i = 0; i < cores; i++) {
        caches[i] = cache_init(blksize, assoc, cachesize);
    }

    return caches;
}

static int get_invalid_block(Cache *cache, uint64_t set)
{
    int i;

    for (i = 0; i < cache->assoc; i++) {
        if (!cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

static int get_replaced_block(Cache *cache, int set)
{
    switch (policy) {
    case RAND:
        return g_rand_int_range(rng, 0, cache->assoc);
    case LRU:
        return lru_get_lru_block(cache, set);
    case FIFO:
        return fifo_get_first_block(cache, set);
    default:
        g_assert_not_reached();
    }
}

static int in_cache(Cache *cache, uint64_t addr)
{
    int i;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    for (i = 0; i < cache->assoc; i++) {
        if (cache->sets[set].blocks[i].tag == tag &&
                cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

/**
 * access_cache(): Simulate a cache access
 * @cache: The cache under simulation
 * @addr: The address of the requested memory location
 *
 * Returns true if the requested data is hit in the cache and false when
 * missed. The cache is updated on miss for the next access.
 */
static bool access_cache(Cache *cache, uint64_t addr)
{
    int hit_blk, replaced_blk;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    hit_blk = in_cache(cache, addr);
    if (hit_blk != -1) {
        if (update_hit) {
            update_hit(cache, set, hit_blk);
        }
        return true;
    }

    replaced_blk = get_invalid_block(cache, set);

    if (replaced_blk == -1) {
        replaced_blk = get_replaced_block(cache, set);
    }

    if (update_miss) {
        update_miss(cache, set, replaced_blk);
    }

    cache->sets[set].blocks[replaced_blk].tag = tag;
    cache->sets[set].blocks[replaced_blk].valid = true;

    return false;
}
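/*
 * Illustrative sequence (hypothetical addresses mapping to the same set):
 * the first access to address A misses and fills an invalid block
 * (compulsory miss); a second access to A hits and, under LRU, refreshes
 * its priority; once the set has no invalid blocks left, a miss on a new
 * address evicts whichever block get_replaced_block() selects for the
 * configured policy.
 */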
static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    uint64_t effective_addr;
    struct qemu_plugin_hwaddr *hwaddr;
    int cache_idx;
    InsnData *insn;

    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        return;
    }

    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
    cache_idx = vcpu_index % cores;

    g_mutex_lock(&dcache_locks[cache_idx]);
    if (!access_cache(dcaches[cache_idx], effective_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->dmisses, 1, __ATOMIC_SEQ_CST);
        dcaches[cache_idx]->misses++;
    }
    dcaches[cache_idx]->accesses++;
    g_mutex_unlock(&dcache_locks[cache_idx]);
}

static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
{
    uint64_t insn_addr;
    InsnData *insn;
    int cache_idx;

    insn_addr = ((InsnData *) userdata)->addr;

    cache_idx = vcpu_index % cores;
    g_mutex_lock(&icache_locks[cache_idx]);
    if (!access_cache(icaches[cache_idx], insn_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->imisses, 1, __ATOMIC_SEQ_CST);
        icaches[cache_idx]->misses++;
    }
    icaches[cache_idx]->accesses++;
    g_mutex_unlock(&icache_locks[cache_idx]);
}

static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n_insns;
    size_t i;
    InsnData *data;

    n_insns = qemu_plugin_tb_n_insns(tb);
    for (i = 0; i < n_insns; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        uint64_t effective_addr;

        if (sys) {
            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
        } else {
            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
        }

        /*
         * Instructions might get translated multiple times, so we do not
         * create new entries for those instructions. Instead, we fetch the
         * same entry from the hash table and register it for the callback
         * again.
         */
        g_mutex_lock(&hashtable_lock);
        data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
        if (data == NULL) {
            data = g_new0(InsnData, 1);
            data->disas_str = qemu_plugin_insn_disas(insn);
            data->symbol = qemu_plugin_insn_symbol(insn);
            data->addr = effective_addr;
            g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
                                (gpointer) data);
        }
        g_mutex_unlock(&hashtable_lock);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, data);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                               QEMU_PLUGIN_CB_NO_REGS, data);
    }
}

static void insn_free(gpointer data)
{
    InsnData *insn = (InsnData *) data;
    g_free(insn->disas_str);
    g_free(insn);
}

static void cache_free(Cache *cache)
{
    for (int i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].blocks);
    }

    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}

static void caches_free(Cache **caches)
{
    int i;

    for (i = 0; i < cores; i++) {
        cache_free(caches[i]);
    }
}

static int dcmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
}

static void append_stats_line(GString *line, uint64_t daccess, uint64_t dmisses,
                              uint64_t iaccess, uint64_t imisses)
{
    double dmiss_rate, imiss_rate;

    dmiss_rate = ((double) dmisses) / (daccess) * 100.0;
    imiss_rate = ((double) imisses) / (iaccess) * 100.0;

    g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%"
                           " %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%\n",
                           daccess,
                           dmisses,
                           daccess ? dmiss_rate : 0.0,
                           iaccess,
                           imisses,
                           iaccess ? imiss_rate : 0.0);
}
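/*
 * With the header emitted by log_stats() below, each per-core report line
 * follows this layout (numbers are purely illustrative):
 *
 * core #, data accesses, data misses, dmiss rate, insn accesses, ...
 * 0       200000         4000            2.0000%  350000        ...
 */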
static void sum_stats(void)
{
    int i;

    g_assert(cores > 1);
    for (i = 0; i < cores; i++) {
        all_imisses += icaches[i]->misses;
        all_dmisses += dcaches[i]->misses;
        all_imem_accesses += icaches[i]->accesses;
        all_dmem_accesses += dcaches[i]->accesses;
    }
}

static int icmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->imisses < insn_b->imisses ? 1 : -1;
}

static void log_stats(void)
{
    int i;
    Cache *icache, *dcache;

    g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
                                          " dmiss rate, insn accesses,"
                                          " insn misses, imiss rate\n");

    for (i = 0; i < cores; i++) {
        g_string_append_printf(rep, "%-8d", i);
        dcache = dcaches[i];
        icache = icaches[i];
        append_stats_line(rep, dcache->accesses, dcache->misses,
                          icache->accesses, icache->misses);
    }

    if (cores > 1) {
        sum_stats();
        g_string_append_printf(rep, "%-8s", "sum");
        append_stats_line(rep, all_dmem_accesses, all_dmisses,
                          all_imem_accesses, all_imisses);
    }

    g_string_append(rep, "\n");
    qemu_plugin_outs(rep->str);
}

static void log_top_insns(void)
{
    int i;
    GList *curr, *miss_insns;
    InsnData *insn;

    miss_insns = g_hash_table_get_values(miss_ht);
    miss_insns = g_list_sort(miss_insns, dcmp);
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep, "%s", "address, data misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->dmisses,
                               insn->disas_str);
    }

    miss_insns = g_list_sort(miss_insns, icmp);
    g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->imisses,
                               insn->disas_str);
    }

    qemu_plugin_outs(rep->str);
    g_list_free(miss_insns);
}

static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    caches_free(dcaches);
    caches_free(icaches);

    g_hash_table_destroy(miss_ht);
}

static void policy_init(void)
{
    switch (policy) {
    case LRU:
        update_hit = lru_update_blk;
        update_miss = lru_update_blk;
        metadata_init = lru_priorities_init;
        metadata_destroy = lru_priorities_destroy;
        break;
    case FIFO:
        update_miss = fifo_update_on_miss;
        metadata_init = fifo_init;
        metadata_destroy = fifo_destroy;
        break;
    case RAND:
        rng = g_rand_new();
        break;
    default:
        g_assert_not_reached();
    }
}
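/*
 * Note on the dispatch convention above: callbacks a policy does not need
 * stay NULL. FIFO installs no update_hit (hits do not reorder a FIFO
 * queue), and RAND installs no bookkeeping at all; access_cache(),
 * cache_init() and cache_free() check each pointer before calling it.
 */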
QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                        int argc, char **argv)
{
    int i;
    int iassoc, iblksize, icachesize;
    int dassoc, dblksize, dcachesize;

    limit = 32;
    sys = info->system_emulation;

    /* defaults: 16 KiB, 8-way caches with 64-byte blocks (32 sets each) */
    dassoc = 8;
    dblksize = 64;
    dcachesize = dblksize * dassoc * 32;

    iassoc = 8;
    iblksize = 64;
    icachesize = iblksize * iassoc * 32;

    policy = LRU;

    cores = sys ? qemu_plugin_n_vcpus() : 1;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_str_has_prefix(opt, "iblksize=")) {
            iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "iassoc=")) {
            iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "icachesize=")) {
            icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "dblksize=")) {
            dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "dassoc=")) {
            dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "dcachesize=")) {
            dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "limit=")) {
            limit = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "cores=")) {
            cores = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "evict=")) {
            gchar *p = opt + 6;
            if (g_strcmp0(p, "rand") == 0) {
                policy = RAND;
            } else if (g_strcmp0(p, "lru") == 0) {
                policy = LRU;
            } else if (g_strcmp0(p, "fifo") == 0) {
                policy = FIFO;
            } else {
                fprintf(stderr, "invalid eviction policy: %s\n", opt);
                return -1;
            }
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }

    policy_init();

    dcaches = caches_init(dblksize, dassoc, dcachesize);
    if (!dcaches) {
        const char *err = cache_config_error(dblksize, dassoc, dcachesize);
        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    icaches = caches_init(iblksize, iassoc, icachesize);
    if (!icaches) {
        const char *err = cache_config_error(iblksize, iassoc, icachesize);
        fprintf(stderr, "icache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    dcache_locks = g_new0(GMutex, cores);
    icache_locks = g_new0(GMutex, cores);

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);

    return 0;
}
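/*
 * Example invocation (illustrative; the binary and plugin paths depend on
 * the build tree, but the option names are exactly those parsed in
 * qemu_plugin_install() above). "-d plugin" is typically needed so that
 * qemu_plugin_outs() output is shown; older QEMU versions may require the
 * legacy "arg=" prefix before each plugin option:
 *
 *   qemu-x86_64 -plugin ./libcache.so,dcachesize=32768,dassoc=4,evict=fifo \
 *       -d plugin ./a.out
 */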