/*
 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
 *
 * License: GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include <inttypes.h>
#include <stdio.h>
#include <glib.h>

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

static GHashTable *miss_ht;

static GMutex mtx;
static GRand *rng;

static int limit;
static bool sys;

static uint64_t dmem_accesses;
static uint64_t dmisses;

static uint64_t imem_accesses;
static uint64_t imisses;

enum EvictionPolicy {
    LRU,
    FIFO,
    RAND,
};

enum EvictionPolicy policy;

/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can
 * be put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not a
 * functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: The block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match.
 * If a match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */

typedef struct {
    uint64_t tag;
    bool valid;
} CacheBlock;

typedef struct {
    CacheBlock *blocks;
    uint64_t *lru_priorities;
    uint64_t lru_gen_counter;
    GQueue *fifo_queue;
} CacheSet;

typedef struct {
    CacheSet *sets;
    int num_sets;
    int cachesize;
    int assoc;
    int blksize_shift;
    uint64_t set_mask;
    uint64_t tag_mask;
} Cache;

typedef struct {
    char *disas_str;
    const char *symbol;
    uint64_t addr;
    uint64_t dmisses;
    uint64_t imisses;
} InsnData;

void (*update_hit)(Cache *cache, int set, int blk);
void (*update_miss)(Cache *cache, int set, int blk);

void (*metadata_init)(Cache *cache);
void (*metadata_destroy)(Cache *cache);

Cache *dcache, *icache;

static int pow_of_two(int num)
{
    g_assert(num > 0 && (num & (num - 1)) == 0);
    int ret = 0;
    while (num /= 2) {
        ret++;
    }
    return ret;
}

/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the lowest priority is located and replaced
 * by the newly-cached block, whose priority is set to the current generation
 * number.
 */
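
/*
 * Illustrative walk-through (assumed values, not from a real run): with
 * assoc = 2, accessing blocks A, B, then A again leaves the priorities at
 * [2, 1] with lru_gen_counter = 3. A subsequent miss in this set evicts B,
 * whose generation stamp (1) is the smallest, i.e. least recently used.
 */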

static void lru_priorities_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
        cache->sets[i].lru_gen_counter = 0;
    }
}

static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *set = &cache->sets[set_idx];
    set->lru_priorities[blk_idx] = set->lru_gen_counter;
    set->lru_gen_counter++;
}

static int lru_get_lru_block(Cache *cache, int set_idx)
{
    int i, min_idx;
    uint64_t min_priority;

    min_priority = cache->sets[set_idx].lru_priorities[0];
    min_idx = 0;

    for (i = 1; i < cache->assoc; i++) {
        if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
            min_priority = cache->sets[set_idx].lru_priorities[i];
            min_idx = i;
        }
    }
    return min_idx;
}

static void lru_priorities_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].lru_priorities);
    }
}

/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * stores accesses to the cache.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the most recently cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache and the
 * new block is put in its place and enqueued to the FIFO queue.
 */

static void fifo_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].fifo_queue = g_queue_new();
    }
}

static int fifo_get_first_block(Cache *cache, int set)
{
    GQueue *q = cache->sets[set].fifo_queue;
    return GPOINTER_TO_INT(g_queue_pop_tail(q));
}

static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    GQueue *q = cache->sets[set].fifo_queue;
    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
}

static void fifo_destroy(Cache *cache)
{
    int i;

    /* one queue per set, so iterate over num_sets (not assoc) */
    for (i = 0; i < cache->num_sets; i++) {
        g_queue_free(cache->sets[i].fifo_queue);
    }
}

static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    return addr & cache->tag_mask;
}

static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    return (addr & cache->set_mask) >> cache->blksize_shift;
}
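
/*
 * Worked example (cache geometry assumed for illustration): with 64-byte
 * blocks and 32 sets, blksize_shift = 6, set_mask = 0x7c0 and
 * tag_mask = ~0x7ff. The address 0x7f1234 then splits into block offset
 * 0x34 (bits 0-5), set index 0x8 (bits 6-10) and tag 0x7f1000; note that
 * extract_tag() keeps the tag unshifted, which is fine since it is only
 * ever compared for equality.
 */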

static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % (blksize * assoc) != 0) {
        return "cache size must be divisible by set size (assoc * block size)";
    } else {
        return NULL;
    }
}

static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
}

static Cache *cache_init(int blksize, int assoc, int cachesize)
{
    Cache *cache;
    int i;
    uint64_t blk_mask;

    if (bad_cache_params(blksize, assoc, cachesize)) {
        return NULL;
    }

    cache = g_new(Cache, 1);
    cache->assoc = assoc;
    cache->cachesize = cachesize;
    cache->num_sets = cachesize / (blksize * assoc);
    cache->sets = g_new(CacheSet, cache->num_sets);
    cache->blksize_shift = pow_of_two(blksize);

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].blocks = g_new0(CacheBlock, assoc);
    }

    blk_mask = blksize - 1;
    cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
    cache->tag_mask = ~(cache->set_mask | blk_mask);

    if (metadata_init) {
        metadata_init(cache);
    }

    return cache;
}

static int get_invalid_block(Cache *cache, uint64_t set)
{
    int i;

    for (i = 0; i < cache->assoc; i++) {
        if (!cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

static int get_replaced_block(Cache *cache, int set)
{
    switch (policy) {
    case RAND:
        return g_rand_int_range(rng, 0, cache->assoc);
    case LRU:
        return lru_get_lru_block(cache, set);
    case FIFO:
        return fifo_get_first_block(cache, set);
    default:
        g_assert_not_reached();
    }
}

static int in_cache(Cache *cache, uint64_t addr)
{
    int i;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    for (i = 0; i < cache->assoc; i++) {
        if (cache->sets[set].blocks[i].tag == tag &&
                cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

/**
 * access_cache(): Simulate a cache access
 * @cache: The cache under simulation
 * @addr: The address of the requested memory location
 *
 * Returns true if the requested data is found in the cache (a hit) and false
 * on a miss. On a miss, the cache is updated so that a subsequent access to
 * the same block hits.
 */
static bool access_cache(Cache *cache, uint64_t addr)
{
    int hit_blk, replaced_blk;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    hit_blk = in_cache(cache, addr);
    if (hit_blk != -1) {
        if (update_hit) {
            update_hit(cache, set, hit_blk);
        }
        return true;
    }

    replaced_blk = get_invalid_block(cache, set);

    if (replaced_blk == -1) {
        replaced_blk = get_replaced_block(cache, set);
    }

    if (update_miss) {
        update_miss(cache, set, replaced_blk);
    }

    cache->sets[set].blocks[replaced_blk].tag = tag;
    cache->sets[set].blocks[replaced_blk].valid = true;

    return false;
}
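
/*
 * Minimal sketch of the hit/miss contract of access_cache(), outside the
 * plugin's normal control flow (the geometry below is arbitrary):
 *
 *     Cache *c = cache_init(64, 8, 16384);   // 32 sets of 8 blocks
 *     g_assert(!access_cache(c, 0x1000));    // cold miss installs the block
 *     g_assert(access_cache(c, 0x1008));     // same 64-byte block: hit
 *     cache_free(c);
 */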

static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    uint64_t effective_addr;
    struct qemu_plugin_hwaddr *hwaddr;
    InsnData *insn;

    g_mutex_lock(&mtx);
    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        g_mutex_unlock(&mtx);
        return;
    }

    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;

    if (!access_cache(dcache, effective_addr)) {
        insn = (InsnData *) userdata;
        insn->dmisses++;
        dmisses++;
    }
    dmem_accesses++;
    g_mutex_unlock(&mtx);
}

static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
{
    uint64_t insn_addr;
    InsnData *insn;

    g_mutex_lock(&mtx);
    insn_addr = ((InsnData *) userdata)->addr;

    if (!access_cache(icache, insn_addr)) {
        insn = (InsnData *) userdata;
        insn->imisses++;
        imisses++;
    }
    imem_accesses++;
    g_mutex_unlock(&mtx);
}

static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n_insns;
    size_t i;
    InsnData *data;

    n_insns = qemu_plugin_tb_n_insns(tb);
    for (i = 0; i < n_insns; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        uint64_t effective_addr;

        if (sys) {
            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
        } else {
            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
        }

        /*
         * Instructions might get translated multiple times; we do not create
         * new entries for those instructions. Instead, we fetch the same
         * entry from the hash table and register it for the callback again.
         */
        g_mutex_lock(&mtx);
        data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
        if (data == NULL) {
            data = g_new0(InsnData, 1);
            data->disas_str = qemu_plugin_insn_disas(insn);
            data->symbol = qemu_plugin_insn_symbol(insn);
            data->addr = effective_addr;
            g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
                                (gpointer) data);
        }
        g_mutex_unlock(&mtx);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, data);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                               QEMU_PLUGIN_CB_NO_REGS, data);
    }
}

static void insn_free(gpointer data)
{
    InsnData *insn = (InsnData *) data;
    g_free(insn->disas_str);
    g_free(insn);
}

static void cache_free(Cache *cache)
{
    for (int i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].blocks);
    }

    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}

/* sort in descending order of data misses */
static int dcmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
}

/* sort in descending order of instruction (fetch) misses */
static int icmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->imisses < insn_b->imisses ? 1 : -1;
}
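
/*
 * At exit, log_stats() emits a summary of the following shape (the numbers
 * here are made up for illustration):
 *
 *     Data accesses: 996479, Misses: 507
 *     Miss rate: 0.050879%
 *
 *     Instruction accesses: 2641737, Misses: 18617
 *     Miss rate: 0.704722%
 *
 * log_top_insns() then lists the top `limit` instructions by data misses and
 * by fetch misses, one per line: address, (symbol), miss count, disassembly.
 */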

static void log_stats(void)
{
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep,
        "Data accesses: %" PRIu64 ", Misses: %" PRIu64 "\nMiss rate: %lf%%\n\n",
        dmem_accesses,
        dmisses,
        ((double) dmisses / (double) dmem_accesses) * 100.0);

    g_string_append_printf(rep,
        "Instruction accesses: %" PRIu64 ", Misses: %" PRIu64 "\nMiss rate: %lf%%\n\n",
        imem_accesses,
        imisses,
        ((double) imisses / (double) imem_accesses) * 100.0);

    qemu_plugin_outs(rep->str);
}

static void log_top_insns(void)
{
    int i;
    GList *curr, *miss_insns;
    InsnData *insn;

    miss_insns = g_hash_table_get_values(miss_ht);
    miss_insns = g_list_sort(miss_insns, dcmp);
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep, "%s", "address, data misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->dmisses,
                               insn->disas_str);
    }

    miss_insns = g_list_sort(miss_insns, icmp);
    g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->imisses,
                               insn->disas_str);
    }

    qemu_plugin_outs(rep->str);
    g_list_free(miss_insns);
}

static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    cache_free(dcache);
    cache_free(icache);

    g_hash_table_destroy(miss_ht);
}

static void policy_init(void)
{
    switch (policy) {
    case LRU:
        update_hit = lru_update_blk;
        update_miss = lru_update_blk;
        metadata_init = lru_priorities_init;
        metadata_destroy = lru_priorities_destroy;
        break;
    case FIFO:
        update_miss = fifo_update_on_miss;
        metadata_init = fifo_init;
        metadata_destroy = fifo_destroy;
        break;
    case RAND:
        rng = g_rand_new();
        break;
    default:
        g_assert_not_reached();
    }
}
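
/*
 * Plugin arguments, all optional (defaults are set in qemu_plugin_install()
 * below):
 *
 *   iblksize=, iassoc=, icachesize=   instruction cache geometry
 *   dblksize=, dassoc=, dcachesize=   data cache geometry
 *   limit=                            how many top-missing insns to report
 *   evict=lru|fifo|rand               eviction policy
 *
 * Hypothetical invocation (the library path depends on your build, and the
 * argument syntax differs across QEMU versions; older versions wrap each
 * option in arg="..."):
 *
 *     qemu-x86_64 -plugin ./contrib/plugins/libcache.so,evict=fifo ./a.out
 */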

QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                        int argc, char **argv)
{
    int i;
    int iassoc, iblksize, icachesize;
    int dassoc, dblksize, dcachesize;

    limit = 32;
    sys = info->system_emulation;

    dassoc = 8;
    dblksize = 64;
    dcachesize = dblksize * dassoc * 32;

    iassoc = 8;
    iblksize = 64;
    icachesize = iblksize * iassoc * 32;

    policy = LRU;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_str_has_prefix(opt, "iblksize=")) {
            iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "iassoc=")) {
            iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "icachesize=")) {
            icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "dblksize=")) {
            dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "dassoc=")) {
            dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "dcachesize=")) {
            dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "limit=")) {
            limit = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "evict=")) {
            gchar *p = opt + 6;
            if (g_strcmp0(p, "rand") == 0) {
                policy = RAND;
            } else if (g_strcmp0(p, "lru") == 0) {
                policy = LRU;
            } else if (g_strcmp0(p, "fifo") == 0) {
                policy = FIFO;
            } else {
                fprintf(stderr, "invalid eviction policy: %s\n", opt);
                return -1;
            }
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }

    policy_init();

    dcache = cache_init(dblksize, dassoc, dcachesize);
    if (!dcache) {
        const char *err = cache_config_error(dblksize, dassoc, dcachesize);
        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    icache = cache_init(iblksize, iassoc, icachesize);
    if (!icache) {
        const char *err = cache_config_error(iblksize, iassoc, icachesize);
        fprintf(stderr, "icache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);

    return 0;
}