/*
 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
 *
 * License: GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include <inttypes.h>
#include <stdio.h>
#include <glib.h>

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

static GHashTable *miss_ht;

static GMutex mtx;
static GRand *rng;

static int limit;
static bool sys;

static uint64_t dmem_accesses;
static uint64_t dmisses;

static uint64_t imem_accesses;
static uint64_t imisses;

enum EvictionPolicy {
    LRU,
    FIFO,
    RAND,
};

enum EvictionPolicy policy;

/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can
 * be put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not a
 * functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: the block offset, the
 * set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match.
 * If a match is found, the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */
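
/*
 * As a worked example, consider the default geometry set up in
 * qemu_plugin_install() below: 64-byte blocks, 8-way associativity and
 * 32 sets, i.e. a 16 KiB cache. An address then decomposes as:
 *
 *     block offset: bits [5:0]   (blksize_shift = 6)
 *     set number:   bits [10:6]  (set_mask = 0x7c0)
 *     tag:          bits [63:11] (tag_mask = ~0x7ff)
 *
 * so the address 0xdeadbeef has block offset 0x2f, maps to set 27, and
 * carries the tag 0xdeadb800.
 */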

typedef struct {
    uint64_t tag;
    bool valid;
} CacheBlock;

typedef struct {
    CacheBlock *blocks;
    uint64_t *lru_priorities;
    uint64_t lru_gen_counter;
    GQueue *fifo_queue;
} CacheSet;

typedef struct {
    CacheSet *sets;
    int num_sets;
    int cachesize;
    int assoc;
    int blksize_shift;
    uint64_t set_mask;
    uint64_t tag_mask;
} Cache;

typedef struct {
    char *disas_str;
    const char *symbol;
    uint64_t addr;
    uint64_t dmisses;
    uint64_t imisses;
} InsnData;

void (*update_hit)(Cache *cache, int set, int blk);
void (*update_miss)(Cache *cache, int set, int blk);

void (*metadata_init)(Cache *cache);
void (*metadata_destroy)(Cache *cache);

Cache *dcache, *icache;

/* Returns log2(num), assuming num is a power of two. */
static int pow_of_two(int num)
{
    g_assert((num & (num - 1)) == 0);
    int ret = 0;
    while (num /= 2) {
        ret++;
    }
    return ret;
}

/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the lowest priority is searched for and
 * replaced with the newly-cached block, whose priority is set to the current
 * generation number.
 */
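
/*
 * A short trace for a 2-way set, with priorities starting at {0, 0} and
 * lru_gen_counter at 0:
 *
 *     miss, fills block 0:  priorities {0, 0}, counter -> 1
 *     miss, fills block 1:  priorities {0, 1}, counter -> 2
 *     hit on block 0:       priorities {2, 1}, counter -> 3
 *     miss:                 block 1 has the lowest priority and is evicted
 */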

static void lru_priorities_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
        cache->sets[i].lru_gen_counter = 0;
    }
}

static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *set = &cache->sets[set_idx];
    set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
    set->lru_gen_counter++;
}

static int lru_get_lru_block(Cache *cache, int set_idx)
{
    int i, min_idx;
    uint64_t min_priority; /* priorities are 64-bit; an int would truncate */

    min_priority = cache->sets[set_idx].lru_priorities[0];
    min_idx = 0;

    for (i = 1; i < cache->assoc; i++) {
        if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
            min_priority = cache->sets[set_idx].lru_priorities[i];
            min_idx = i;
        }
    }
    return min_idx;
}

static void lru_priorities_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].lru_priorities);
    }
}

/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * stores blocks in the order in which they were cached.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the latest cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache, the new
 * block takes its place, and its index is enqueued to the FIFO queue.
 */
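
/*
 * E.g. for a 2-way set: two compulsory misses fill blocks 0 and 1, leaving
 * the queue (newest first) as {1, 0}. A subsequent conflict miss pops
 * block 0 from the tail, reuses it for the new block, and pushes its index
 * back at the head, giving {0, 1}.
 */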

static void fifo_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].fifo_queue = g_queue_new();
    }
}

static int fifo_get_first_block(Cache *cache, int set)
{
    GQueue *q = cache->sets[set].fifo_queue;
    return GPOINTER_TO_INT(g_queue_pop_tail(q));
}

static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    GQueue *q = cache->sets[set].fifo_queue;
    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
}

static void fifo_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_queue_free(cache->sets[i].fifo_queue);
    }
}

static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    return addr & cache->tag_mask;
}

static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    return (addr & cache->set_mask) >> cache->blksize_shift;
}

static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % (blksize * assoc) != 0) {
        return "cache size must be divisible by set size (assoc * block size)";
    } else {
        return NULL;
    }
}

static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
}

static Cache *cache_init(int blksize, int assoc, int cachesize)
{
    if (bad_cache_params(blksize, assoc, cachesize)) {
        return NULL;
    }

    Cache *cache;
    int i;
    uint64_t blk_mask;

    cache = g_new(Cache, 1);
    cache->assoc = assoc;
    cache->cachesize = cachesize;
    cache->num_sets = cachesize / (blksize * assoc);
    cache->sets = g_new(CacheSet, cache->num_sets);
    cache->blksize_shift = pow_of_two(blksize);

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].blocks = g_new0(CacheBlock, assoc);
    }

    blk_mask = blksize - 1;
    cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
    cache->tag_mask = ~(cache->set_mask | blk_mask);

    if (metadata_init) {
        metadata_init(cache);
    }

    return cache;
}

static int get_invalid_block(Cache *cache, uint64_t set)
{
    int i;

    for (i = 0; i < cache->assoc; i++) {
        if (!cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

static int get_replaced_block(Cache *cache, int set)
{
    switch (policy) {
    case RAND:
        return g_rand_int_range(rng, 0, cache->assoc);
    case LRU:
        return lru_get_lru_block(cache, set);
    case FIFO:
        return fifo_get_first_block(cache, set);
    default:
        g_assert_not_reached();
    }
}

static int in_cache(Cache *cache, uint64_t addr)
{
    int i;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    for (i = 0; i < cache->assoc; i++) {
        if (cache->sets[set].blocks[i].tag == tag &&
                cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

/**
 * access_cache(): Simulate a cache access
 * @cache: The cache under simulation
 * @addr: The address of the requested memory location
 *
 * Returns true if the requested data is found in the cache (a hit), and
 * false on a miss. On a miss, the cache is updated so that the block is
 * present for the next access.
 */
static bool access_cache(Cache *cache, uint64_t addr)
{
    int hit_blk, replaced_blk;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    hit_blk = in_cache(cache, addr);
    if (hit_blk != -1) {
        if (update_hit) {
            update_hit(cache, set, hit_blk);
        }
        return true;
    }

    replaced_blk = get_invalid_block(cache, set);

    if (replaced_blk == -1) {
        replaced_blk = get_replaced_block(cache, set);
    }

    if (update_miss) {
        update_miss(cache, set, replaced_blk);
    }

    cache->sets[set].blocks[replaced_blk].tag = tag;
    cache->sets[set].blocks[replaced_blk].valid = true;

    return false;
}
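
/*
 * For example, for two back-to-back accesses to the same address, the first
 * access_cache() call misses, caches the block and returns false; the
 * second finds a valid matching tag in the set and returns true.
 */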

static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    uint64_t effective_addr;
    struct qemu_plugin_hwaddr *hwaddr;
    InsnData *insn;

    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        return;
    }

    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;

    g_mutex_lock(&mtx);
    if (!access_cache(dcache, effective_addr)) {
        insn = (InsnData *) userdata;
        insn->dmisses++;
        dmisses++;
    }
    dmem_accesses++;
    g_mutex_unlock(&mtx);
}

static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
{
    uint64_t insn_addr;
    InsnData *insn;

    g_mutex_lock(&mtx);
    insn_addr = ((InsnData *) userdata)->addr;

    if (!access_cache(icache, insn_addr)) {
        insn = (InsnData *) userdata;
        insn->imisses++;
        imisses++;
    }
    imem_accesses++;
    g_mutex_unlock(&mtx);
}

static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n_insns;
    size_t i;
    InsnData *data;

    n_insns = qemu_plugin_tb_n_insns(tb);
    for (i = 0; i < n_insns; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        uint64_t effective_addr;

        if (sys) {
            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
        } else {
            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
        }

        /*
         * Instructions might get translated multiple times; we do not create
         * new entries for those instructions. Instead, we fetch the existing
         * entry from the hash table and register it for the callback again.
         */
        g_mutex_lock(&mtx);
        data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
        if (data == NULL) {
            data = g_new0(InsnData, 1);
            data->disas_str = qemu_plugin_insn_disas(insn);
            data->symbol = qemu_plugin_insn_symbol(insn);
            data->addr = effective_addr;
            g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
                                (gpointer) data);
        }
        g_mutex_unlock(&mtx);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, data);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                               QEMU_PLUGIN_CB_NO_REGS, data);
    }
}

static void insn_free(gpointer data)
{
    InsnData *insn = (InsnData *) data;
    g_free(insn->disas_str);
    g_free(insn);
}

static void cache_free(Cache *cache)
{
    for (int i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].blocks);
    }

    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}

/* Sort in descending order of data misses. */
static int dcmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
}

/* Sort in descending order of instruction-fetch misses. */
static int icmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->imisses < insn_b->imisses ? 1 : -1;
}

static void log_stats(void)
{
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep,
        "Data accesses: %" PRIu64 ", Misses: %" PRIu64 "\nMiss rate: %lf%%\n\n",
        dmem_accesses,
        dmisses,
        ((double) dmisses / (double) dmem_accesses) * 100.0);

    g_string_append_printf(rep,
        "Instruction accesses: %" PRIu64 ", Misses: %" PRIu64 "\nMiss rate: %lf%%\n\n",
        imem_accesses,
        imisses,
        ((double) imisses / (double) imem_accesses) * 100.0);

    qemu_plugin_outs(rep->str);
}

static void log_top_insns(void)
{
    int i;
    GList *curr, *miss_insns;
    InsnData *insn;

    miss_insns = g_hash_table_get_values(miss_ht);
    miss_insns = g_list_sort(miss_insns, dcmp);
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep, "%s", "address, data misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->dmisses,
                               insn->disas_str);
    }

    miss_insns = g_list_sort(miss_insns, icmp);
    g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->imisses,
                               insn->disas_str);
    }

    qemu_plugin_outs(rep->str);
    g_list_free(miss_insns);
}

static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    cache_free(dcache);
    cache_free(icache);

    g_hash_table_destroy(miss_ht);
}

static void policy_init(void)
{
    switch (policy) {
    case LRU:
        update_hit = lru_update_blk;
        update_miss = lru_update_blk;
        metadata_init = lru_priorities_init;
        metadata_destroy = lru_priorities_destroy;
        break;
    case FIFO:
        update_miss = fifo_update_on_miss;
        metadata_init = fifo_init;
        metadata_destroy = fifo_destroy;
        break;
    case RAND:
        rng = g_rand_new();
        break;
    default:
        g_assert_not_reached();
    }
}
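
/*
 * Example invocation (the paths are illustrative, and the exact
 * plugin-argument syntax depends on the QEMU version):
 *
 *     qemu-x86_64 \
 *         -plugin ./contrib/plugins/libcache.so,arg="dcachesize=32768",arg="evict=fifo" \
 *         ./a.out
 *
 * Each argument string is matched against the key= prefixes below;
 * unrecognized options abort installation.
 */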

QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                        int argc, char **argv)
{
    int i;
    int iassoc, iblksize, icachesize;
    int dassoc, dblksize, dcachesize;

    limit = 32;
    sys = info->system_emulation;

    dassoc = 8;
    dblksize = 64;
    dcachesize = dblksize * dassoc * 32;

    iassoc = 8;
    iblksize = 64;
    icachesize = iblksize * iassoc * 32;

    policy = LRU;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_str_has_prefix(opt, "iblksize=")) {
            iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "iassoc=")) {
            iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "icachesize=")) {
            icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "dblksize=")) {
            dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "dassoc=")) {
            dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "dcachesize=")) {
            dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "limit=")) {
            limit = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "evict=")) {
            gchar *p = opt + 6;
            if (g_strcmp0(p, "rand") == 0) {
                policy = RAND;
            } else if (g_strcmp0(p, "lru") == 0) {
                policy = LRU;
            } else if (g_strcmp0(p, "fifo") == 0) {
                policy = FIFO;
            } else {
                fprintf(stderr, "invalid eviction policy: %s\n", opt);
                return -1;
            }
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }

    policy_init();

    dcache = cache_init(dblksize, dassoc, dcachesize);
    if (!dcache) {
        const char *err = cache_config_error(dblksize, dassoc, dcachesize);
        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    icache = cache_init(iblksize, iassoc, icachesize);
    if (!icache) {
        const char *err = cache_config_error(iblksize, iassoc, icachesize);
        fprintf(stderr, "icache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);

    return 0;
}