/*
 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 */

#include <inttypes.h>
#include <stdio.h>
#include <glib.h>

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

static GHashTable *miss_ht;

static GMutex hashtable_lock;
static GRand *rng;

static int limit;
static bool sys;

enum EvictionPolicy {
    LRU,
    FIFO,
    RAND,
};

enum EvictionPolicy policy;

/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
 * put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not
 * a functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: the block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match. If a
 * match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */
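
/*
 * As an illustrative sketch with the default parameters set up in
 * qemu_plugin_install() (64-byte blocks, 8-way, 32 sets), an address
 * decomposes as:
 *
 *   bits [5:0]   block offset (blksize_shift = 6)
 *   bits [10:6]  set index    (set_mask = 0x7c0)
 *   bits [63:11] tag          (tag_mask = ~0x7ff)
 */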
56 
57 typedef struct {
58     uint64_t tag;
59     bool valid;
60 } CacheBlock;
61 
62 typedef struct {
63     CacheBlock *blocks;
64     uint64_t *lru_priorities;
65     uint64_t lru_gen_counter;
66     GQueue *fifo_queue;
67 } CacheSet;
68 
69 typedef struct {
70     CacheSet *sets;
71     int num_sets;
72     int cachesize;
73     int assoc;
74     int blksize_shift;
75     uint64_t set_mask;
76     uint64_t tag_mask;
77     uint64_t accesses;
78     uint64_t misses;
79 } Cache;
80 
81 typedef struct {
82     char *disas_str;
83     const char *symbol;
84     uint64_t addr;
85     uint64_t dmisses;
86     uint64_t imisses;
87 } InsnData;
88 
89 void (*update_hit)(Cache *cache, int set, int blk);
90 void (*update_miss)(Cache *cache, int set, int blk);
91 
92 void (*metadata_init)(Cache *cache);
93 void (*metadata_destroy)(Cache *cache);
94 
95 static int cores;
96 static Cache **dcaches, **icaches;
97 
98 static GMutex *dcache_locks;
99 static GMutex *icache_locks;
100 
101 static uint64_t all_dmem_accesses;
102 static uint64_t all_imem_accesses;
103 static uint64_t all_imisses;
104 static uint64_t all_dmisses;
105 
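/*
 * Integer log2 of a power-of-two value, e.g. 64 -> 6; used to derive
 * blksize_shift in cache_init().
 */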
static int pow_of_two(int num)
{
    g_assert((num & (num - 1)) == 0);
    int ret = 0;
    while (num /= 2) {
        ret++;
    }
    return ret;
}

/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the lowest priority is selected and replaced
 * with the newly-cached block, whose priority is set to the current
 * generation number.
 */
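
/*
 * For example, in a hypothetical 2-way set, accessing blocks A then B leaves
 * A with the smaller (older) priority, so a subsequent miss in that set
 * evicts A first.
 */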

static void lru_priorities_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
        cache->sets[i].lru_gen_counter = 0;
    }
}

static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *set = &cache->sets[set_idx];
    set->lru_priorities[blk_idx] = set->lru_gen_counter;
    set->lru_gen_counter++;
}

static int lru_get_lru_block(Cache *cache, int set_idx)
{
    int i, min_idx;
    uint64_t min_priority;

    min_priority = cache->sets[set_idx].lru_priorities[0];
    min_idx = 0;

    for (i = 1; i < cache->assoc; i++) {
        if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
            min_priority = cache->sets[set_idx].lru_priorities[i];
            min_idx = i;
        }
    }
    return min_idx;
}

static void lru_priorities_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].lru_priorities);
    }
}

/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * records the order in which blocks were cached.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the latest cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache and the new
 * block is put in its place and enqueued to the FIFO queue.
 */
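
/*
 * New block indices are pushed at the queue head and victims are popped from
 * the tail, so the oldest resident block in a set is always evicted first.
 */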

static void fifo_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].fifo_queue = g_queue_new();
    }
}

static int fifo_get_first_block(Cache *cache, int set)
{
    GQueue *q = cache->sets[set].fifo_queue;
    return GPOINTER_TO_INT(g_queue_pop_tail(q));
}

static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    GQueue *q = cache->sets[set].fifo_queue;
    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
}

static void fifo_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_queue_free(cache->sets[i].fifo_queue);
    }
}

static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    return addr & cache->tag_mask;
}

static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    return (addr & cache->set_mask) >> cache->blksize_shift;
}

static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % (blksize * assoc) != 0) {
        return "cache size must be divisible by set size (assoc * block size)";
    } else {
        return NULL;
    }
}

static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
}

static Cache *cache_init(int blksize, int assoc, int cachesize)
{
    Cache *cache;
    int i;
    uint64_t blk_mask;

    /*
     * This function shall not be called directly, and hence expects suitable
     * parameters.
     */
    g_assert(!bad_cache_params(blksize, assoc, cachesize));

    cache = g_new(Cache, 1);
    cache->assoc = assoc;
    cache->cachesize = cachesize;
    cache->num_sets = cachesize / (blksize * assoc);
    cache->sets = g_new(CacheSet, cache->num_sets);
    cache->blksize_shift = pow_of_two(blksize);
    cache->accesses = 0;
    cache->misses = 0;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].blocks = g_new0(CacheBlock, assoc);
    }

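    /*
     * Derive the address-decomposition masks; for example, with 64-byte
     * blocks and 32 sets this gives blk_mask = 0x3f, set_mask = 0x7c0 and
     * tag_mask covering the remaining high bits.
     */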
    blk_mask = blksize - 1;
    cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
    cache->tag_mask = ~(cache->set_mask | blk_mask);

    if (metadata_init) {
        metadata_init(cache);
    }

    return cache;
}

static Cache **caches_init(int blksize, int assoc, int cachesize)
{
    Cache **caches;
    int i;

    if (bad_cache_params(blksize, assoc, cachesize)) {
        return NULL;
    }

    caches = g_new(Cache *, cores);

    for (i = 0; i < cores; i++) {
        caches[i] = cache_init(blksize, assoc, cachesize);
    }

    return caches;
}

static int get_invalid_block(Cache *cache, uint64_t set)
{
    int i;

    for (i = 0; i < cache->assoc; i++) {
        if (!cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

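/*
 * Select a victim block according to the configured eviction policy; the
 * RAND policy needs no per-set metadata.
 */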
static int get_replaced_block(Cache *cache, int set)
{
    switch (policy) {
    case RAND:
        return g_rand_int_range(rng, 0, cache->assoc);
    case LRU:
        return lru_get_lru_block(cache, set);
    case FIFO:
        return fifo_get_first_block(cache, set);
    default:
        g_assert_not_reached();
    }
}

static int in_cache(Cache *cache, uint64_t addr)
{
    int i;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    for (i = 0; i < cache->assoc; i++) {
        if (cache->sets[set].blocks[i].tag == tag &&
                cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

/**
 * access_cache(): Simulate a cache access
 * @cache: The cache under simulation
 * @addr: The address of the requested memory location
 *
 * Returns true if the requested data hits in the cache and false on a miss.
 * On a miss, the cache is updated for the next access.
 */
static bool access_cache(Cache *cache, uint64_t addr)
{
    int hit_blk, replaced_blk;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    hit_blk = in_cache(cache, addr);
    if (hit_blk != -1) {
        if (update_hit) {
            update_hit(cache, set, hit_blk);
        }
        return true;
    }

    replaced_blk = get_invalid_block(cache, set);

    if (replaced_blk == -1) {
        replaced_blk = get_replaced_block(cache, set);
    }

    if (update_miss) {
        update_miss(cache, set, replaced_blk);
    }

    cache->sets[set].blocks[replaced_blk].tag = tag;
    cache->sets[set].blocks[replaced_blk].valid = true;

    return false;
}

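/*
 * Memory-access callback: MMIO accesses are skipped; in system mode the
 * physical address is used when available, otherwise the virtual address.
 * The access is charged to the per-core data cache picked by
 * vcpu_index % cores.
 */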
static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    uint64_t effective_addr;
    struct qemu_plugin_hwaddr *hwaddr;
    int cache_idx;
    InsnData *insn;

    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        return;
    }

    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
    cache_idx = vcpu_index % cores;

    g_mutex_lock(&dcache_locks[cache_idx]);
    if (!access_cache(dcaches[cache_idx], effective_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->dmisses, 1, __ATOMIC_SEQ_CST);
        dcaches[cache_idx]->misses++;
    }
    dcaches[cache_idx]->accesses++;
    g_mutex_unlock(&dcache_locks[cache_idx]);
}

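/*
 * Instruction-execution callback: simulate an instruction-cache access for
 * the instruction's address and charge any miss to its InsnData entry.
 */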
static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
{
    uint64_t insn_addr;
    InsnData *insn;
    int cache_idx;

    insn_addr = ((InsnData *) userdata)->addr;

    cache_idx = vcpu_index % cores;
    g_mutex_lock(&icache_locks[cache_idx]);
    if (!access_cache(icaches[cache_idx], insn_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->imisses, 1, __ATOMIC_SEQ_CST);
        icaches[cache_idx]->misses++;
    }
    icaches[cache_idx]->accesses++;
    g_mutex_unlock(&icache_locks[cache_idx]);
}

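/*
 * Translation callback: for each instruction in the translation block, look
 * up (or create) its InsnData record and register the execution and memory
 * callbacks with it as userdata.
 */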
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n_insns;
    size_t i;
    InsnData *data;

    n_insns = qemu_plugin_tb_n_insns(tb);
    for (i = 0; i < n_insns; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        uint64_t effective_addr;

        if (sys) {
            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
        } else {
            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
        }

        /*
         * Instructions might get translated multiple times; we do not create
         * new entries for those instructions. Instead, we fetch the same
         * entry from the hash table and register it for the callback again.
         */
        g_mutex_lock(&hashtable_lock);
        data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
        if (data == NULL) {
            data = g_new0(InsnData, 1);
            data->disas_str = qemu_plugin_insn_disas(insn);
            data->symbol = qemu_plugin_insn_symbol(insn);
            data->addr = effective_addr;
            g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
                               (gpointer) data);
        }
        g_mutex_unlock(&hashtable_lock);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, data);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                               QEMU_PLUGIN_CB_NO_REGS, data);
    }
}

static void insn_free(gpointer data)
{
    InsnData *insn = (InsnData *) data;
    g_free(insn->disas_str);
    g_free(insn);
}

static void cache_free(Cache *cache)
{
    for (int i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].blocks);
    }

    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}

static void caches_free(Cache **caches)
{
    int i;

    for (i = 0; i < cores; i++) {
        cache_free(caches[i]);
    }
}

static int dcmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
}

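/*
 * Append one per-core statistics row; miss rates are percentages and are
 * reported as 0.0 when there were no accesses of that kind.
 */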
static void append_stats_line(GString *line, uint64_t daccess, uint64_t dmisses,
                              uint64_t iaccess, uint64_t imisses)
{
    double dmiss_rate, imiss_rate;

    dmiss_rate = ((double) dmisses) / (daccess) * 100.0;
    imiss_rate = ((double) imisses) / (iaccess) * 100.0;

    g_string_append_printf(line, "%-14lu %-12lu %9.4lf%%  %-14lu %-12lu"
                           " %9.4lf%%\n",
                           daccess,
                           dmisses,
                           daccess ? dmiss_rate : 0.0,
                           iaccess,
                           imisses,
                           iaccess ? imiss_rate : 0.0);
}

static void sum_stats(void)
{
    int i;

    g_assert(cores > 1);
    for (i = 0; i < cores; i++) {
        all_imisses += icaches[i]->misses;
        all_dmisses += dcaches[i]->misses;
        all_imem_accesses += icaches[i]->accesses;
        all_dmem_accesses += dcaches[i]->accesses;
    }
}

static int icmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->imisses < insn_b->imisses ? 1 : -1;
}

static void log_stats(void)
{
    int i;
    Cache *icache, *dcache;

    g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
                                          " dmiss rate, insn accesses,"
                                          " insn misses, imiss rate\n");

    for (i = 0; i < cores; i++) {
        g_string_append_printf(rep, "%-8d", i);
        dcache = dcaches[i];
        icache = icaches[i];
        append_stats_line(rep, dcache->accesses, dcache->misses,
                icache->accesses, icache->misses);
    }

    if (cores > 1) {
        sum_stats();
        g_string_append_printf(rep, "%-8s", "sum");
        append_stats_line(rep, all_dmem_accesses, all_dmisses,
                all_imem_accesses, all_imisses);
    }

    g_string_append(rep, "\n");
    qemu_plugin_outs(rep->str);
}

static void log_top_insns(void)
{
    int i;
    GList *curr, *miss_insns;
    InsnData *insn;

    miss_insns = g_hash_table_get_values(miss_ht);
    miss_insns = g_list_sort(miss_insns, dcmp);
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep, "%s", "address, data misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %ld, %s\n", insn->dmisses,
                               insn->disas_str);
    }

    miss_insns = g_list_sort(miss_insns, icmp);
    g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %ld, %s\n", insn->imisses,
                               insn->disas_str);
    }

    qemu_plugin_outs(rep->str);
    g_list_free(miss_insns);
}

static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    caches_free(dcaches);
    caches_free(icaches);

    g_hash_table_destroy(miss_ht);
}

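/*
 * Bind the policy-specific hooks. FIFO has no hit-time bookkeeping and RAND
 * needs neither metadata nor update hooks, so the unused function pointers
 * stay NULL and are checked before being called.
 */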
static void policy_init(void)
{
    switch (policy) {
    case LRU:
        update_hit = lru_update_blk;
        update_miss = lru_update_blk;
        metadata_init = lru_priorities_init;
        metadata_destroy = lru_priorities_destroy;
        break;
    case FIFO:
        update_miss = fifo_update_on_miss;
        metadata_init = fifo_init;
        metadata_destroy = fifo_destroy;
        break;
    case RAND:
        rng = g_rand_new();
        break;
    default:
        g_assert_not_reached();
    }
}

QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                        int argc, char **argv)
{
    int i;
    int iassoc, iblksize, icachesize;
    int dassoc, dblksize, dcachesize;

    limit = 32;
    sys = info->system_emulation;

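    /*
     * Default geometry for both caches: 8-way, 64-byte blocks, 32 sets
     * (16 KiB each); all of these can be overridden by plugin arguments.
     */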
    dassoc = 8;
    dblksize = 64;
    dcachesize = dblksize * dassoc * 32;

    iassoc = 8;
    iblksize = 64;
    icachesize = iblksize * iassoc * 32;

    policy = LRU;

    cores = sys ? qemu_plugin_n_vcpus() : 1;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_str_has_prefix(opt, "iblksize=")) {
            iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "iassoc=")) {
            iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "icachesize=")) {
            icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "dblksize=")) {
            dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "dassoc=")) {
            dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "dcachesize=")) {
            dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "limit=")) {
            limit = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "cores=")) {
            cores = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "evict=")) {
            gchar *p = opt + 6;
            if (g_strcmp0(p, "rand") == 0) {
                policy = RAND;
            } else if (g_strcmp0(p, "lru") == 0) {
                policy = LRU;
            } else if (g_strcmp0(p, "fifo") == 0) {
                policy = FIFO;
            } else {
                fprintf(stderr, "invalid eviction policy: %s\n", opt);
                return -1;
            }
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }

    policy_init();

    dcaches = caches_init(dblksize, dassoc, dcachesize);
    if (!dcaches) {
        const char *err = cache_config_error(dblksize, dassoc, dcachesize);
        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    icaches = caches_init(iblksize, iassoc, icachesize);
    if (!icaches) {
        const char *err = cache_config_error(iblksize, iassoc, icachesize);
        fprintf(stderr, "icache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    dcache_locks = g_new0(GMutex, cores);
    icache_locks = g_new0(GMutex, cores);

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);

    return 0;
}