xref: /openbmc/qemu/contrib/plugins/cache.c (revision 1580b897)
1 /*
2  * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
3  *
4  * License: GNU GPL, version 2 or later.
5  *   See the COPYING file in the top-level directory.
6  */
7 
8 #include <inttypes.h>
9 #include <stdio.h>
10 #include <glib.h>
11 
12 #include <qemu-plugin.h>
13 
14 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
15 
16 static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
17 
18 static GHashTable *miss_ht;
19 
20 static GMutex mtx;
21 static GRand *rng;
22 
23 static int limit;
24 static bool sys;
25 
26 static uint64_t dmem_accesses;
27 static uint64_t dmisses;
28 
29 static uint64_t imem_accesses;
30 static uint64_t imisses;
31 
32 enum EvictionPolicy {
33     LRU,
34     FIFO,
35     RAND,
36 };
37 
38 enum EvictionPolicy policy;
39 
40 /*
41  * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
42  * put in any of the blocks inside the set. The number of block per set is
43  * called the associativity (assoc).
44  *
45  * Each block contains the the stored tag and a valid bit. Since this is not
46  * a functional simulator, the data itself is not stored. We only identify
47  * whether a block is in the cache or not by searching for its tag.
48  *
49  * In order to search for memory data in the cache, the set identifier and tag
50  * are extracted from the address and the set is probed to see whether a tag
51  * match occur.
52  *
53  * An address is logically divided into three portions: The block offset,
54  * the set number, and the tag.
55  *
56  * The set number is used to identify the set in which the block may exist.
57  * The tag is compared against all the tags of a set to search for a match. If a
58  * match is found, then the access is a hit.
59  *
60  * The CacheSet also contains bookkeaping information about eviction details.
61  */
62 
63 typedef struct {
64     uint64_t tag;
65     bool valid;
66 } CacheBlock;
67 
68 typedef struct {
69     CacheBlock *blocks;
70     uint64_t *lru_priorities;
71     uint64_t lru_gen_counter;
72     GQueue *fifo_queue;
73 } CacheSet;
74 
75 typedef struct {
76     CacheSet *sets;
77     int num_sets;
78     int cachesize;
79     int assoc;
80     int blksize_shift;
81     uint64_t set_mask;
82     uint64_t tag_mask;
83 } Cache;
84 
85 typedef struct {
86     char *disas_str;
87     const char *symbol;
88     uint64_t addr;
89     uint64_t dmisses;
90     uint64_t imisses;
91 } InsnData;
92 
93 void (*update_hit)(Cache *cache, int set, int blk);
94 void (*update_miss)(Cache *cache, int set, int blk);
95 
96 void (*metadata_init)(Cache *cache);
97 void (*metadata_destroy)(Cache *cache);
98 
99 Cache *dcache, *icache;
100 
101 static int pow_of_two(int num)
102 {
103     g_assert((num & (num - 1)) == 0);
104     int ret = 0;
105     while (num /= 2) {
106         ret++;
107     }
108     return ret;
109 }
110 
111 /*
112  * LRU evection policy: For each set, a generation counter is maintained
113  * alongside a priority array.
114  *
115  * On each set access, the generation counter is incremented.
116  *
117  * On a cache hit: The hit-block is assigned the current generation counter,
118  * indicating that it is the most recently used block.
119  *
120  * On a cache miss: The block with the least priority is searched and replaced
121  * with the newly-cached block, of which the priority is set to the current
122  * generation number.
123  */
124 
125 static void lru_priorities_init(Cache *cache)
126 {
127     int i;
128 
129     for (i = 0; i < cache->num_sets; i++) {
130         cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
131         cache->sets[i].lru_gen_counter = 0;
132     }
133 }
134 
135 static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
136 {
137     CacheSet *set = &cache->sets[set_idx];
138     set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
139     set->lru_gen_counter++;
140 }
141 
142 static int lru_get_lru_block(Cache *cache, int set_idx)
143 {
144     int i, min_idx, min_priority;
145 
146     min_priority = cache->sets[set_idx].lru_priorities[0];
147     min_idx = 0;
148 
149     for (i = 1; i < cache->assoc; i++) {
150         if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
151             min_priority = cache->sets[set_idx].lru_priorities[i];
152             min_idx = i;
153         }
154     }
155     return min_idx;
156 }
157 
158 static void lru_priorities_destroy(Cache *cache)
159 {
160     int i;
161 
162     for (i = 0; i < cache->num_sets; i++) {
163         g_free(cache->sets[i].lru_priorities);
164     }
165 }
166 
167 /*
168  * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
169  * stores accesses to the cache.
170  *
171  * On a compulsory miss: The block index is enqueued to the fifo_queue to
172  * indicate that it's the latest cached block.
173  *
174  * On a conflict miss: The first-in block is removed from the cache and the new
175  * block is put in its place and enqueued to the FIFO queue.
176  */
177 
178 static void fifo_init(Cache *cache)
179 {
180     int i;
181 
182     for (i = 0; i < cache->num_sets; i++) {
183         cache->sets[i].fifo_queue = g_queue_new();
184     }
185 }
186 
187 static int fifo_get_first_block(Cache *cache, int set)
188 {
189     GQueue *q = cache->sets[set].fifo_queue;
190     return GPOINTER_TO_INT(g_queue_pop_tail(q));
191 }
192 
193 static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
194 {
195     GQueue *q = cache->sets[set].fifo_queue;
196     g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
197 }
198 
199 static void fifo_destroy(Cache *cache)
200 {
201     int i;
202 
203     for (i = 0; i < cache->assoc; i++) {
204         g_queue_free(cache->sets[i].fifo_queue);
205     }
206 }
207 
208 static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
209 {
210     return addr & cache->tag_mask;
211 }
212 
213 static inline uint64_t extract_set(Cache *cache, uint64_t addr)
214 {
215     return (addr & cache->set_mask) >> cache->blksize_shift;
216 }
217 
218 static const char *cache_config_error(int blksize, int assoc, int cachesize)
219 {
220     if (cachesize % blksize != 0) {
221         return "cache size must be divisible by block size";
222     } else if (cachesize % (blksize * assoc) != 0) {
223         return "cache size must be divisible by set size (assoc * block size)";
224     } else {
225         return NULL;
226     }
227 }
228 
229 static bool bad_cache_params(int blksize, int assoc, int cachesize)
230 {
231     return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
232 }
233 
234 static Cache *cache_init(int blksize, int assoc, int cachesize)
235 {
236     if (bad_cache_params(blksize, assoc, cachesize)) {
237         return NULL;
238     }
239 
240     Cache *cache;
241     int i;
242     uint64_t blk_mask;
243 
244     cache = g_new(Cache, 1);
245     cache->assoc = assoc;
246     cache->cachesize = cachesize;
247     cache->num_sets = cachesize / (blksize * assoc);
248     cache->sets = g_new(CacheSet, cache->num_sets);
249     cache->blksize_shift = pow_of_two(blksize);
250 
251     for (i = 0; i < cache->num_sets; i++) {
252         cache->sets[i].blocks = g_new0(CacheBlock, assoc);
253     }
254 
255     blk_mask = blksize - 1;
256     cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
257     cache->tag_mask = ~(cache->set_mask | blk_mask);
258 
259     if (metadata_init) {
260         metadata_init(cache);
261     }
262 
263     return cache;
264 }
265 
266 static int get_invalid_block(Cache *cache, uint64_t set)
267 {
268     int i;
269 
270     for (i = 0; i < cache->assoc; i++) {
271         if (!cache->sets[set].blocks[i].valid) {
272             return i;
273         }
274     }
275 
276     return -1;
277 }
278 
279 static int get_replaced_block(Cache *cache, int set)
280 {
281     switch (policy) {
282     case RAND:
283         return g_rand_int_range(rng, 0, cache->assoc);
284     case LRU:
285         return lru_get_lru_block(cache, set);
286     case FIFO:
287         return fifo_get_first_block(cache, set);
288     default:
289         g_assert_not_reached();
290     }
291 }
292 
293 static int in_cache(Cache *cache, uint64_t addr)
294 {
295     int i;
296     uint64_t tag, set;
297 
298     tag = extract_tag(cache, addr);
299     set = extract_set(cache, addr);
300 
301     for (i = 0; i < cache->assoc; i++) {
302         if (cache->sets[set].blocks[i].tag == tag &&
303                 cache->sets[set].blocks[i].valid) {
304             return i;
305         }
306     }
307 
308     return -1;
309 }
310 
311 /**
312  * access_cache(): Simulate a cache access
313  * @cache: The cache under simulation
314  * @addr: The address of the requested memory location
315  *
316  * Returns true if the requsted data is hit in the cache and false when missed.
317  * The cache is updated on miss for the next access.
318  */
319 static bool access_cache(Cache *cache, uint64_t addr)
320 {
321     int hit_blk, replaced_blk;
322     uint64_t tag, set;
323 
324     tag = extract_tag(cache, addr);
325     set = extract_set(cache, addr);
326 
327     hit_blk = in_cache(cache, addr);
328     if (hit_blk != -1) {
329         if (update_hit) {
330             update_hit(cache, set, hit_blk);
331         }
332         return true;
333     }
334 
335     replaced_blk = get_invalid_block(cache, set);
336 
337     if (replaced_blk == -1) {
338         replaced_blk = get_replaced_block(cache, set);
339     }
340 
341     if (update_miss) {
342         update_miss(cache, set, replaced_blk);
343     }
344 
345     cache->sets[set].blocks[replaced_blk].tag = tag;
346     cache->sets[set].blocks[replaced_blk].valid = true;
347 
348     return false;
349 }
350 
351 static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
352                             uint64_t vaddr, void *userdata)
353 {
354     uint64_t effective_addr;
355     struct qemu_plugin_hwaddr *hwaddr;
356     InsnData *insn;
357 
358     g_mutex_lock(&mtx);
359     hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
360     if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
361         g_mutex_unlock(&mtx);
362         return;
363     }
364 
365     effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
366 
367     if (!access_cache(dcache, effective_addr)) {
368         insn = (InsnData *) userdata;
369         insn->dmisses++;
370         dmisses++;
371     }
372     dmem_accesses++;
373     g_mutex_unlock(&mtx);
374 }
375 
376 static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
377 {
378     uint64_t insn_addr;
379     InsnData *insn;
380 
381     g_mutex_lock(&mtx);
382     insn_addr = ((InsnData *) userdata)->addr;
383 
384     if (!access_cache(icache, insn_addr)) {
385         insn = (InsnData *) userdata;
386         insn->imisses++;
387         imisses++;
388     }
389     imem_accesses++;
390     g_mutex_unlock(&mtx);
391 }
392 
393 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
394 {
395     size_t n_insns;
396     size_t i;
397     InsnData *data;
398 
399     n_insns = qemu_plugin_tb_n_insns(tb);
400     for (i = 0; i < n_insns; i++) {
401         struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
402         uint64_t effective_addr;
403 
404         if (sys) {
405             effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
406         } else {
407             effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
408         }
409 
410         /*
411          * Instructions might get translated multiple times, we do not create
412          * new entries for those instructions. Instead, we fetch the same
413          * entry from the hash table and register it for the callback again.
414          */
415         g_mutex_lock(&mtx);
416         data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
417         if (data == NULL) {
418             data = g_new0(InsnData, 1);
419             data->disas_str = qemu_plugin_insn_disas(insn);
420             data->symbol = qemu_plugin_insn_symbol(insn);
421             data->addr = effective_addr;
422             g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
423                                (gpointer) data);
424         }
425         g_mutex_unlock(&mtx);
426 
427         qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
428                                          QEMU_PLUGIN_CB_NO_REGS,
429                                          rw, data);
430 
431         qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
432                                                QEMU_PLUGIN_CB_NO_REGS, data);
433     }
434 }
435 
436 static void insn_free(gpointer data)
437 {
438     InsnData *insn = (InsnData *) data;
439     g_free(insn->disas_str);
440     g_free(insn);
441 }
442 
443 static void cache_free(Cache *cache)
444 {
445     for (int i = 0; i < cache->num_sets; i++) {
446         g_free(cache->sets[i].blocks);
447     }
448 
449     if (metadata_destroy) {
450         metadata_destroy(cache);
451     }
452 
453     g_free(cache->sets);
454     g_free(cache);
455 }
456 
457 static int dcmp(gconstpointer a, gconstpointer b)
458 {
459     InsnData *insn_a = (InsnData *) a;
460     InsnData *insn_b = (InsnData *) b;
461 
462     return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
463 }
464 
465 static int icmp(gconstpointer a, gconstpointer b)
466 {
467     InsnData *insn_a = (InsnData *) a;
468     InsnData *insn_b = (InsnData *) b;
469 
470     return insn_a->imisses < insn_b->imisses ? 1 : -1;
471 }
472 
473 static void log_stats()
474 {
475     g_autoptr(GString) rep = g_string_new("");
476     g_string_append_printf(rep,
477         "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
478         dmem_accesses,
479         dmisses,
480         ((double) dmisses / (double) dmem_accesses) * 100.0);
481 
482     g_string_append_printf(rep,
483         "Instruction accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
484         imem_accesses,
485         imisses,
486         ((double) imisses / (double) imem_accesses) * 100.0);
487 
488     qemu_plugin_outs(rep->str);
489 }
490 
491 static void log_top_insns()
492 {
493     int i;
494     GList *curr, *miss_insns;
495     InsnData *insn;
496 
497     miss_insns = g_hash_table_get_values(miss_ht);
498     miss_insns = g_list_sort(miss_insns, dcmp);
499     g_autoptr(GString) rep = g_string_new("");
500     g_string_append_printf(rep, "%s", "address, data misses, instruction\n");
501 
502     for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
503         insn = (InsnData *) curr->data;
504         g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
505         if (insn->symbol) {
506             g_string_append_printf(rep, " (%s)", insn->symbol);
507         }
508         g_string_append_printf(rep, ", %ld, %s\n", insn->dmisses,
509                                insn->disas_str);
510     }
511 
512     miss_insns = g_list_sort(miss_insns, icmp);
513     g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");
514 
515     for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
516         insn = (InsnData *) curr->data;
517         g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
518         if (insn->symbol) {
519             g_string_append_printf(rep, " (%s)", insn->symbol);
520         }
521         g_string_append_printf(rep, ", %ld, %s\n", insn->imisses,
522                                insn->disas_str);
523     }
524 
525     qemu_plugin_outs(rep->str);
526     g_list_free(miss_insns);
527 }
528 
529 static void plugin_exit(qemu_plugin_id_t id, void *p)
530 {
531     log_stats();
532     log_top_insns();
533 
534     cache_free(dcache);
535     cache_free(icache);
536 
537     g_hash_table_destroy(miss_ht);
538 }
539 
540 static void policy_init()
541 {
542     switch (policy) {
543     case LRU:
544         update_hit = lru_update_blk;
545         update_miss = lru_update_blk;
546         metadata_init = lru_priorities_init;
547         metadata_destroy = lru_priorities_destroy;
548         break;
549     case FIFO:
550         update_miss = fifo_update_on_miss;
551         metadata_init = fifo_init;
552         metadata_destroy = fifo_destroy;
553         break;
554     case RAND:
555         rng = g_rand_new();
556         break;
557     default:
558         g_assert_not_reached();
559     }
560 }
561 
562 QEMU_PLUGIN_EXPORT
563 int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
564                         int argc, char **argv)
565 {
566     int i;
567     int iassoc, iblksize, icachesize;
568     int dassoc, dblksize, dcachesize;
569 
570     limit = 32;
571     sys = info->system_emulation;
572 
573     dassoc = 8;
574     dblksize = 64;
575     dcachesize = dblksize * dassoc * 32;
576 
577     iassoc = 8;
578     iblksize = 64;
579     icachesize = iblksize * iassoc * 32;
580 
581     policy = LRU;
582 
583     for (i = 0; i < argc; i++) {
584         char *opt = argv[i];
585         if (g_str_has_prefix(opt, "iblksize=")) {
586             iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
587         } else if (g_str_has_prefix(opt, "iassoc=")) {
588             iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
589         } else if (g_str_has_prefix(opt, "icachesize=")) {
590             icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
591         } else if (g_str_has_prefix(opt, "dblksize=")) {
592             dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
593         } else if (g_str_has_prefix(opt, "dassoc=")) {
594             dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
595         } else if (g_str_has_prefix(opt, "dcachesize=")) {
596             dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
597         } else if (g_str_has_prefix(opt, "limit=")) {
598             limit = g_ascii_strtoll(opt + 6, NULL, 10);
599         } else if (g_str_has_prefix(opt, "evict=")) {
600             gchar *p = opt + 6;
601             if (g_strcmp0(p, "rand") == 0) {
602                 policy = RAND;
603             } else if (g_strcmp0(p, "lru") == 0) {
604                 policy = LRU;
605             } else if (g_strcmp0(p, "fifo") == 0) {
606                 policy = FIFO;
607             } else {
608                 fprintf(stderr, "invalid eviction policy: %s\n", opt);
609                 return -1;
610             }
611         } else {
612             fprintf(stderr, "option parsing failed: %s\n", opt);
613             return -1;
614         }
615     }
616 
617     policy_init();
618 
619     dcache = cache_init(dblksize, dassoc, dcachesize);
620     if (!dcache) {
621         const char *err = cache_config_error(dblksize, dassoc, dcachesize);
622         fprintf(stderr, "dcache cannot be constructed from given parameters\n");
623         fprintf(stderr, "%s\n", err);
624         return -1;
625     }
626 
627     icache = cache_init(iblksize, iassoc, icachesize);
628     if (!icache) {
629         const char *err = cache_config_error(iblksize, iassoc, icachesize);
630         fprintf(stderr, "icache cannot be constructed from given parameters\n");
631         fprintf(stderr, "%s\n", err);
632         return -1;
633     }
634 
635     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
636     qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
637 
638     miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);
639 
640     return 0;
641 }
642