/*
 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 */
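
/*
 * Example invocation (a sketch, not from the source: the plugin-argument
 * syntax differs between QEMU versions, which pass key=value pairs either
 * directly or wrapped in arg="..."; the library path is also hypothetical):
 *
 *   qemu-x86_64 -plugin ./contrib/plugins/libcache.so ./a.out
 *
 * Cache geometry and the eviction policy can be tuned with plugin arguments
 * such as dcachesize=16384 or evict=lru; see qemu_plugin_install() below for
 * the full list this plugin parses.
 */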

#include <inttypes.h>
#include <stdio.h>
#include <glib.h>

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

static GHashTable *miss_ht;

static GMutex hashtable_lock;
static GRand *rng;

static int limit;
static bool sys;

enum EvictionPolicy {
    LRU,
    FIFO,
    RAND,
};

enum EvictionPolicy policy;

/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can
 * be put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not
 * a functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: the block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match.
 * If a match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */
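
/*
 * A worked example of the address split (an illustration, assuming 64-byte
 * blocks and 32 sets, i.e. blksize_shift = 6, set_mask = 0x7c0 and
 * tag_mask = ~0x7ff):
 *
 *   addr         = 0x12345
 *   block offset = addr & 0x3f           = 0x05
 *   set          = (addr & 0x7c0) >> 6   = 13
 *   tag          = addr & ~0x7ff         = 0x12000
 *
 * Two addresses with equal set and tag values map to the same cache block.
 */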

typedef struct {
    uint64_t tag;
    bool valid;
} CacheBlock;

typedef struct {
    CacheBlock *blocks;
    uint64_t *lru_priorities;
    uint64_t lru_gen_counter;
    GQueue *fifo_queue;
} CacheSet;

typedef struct {
    CacheSet *sets;
    int num_sets;
    int cachesize;
    int assoc;
    int blksize_shift;
    uint64_t set_mask;
    uint64_t tag_mask;
    uint64_t accesses;
    uint64_t misses;
} Cache;

typedef struct {
    char *disas_str;
    const char *symbol;
    uint64_t addr;
    uint64_t dmisses;
    uint64_t imisses;
} InsnData;

/* Hooks for the active eviction policy; assigned in policy_init(). */
void (*update_hit)(Cache *cache, int set, int blk);
void (*update_miss)(Cache *cache, int set, int blk);

void (*metadata_init)(Cache *cache);
void (*metadata_destroy)(Cache *cache);

static int cores;
static Cache **dcaches, **icaches;

static GMutex *dcache_locks;
static GMutex *icache_locks;

static uint64_t all_dmem_accesses;
static uint64_t all_imem_accesses;
static uint64_t all_imisses;
static uint64_t all_dmisses;

static int pow_of_two(int num)
{
    g_assert((num & (num - 1)) == 0);
    int ret = 0;
    while (num /= 2) {
        ret++;
    }
    return ret;
}

/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit-block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the least priority is searched and replaced
 * with the newly-cached block, of which the priority is set to the current
 * generation number.
 */
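
/*
 * A short illustrative trace (not from the source; assumes a 2-way set with
 * the generation counter starting at 0):
 *
 *   miss on A -> cached in blk 0, priority[0] = 0, gen becomes 1
 *   miss on B -> cached in blk 1, priority[1] = 1, gen becomes 2
 *   hit  on A -> priority[0] = 2, gen becomes 3
 *   miss on C -> blk 1 (B) holds the minimum priority, so C replaces B
 */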

static void lru_priorities_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
        cache->sets[i].lru_gen_counter = 0;
    }
}

static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *set = &cache->sets[set_idx];
    set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
    set->lru_gen_counter++;
}

static int lru_get_lru_block(Cache *cache, int set_idx)
{
    int i, min_idx;
    /* Priorities are 64-bit generation numbers, so avoid truncating to int. */
    uint64_t min_priority;

    min_priority = cache->sets[set_idx].lru_priorities[0];
    min_idx = 0;

    for (i = 1; i < cache->assoc; i++) {
        if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
            min_priority = cache->sets[set_idx].lru_priorities[i];
            min_idx = i;
        }
    }
    return min_idx;
}

static void lru_priorities_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].lru_priorities);
    }
}

/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * stores accesses to the cache.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the latest cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache and the new
 * block is put in its place and enqueued to the FIFO queue.
 */
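
/*
 * A short illustrative trace (not from the source; assumes a 2-way set,
 * queue head on the left):
 *
 *   miss on A -> queue [A]
 *   miss on B -> queue [B, A]
 *   hit  on A -> queue unchanged; FIFO does not update on hits
 *   miss on C -> A was first in, so C replaces A -> queue [C, B]
 */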

static void fifo_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].fifo_queue = g_queue_new();
    }
}

static int fifo_get_first_block(Cache *cache, int set)
{
    GQueue *q = cache->sets[set].fifo_queue;
    return GPOINTER_TO_INT(g_queue_pop_tail(q));
}

static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    GQueue *q = cache->sets[set].fifo_queue;
    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
}

static void fifo_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_queue_free(cache->sets[i].fifo_queue);
    }
}

static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    return addr & cache->tag_mask;
}

static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    return (addr & cache->set_mask) >> cache->blksize_shift;
}

static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % (blksize * assoc) != 0) {
        return "cache size must be divisible by set size (assoc * block size)";
    } else {
        return NULL;
    }
}

static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
}

static Cache *cache_init(int blksize, int assoc, int cachesize)
{
    Cache *cache;
    int i;
    uint64_t blk_mask;

    /*
     * This function shall not be called directly, and hence expects suitable
     * parameters.
     */
    g_assert(!bad_cache_params(blksize, assoc, cachesize));

    cache = g_new(Cache, 1);
    cache->assoc = assoc;
    cache->cachesize = cachesize;
    cache->num_sets = cachesize / (blksize * assoc);
    cache->sets = g_new(CacheSet, cache->num_sets);
    cache->blksize_shift = pow_of_two(blksize);
    cache->accesses = 0;
    cache->misses = 0;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].blocks = g_new0(CacheBlock, assoc);
    }

    blk_mask = blksize - 1;
    cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
    cache->tag_mask = ~(cache->set_mask | blk_mask);

    if (metadata_init) {
        metadata_init(cache);
    }

    return cache;
}
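
/*
 * For the default geometry set up in qemu_plugin_install() below
 * (blksize = 64, assoc = 8, cachesize = 16384), the derived values are:
 *
 *   num_sets      = 16384 / (64 * 8)  = 32
 *   blksize_shift = 6
 *   set_mask      = (32 - 1) << 6     = 0x7c0
 *   tag_mask      = ~(0x7c0 | 0x3f)   = ~0x7ff
 */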

static Cache **caches_init(int blksize, int assoc, int cachesize)
{
    Cache **caches;
    int i;

    if (bad_cache_params(blksize, assoc, cachesize)) {
        return NULL;
    }

    caches = g_new(Cache *, cores);

    for (i = 0; i < cores; i++) {
        caches[i] = cache_init(blksize, assoc, cachesize);
    }

    return caches;
}

static int get_invalid_block(Cache *cache, uint64_t set)
{
    int i;

    for (i = 0; i < cache->assoc; i++) {
        if (!cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

static int get_replaced_block(Cache *cache, int set)
{
    switch (policy) {
    case RAND:
        return g_rand_int_range(rng, 0, cache->assoc);
    case LRU:
        return lru_get_lru_block(cache, set);
    case FIFO:
        return fifo_get_first_block(cache, set);
    default:
        g_assert_not_reached();
    }
}

static int in_cache(Cache *cache, uint64_t addr)
{
    int i;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    for (i = 0; i < cache->assoc; i++) {
        if (cache->sets[set].blocks[i].tag == tag &&
                cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}

/**
 * access_cache(): Simulate a cache access
 * @cache: The cache under simulation
 * @addr: The address of the requested memory location
 *
 * Returns true if the requested data is hit in the cache and false when
 * missed. The cache is updated on miss for the next access.
 */
static bool access_cache(Cache *cache, uint64_t addr)
{
    int hit_blk, replaced_blk;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    hit_blk = in_cache(cache, addr);
    if (hit_blk != -1) {
        if (update_hit) {
            update_hit(cache, set, hit_blk);
        }
        return true;
    }

    replaced_blk = get_invalid_block(cache, set);

    if (replaced_blk == -1) {
        replaced_blk = get_replaced_block(cache, set);
    }

    if (update_miss) {
        update_miss(cache, set, replaced_blk);
    }

    cache->sets[set].blocks[replaced_blk].tag = tag;
    cache->sets[set].blocks[replaced_blk].valid = true;

    return false;
}

static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    uint64_t effective_addr;
    struct qemu_plugin_hwaddr *hwaddr;
    int cache_idx;
    InsnData *insn;

    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        return;
    }

    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
    cache_idx = vcpu_index % cores;

    g_mutex_lock(&dcache_locks[cache_idx]);
    if (!access_cache(dcaches[cache_idx], effective_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->dmisses, 1, __ATOMIC_SEQ_CST);
        dcaches[cache_idx]->misses++;
    }
    dcaches[cache_idx]->accesses++;
    g_mutex_unlock(&dcache_locks[cache_idx]);
}

static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
{
    uint64_t insn_addr;
    InsnData *insn;
    int cache_idx;

    insn_addr = ((InsnData *) userdata)->addr;

    cache_idx = vcpu_index % cores;
    g_mutex_lock(&icache_locks[cache_idx]);
    if (!access_cache(icaches[cache_idx], insn_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->imisses, 1, __ATOMIC_SEQ_CST);
        icaches[cache_idx]->misses++;
    }
    icaches[cache_idx]->accesses++;
    g_mutex_unlock(&icache_locks[cache_idx]);
}

static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n_insns;
    size_t i;
    InsnData *data;

    n_insns = qemu_plugin_tb_n_insns(tb);
    for (i = 0; i < n_insns; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        uint64_t effective_addr;

        if (sys) {
            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
        } else {
            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
        }

        /*
         * Instructions might get translated multiple times; we do not create
         * new entries for those instructions. Instead, we fetch the same
         * entry from the hash table and register it for the callback again.
         */
        g_mutex_lock(&hashtable_lock);
        data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
        if (data == NULL) {
            data = g_new0(InsnData, 1);
            data->disas_str = qemu_plugin_insn_disas(insn);
            data->symbol = qemu_plugin_insn_symbol(insn);
            data->addr = effective_addr;
            g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
                                (gpointer) data);
        }
        g_mutex_unlock(&hashtable_lock);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, data);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                               QEMU_PLUGIN_CB_NO_REGS, data);
    }
}

static void insn_free(gpointer data)
{
    InsnData *insn = (InsnData *) data;
    g_free(insn->disas_str);
    g_free(insn);
}

static void cache_free(Cache *cache)
{
    for (int i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].blocks);
    }

    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}

static void caches_free(Cache **caches)
{
    int i;

    for (i = 0; i < cores; i++) {
        cache_free(caches[i]);
    }
}

static int dcmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
}

static void append_stats_line(GString *line, uint64_t daccess, uint64_t dmisses,
                              uint64_t iaccess, uint64_t imisses)
{
    double dmiss_rate, imiss_rate;

    dmiss_rate = ((double) dmisses) / (daccess) * 100.0;
    imiss_rate = ((double) imisses) / (iaccess) * 100.0;

    /* Use PRIu64 so the format matches uint64_t on all hosts. */
    g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%"
                           " %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%\n",
                           daccess,
                           dmisses,
                           daccess ? dmiss_rate : 0.0,
                           iaccess,
                           imisses,
                           iaccess ? imiss_rate : 0.0);
}

static void sum_stats(void)
{
    int i;

    g_assert(cores > 1);
    for (i = 0; i < cores; i++) {
        all_imisses += icaches[i]->misses;
        all_dmisses += dcaches[i]->misses;
        all_imem_accesses += icaches[i]->accesses;
        all_dmem_accesses += dcaches[i]->accesses;
    }
}

static int icmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->imisses < insn_b->imisses ? 1 : -1;
}

static void log_stats(void)
{
    int i;
    Cache *icache, *dcache;

    g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
                                          " dmiss rate, insn accesses,"
                                          " insn misses, imiss rate\n");

    for (i = 0; i < cores; i++) {
        g_string_append_printf(rep, "%-8d", i);
        dcache = dcaches[i];
        icache = icaches[i];
        append_stats_line(rep, dcache->accesses, dcache->misses,
                          icache->accesses, icache->misses);
    }

    if (cores > 1) {
        sum_stats();
        g_string_append_printf(rep, "%-8s", "sum");
        append_stats_line(rep, all_dmem_accesses, all_dmisses,
                          all_imem_accesses, all_imisses);
    }

    g_string_append(rep, "\n");
    qemu_plugin_outs(rep->str);
}

static void log_top_insns(void)
{
    int i;
    GList *curr, *miss_insns;
    InsnData *insn;

    miss_insns = g_hash_table_get_values(miss_ht);
    miss_insns = g_list_sort(miss_insns, dcmp);
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep, "%s", "address, data misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->dmisses,
                               insn->disas_str);
    }

    miss_insns = g_list_sort(miss_insns, icmp);
    g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %" PRIu64 ", %s\n", insn->imisses,
                               insn->disas_str);
    }

    qemu_plugin_outs(rep->str);
    g_list_free(miss_insns);
}

static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    caches_free(dcaches);
    caches_free(icaches);

    g_hash_table_destroy(miss_ht);
}

static void policy_init(void)
{
    switch (policy) {
    case LRU:
        update_hit = lru_update_blk;
        update_miss = lru_update_blk;
        metadata_init = lru_priorities_init;
        metadata_destroy = lru_priorities_destroy;
        break;
    case FIFO:
        update_miss = fifo_update_on_miss;
        metadata_init = fifo_init;
        metadata_destroy = fifo_destroy;
        break;
    case RAND:
        rng = g_rand_new();
        break;
    default:
        g_assert_not_reached();
    }
}

QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                        int argc, char **argv)
{
    int i;
    int iassoc, iblksize, icachesize;
    int dassoc, dblksize, dcachesize;

    limit = 32;
    sys = info->system_emulation;

    dassoc = 8;
    dblksize = 64;
    dcachesize = dblksize * dassoc * 32;

    iassoc = 8;
    iblksize = 64;
    icachesize = iblksize * iassoc * 32;
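
    /*
     * With these defaults each modelled cache is 64-byte blocks x 8 ways
     * x 32 sets = 16 KiB.
     */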

    policy = LRU;

    cores = sys ? qemu_plugin_n_vcpus() : 1;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_str_has_prefix(opt, "iblksize=")) {
            iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "iassoc=")) {
            iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "icachesize=")) {
            icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "dblksize=")) {
            dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "dassoc=")) {
            dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "dcachesize=")) {
            dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "limit=")) {
            limit = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "cores=")) {
            cores = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "evict=")) {
            gchar *p = opt + 6;
            if (g_strcmp0(p, "rand") == 0) {
                policy = RAND;
            } else if (g_strcmp0(p, "lru") == 0) {
                policy = LRU;
            } else if (g_strcmp0(p, "fifo") == 0) {
                policy = FIFO;
            } else {
                fprintf(stderr, "invalid eviction policy: %s\n", opt);
                return -1;
            }
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }

    policy_init();

    dcaches = caches_init(dblksize, dassoc, dcachesize);
    if (!dcaches) {
        const char *err = cache_config_error(dblksize, dassoc, dcachesize);
        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    icaches = caches_init(iblksize, iassoc, icachesize);
    if (!icaches) {
        const char *err = cache_config_error(iblksize, iassoc, icachesize);
        fprintf(stderr, "icache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    dcache_locks = g_new0(GMutex, cores);
    icache_locks = g_new0(GMutex, cores);

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);

    return 0;
}