/*
 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
 *
 * License: GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include <inttypes.h>
#include <stdio.h>
#include <glib.h>

#include <qemu-plugin.h>
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

static GHashTable *miss_ht;

static GMutex hashtable_lock;
static GRand *rng;

static int limit;
static bool sys;

enum EvictionPolicy {
    LRU,
    FIFO,
    RAND,
};

enum EvictionPolicy policy;
/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can
 * be put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not a
 * functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: the block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match.
 * If a match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */

typedef struct {
    uint64_t tag;
    bool valid;
} CacheBlock;

typedef struct {
    CacheBlock *blocks;
    uint64_t *lru_priorities;
    uint64_t lru_gen_counter;
    GQueue *fifo_queue;
} CacheSet;
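/*
 * Illustrative decomposition (sizes assumed for the example, not taken from
 * the plugin defaults): with 64-byte blocks (blksize_shift = 6) and 64 sets,
 * the address 0x7f3a0d44 splits as
 *   block offset = addr & 0x3f            -> 0x04
 *   set number   = (addr & 0xfc0) >> 6    -> 0x35
 *   tag          = addr & ~0xfff          -> 0x7f3a0000
 * The masks themselves are computed in cache_init() below.
 */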
void (*update_hit)(Cache *cache, int set, int blk);
void (*update_miss)(Cache *cache, int set, int blk);

void (*metadata_init)(Cache *cache);
void (*metadata_destroy)(Cache *cache);
static int cores;
static Cache **dcaches, **icaches;

static GMutex *dcache_locks;
static GMutex *icache_locks;

static uint64_t all_dmem_accesses;
static uint64_t all_imem_accesses;
static uint64_t all_imisses;
static uint64_t all_dmisses;
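/*
 * One data cache and one instruction cache are modelled per core, each
 * guarded by its own mutex; the all_* counters accumulate the per-core
 * totals when the final report is generated.
 */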
static int pow_of_two(int num)
{
    g_assert((num & (num - 1)) == 0);
    return 63 - __builtin_clzll(num);
}
/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit-block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the least priority is searched and replaced
 * with the newly-cached block, whose priority is set to the current
 * generation counter.
 */
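/*
 * Illustrative trace (values assumed): with assoc = 2 and an empty set,
 * caching blocks 0 then 1 leaves lru_priorities = {0, 1} and
 * lru_gen_counter = 2. A later hit on block 0 makes the priorities {2, 1},
 * so block 1 now holds the minimum priority and is evicted on the next
 * conflict miss.
 */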
static void lru_priorities_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
        cache->sets[i].lru_gen_counter = 0;
    }
}
static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *set = &cache->sets[set_idx];
    set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
    set->lru_gen_counter++;
}
static int lru_get_lru_block(Cache *cache, int set_idx)
{
    int i, min_idx, min_priority;

    min_priority = cache->sets[set_idx].lru_priorities[0];
    min_idx = 0;

    for (i = 1; i < cache->assoc; i++) {
        if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
            min_priority = cache->sets[set_idx].lru_priorities[i];
            min_idx = i;
        }
    }

    return min_idx;
}
static void lru_priorities_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].lru_priorities);
    }
}
/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * stores accesses to the cache.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the latest cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache and the
 * new block is put in its place and enqueued to the FIFO queue.
 */
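/*
 * Illustrative trace (values assumed): with assoc = 2, compulsory misses
 * filling blocks 0 and 1 leave the queue as head -> [1, 0] -> tail. The next
 * conflict miss pops index 0 from the tail, overwrites that block, and
 * pushes the index back at the head.
 */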
static void fifo_init(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].fifo_queue = g_queue_new();
    }
}
static int fifo_get_first_block(Cache *cache, int set)
{
    GQueue *q = cache->sets[set].fifo_queue;
    return GPOINTER_TO_INT(g_queue_pop_tail(q));
}
static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    GQueue *q = cache->sets[set].fifo_queue;
    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
}
static void fifo_destroy(Cache *cache)
{
    int i;

    for (i = 0; i < cache->num_sets; i++) {
        g_queue_free(cache->sets[i].fifo_queue);
    }
}
static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    return addr & cache->tag_mask;
}
static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    return (addr & cache->set_mask) >> cache->blksize_shift;
}
static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % (blksize * assoc) != 0) {
        return "cache size must be divisible by set size (assoc * block size)";
    } else {
        return NULL;
    }
}
static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
}
static Cache *cache_init(int blksize, int assoc, int cachesize)
{
    Cache *cache;
    int i;
    uint64_t blk_mask;

    /*
     * This function shall not be called directly, and hence expects suitable
     * parameters.
     */
    g_assert(!bad_cache_params(blksize, assoc, cachesize));

    cache = g_new(Cache, 1);
    cache->assoc = assoc;
    cache->cachesize = cachesize;
    cache->num_sets = cachesize / (blksize * assoc);
    cache->sets = g_new(CacheSet, cache->num_sets);
    cache->blksize_shift = pow_of_two(blksize);
    cache->accesses = 0;
    cache->misses = 0;

    for (i = 0; i < cache->num_sets; i++) {
        cache->sets[i].blocks = g_new0(CacheBlock, assoc);
    }

    blk_mask = blksize - 1;
    cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
    cache->tag_mask = ~(cache->set_mask | blk_mask);

    if (metadata_init) {
        metadata_init(cache);
    }

    return cache;
}
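/*
 * Worked example (parameters assumed): blksize = 64, assoc = 8 and
 * cachesize = 16384 give num_sets = 32, blksize_shift = 6,
 * set_mask = 0x7c0 and tag_mask = ~0x7ff.
 */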
static Cache **caches_init(int blksize, int assoc, int cachesize)
{
    Cache **caches;
    int i;

    if (bad_cache_params(blksize, assoc, cachesize)) {
        return NULL;
    }

    caches = g_new(Cache *, cores);

    for (i = 0; i < cores; i++) {
        caches[i] = cache_init(blksize, assoc, cachesize);
    }

    return caches;
}
static int get_invalid_block(Cache *cache, uint64_t set)
{
    int i;

    for (i = 0; i < cache->assoc; i++) {
        if (!cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}
static int get_replaced_block(Cache *cache, int set)
{
    switch (policy) {
    case RAND:
        return g_rand_int_range(rng, 0, cache->assoc);
    case LRU:
        return lru_get_lru_block(cache, set);
    case FIFO:
        return fifo_get_first_block(cache, set);
    default:
        g_assert_not_reached();
    }
}
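/*
 * in_cache(): search the set selected by @addr for a valid block whose tag
 * matches. Returns the block index on a hit, or -1 on a miss.
 */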
static int in_cache(Cache *cache, uint64_t addr)
{
    int i;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    for (i = 0; i < cache->assoc; i++) {
        if (cache->sets[set].blocks[i].tag == tag &&
                cache->sets[set].blocks[i].valid) {
            return i;
        }
    }

    return -1;
}
/**
 * access_cache(): Simulate a cache access
 * @cache: The cache under simulation
 * @addr: The address of the requested memory location
 *
 * Returns true if the requested data is hit in the cache and false when
 * missed. The cache is updated on miss for the next access.
 */
static bool access_cache(Cache *cache, uint64_t addr)
{
    int hit_blk, replaced_blk;
    uint64_t tag, set;

    tag = extract_tag(cache, addr);
    set = extract_set(cache, addr);

    hit_blk = in_cache(cache, addr);
    if (hit_blk != -1) {
        if (update_hit) {
            update_hit(cache, set, hit_blk);
        }
        return true;
    }

    replaced_blk = get_invalid_block(cache, set);

    if (replaced_blk == -1) {
        replaced_blk = get_replaced_block(cache, set);
    }

    if (update_miss) {
        update_miss(cache, set, replaced_blk);
    }

    cache->sets[set].blocks[replaced_blk].tag = tag;
    cache->sets[set].blocks[replaced_blk].valid = true;

    return false;
}
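/*
 * The callbacks below drive the simulation: vcpu_mem_access() runs data
 * accesses against the per-core data cache and vcpu_insn_exec() runs
 * instruction fetches against the per-core instruction cache. Both record
 * per-instruction miss counts in the shared InsnData entry passed as
 * userdata.
 */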
static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    uint64_t effective_addr;
    struct qemu_plugin_hwaddr *hwaddr;
    int cache_idx;
    InsnData *insn;

    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        return;
    }

    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
    cache_idx = vcpu_index % cores;

    g_mutex_lock(&dcache_locks[cache_idx]);
    if (!access_cache(dcaches[cache_idx], effective_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->dmisses, 1, __ATOMIC_SEQ_CST);
        dcaches[cache_idx]->misses++;
    }
    dcaches[cache_idx]->accesses++;
    g_mutex_unlock(&dcache_locks[cache_idx]);
}
static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
{
    uint64_t insn_addr;
    int cache_idx;
    InsnData *insn;

    insn_addr = ((InsnData *) userdata)->addr;

    cache_idx = vcpu_index % cores;
    g_mutex_lock(&icache_locks[cache_idx]);
    if (!access_cache(icaches[cache_idx], insn_addr)) {
        insn = (InsnData *) userdata;
        __atomic_fetch_add(&insn->imisses, 1, __ATOMIC_SEQ_CST);
        icaches[cache_idx]->misses++;
    }
    icaches[cache_idx]->accesses++;
    g_mutex_unlock(&icache_locks[cache_idx]);
}
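/*
 * vcpu_tb_trans(): called for every translated block. It looks up (or
 * creates) the InsnData entry for each instruction and registers the memory
 * and execution callbacks with that entry as userdata.
 */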
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n_insns;
    size_t i;
    InsnData *data;

    n_insns = qemu_plugin_tb_n_insns(tb);
    for (i = 0; i < n_insns; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        uint64_t effective_addr;

        if (sys) {
            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
        } else {
            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
        }

        /*
         * Instructions might get translated multiple times; we do not create
         * new entries for those instructions. Instead, we fetch the same
         * entry from the hash table and register it for the callback again.
         */
        g_mutex_lock(&hashtable_lock);
        data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
        if (data == NULL) {
            data = g_new0(InsnData, 1);
            data->disas_str = qemu_plugin_insn_disas(insn);
            data->symbol = qemu_plugin_insn_symbol(insn);
            data->addr = effective_addr;
            g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
                                (gpointer) data);
        }
        g_mutex_unlock(&hashtable_lock);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, data);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                                QEMU_PLUGIN_CB_NO_REGS, data);
    }
}
static void insn_free(gpointer data)
{
    InsnData *insn = (InsnData *) data;
    g_free(insn->disas_str);
    g_free(insn);
}
static void cache_free(Cache *cache)
{
    for (int i = 0; i < cache->num_sets; i++) {
        g_free(cache->sets[i].blocks);
    }

    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}
static void caches_free(Cache **caches)
{
    int i;

    for (i = 0; i < cores; i++) {
        cache_free(caches[i]);
    }
}
static int dcmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
}
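/*
 * dcmp() and icmp() (below) order InsnData entries by descending data-miss
 * and instruction-miss counts respectively, for the top-N listings emitted
 * by log_top_insns().
 */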
static void append_stats_line(GString *line, uint64_t daccess, uint64_t dmisses,
                              uint64_t iaccess, uint64_t imisses)
{
    double dmiss_rate, imiss_rate;

    dmiss_rate = ((double) dmisses) / (daccess) * 100.0;
    imiss_rate = ((double) imisses) / (iaccess) * 100.0;

    g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu"
                           " %9.4lf%%\n",
                           daccess,
                           dmisses,
                           daccess ? dmiss_rate : 0.0,
                           iaccess,
                           imisses,
                           iaccess ? imiss_rate : 0.0);
}
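/*
 * Example of a resulting report line (illustrative numbers only):
 *   core #, data accesses, data misses, dmiss rate, insn accesses, insn misses, imiss rate
 *   0       250000         1200            0.4800% 300000         800             0.2667%
 */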
static void sum_stats(void)
{
    int i;

    for (i = 0; i < cores; i++) {
        all_imisses += icaches[i]->misses;
        all_dmisses += dcaches[i]->misses;
        all_imem_accesses += icaches[i]->accesses;
        all_dmem_accesses += dcaches[i]->accesses;
    }
}
static int icmp(gconstpointer a, gconstpointer b)
{
    InsnData *insn_a = (InsnData *) a;
    InsnData *insn_b = (InsnData *) b;

    return insn_a->imisses < insn_b->imisses ? 1 : -1;
}
static void log_stats(void)
{
    int i;
    Cache *icache, *dcache;

    g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
                                          " dmiss rate, insn accesses,"
                                          " insn misses, imiss rate\n");

    for (i = 0; i < cores; i++) {
        g_string_append_printf(rep, "%-8d", i);
        dcache = dcaches[i];
        icache = icaches[i];
        append_stats_line(rep, dcache->accesses, dcache->misses,
                          icache->accesses, icache->misses);
    }

    if (cores > 1) {
        sum_stats();
        g_string_append_printf(rep, "%-8s", "sum");
        append_stats_line(rep, all_dmem_accesses, all_dmisses,
                          all_imem_accesses, all_imisses);
    }

    g_string_append(rep, "\n");
    qemu_plugin_outs(rep->str);
}
static void log_top_insns(void)
{
    int i;
    GList *curr, *miss_insns;
    InsnData *insn;

    miss_insns = g_hash_table_get_values(miss_ht);
    miss_insns = g_list_sort(miss_insns, dcmp);
    g_autoptr(GString) rep = g_string_new("");
    g_string_append_printf(rep, "%s", "address, data misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %ld, %s\n", insn->dmisses,
                               insn->disas_str);
    }

    miss_insns = g_list_sort(miss_insns, icmp);
    g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");

    for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
        insn = (InsnData *) curr->data;
        g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
        if (insn->symbol) {
            g_string_append_printf(rep, " (%s)", insn->symbol);
        }
        g_string_append_printf(rep, ", %ld, %s\n", insn->imisses,
                               insn->disas_str);
    }

    qemu_plugin_outs(rep->str);
    g_list_free(miss_insns);
}
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    caches_free(dcaches);
    caches_free(icaches);

    g_hash_table_destroy(miss_ht);
}
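/*
 * policy_init(): install the callbacks that implement the configured
 * eviction policy. LRU reacts to both hits and misses, FIFO only needs to
 * act on misses, and the random policy keeps no per-set metadata.
 */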
static void policy_init(void)
{
    switch (policy) {
    case LRU:
        update_hit = lru_update_blk;
        update_miss = lru_update_blk;
        metadata_init = lru_priorities_init;
        metadata_destroy = lru_priorities_destroy;
        break;
    case FIFO:
        update_miss = fifo_update_on_miss;
        metadata_init = fifo_init;
        metadata_destroy = fifo_destroy;
        break;
    case RAND:
        rng = g_rand_new();
        break;
    default:
        g_assert_not_reached();
    }
}
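/*
 * Illustrative invocation (paths and values assumed, not mandated by the
 * plugin):
 *   qemu-x86_64 -plugin ./contrib/plugins/libcache.so,arg="evict=lru",\
 *       arg="limit=20" -d plugin ./a.out
 */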
QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                        int argc, char **argv)
{
    int i;
    int iassoc, iblksize, icachesize;
    int dassoc, dblksize, dcachesize;

    limit = 32;
    sys = info->system_emulation;

    dassoc = 8;
    dblksize = 64;
    dcachesize = dblksize * dassoc * 32;

    iassoc = 8;
    iblksize = 64;
    icachesize = iblksize * iassoc * 32;

    policy = LRU;

    cores = sys ? qemu_plugin_n_vcpus() : 1;

    for (i = 0; i < argc; i++) {
        char *opt = argv[i];
        if (g_str_has_prefix(opt, "iblksize=")) {
            iblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "iassoc=")) {
            iassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "icachesize=")) {
            icachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "dblksize=")) {
            dblksize = g_ascii_strtoll(opt + 9, NULL, 10);
        } else if (g_str_has_prefix(opt, "dassoc=")) {
            dassoc = g_ascii_strtoll(opt + 7, NULL, 10);
        } else if (g_str_has_prefix(opt, "dcachesize=")) {
            dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
        } else if (g_str_has_prefix(opt, "limit=")) {
            limit = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "cores=")) {
            cores = g_ascii_strtoll(opt + 6, NULL, 10);
        } else if (g_str_has_prefix(opt, "evict=")) {
            gchar *p = opt + 6;
            if (g_strcmp0(p, "rand") == 0) {
                policy = RAND;
            } else if (g_strcmp0(p, "lru") == 0) {
                policy = LRU;
            } else if (g_strcmp0(p, "fifo") == 0) {
                policy = FIFO;
            } else {
                fprintf(stderr, "invalid eviction policy: %s\n", opt);
                return -1;
            }
        } else {
            fprintf(stderr, "option parsing failed: %s\n", opt);
            return -1;
        }
    }

    policy_init();

    dcaches = caches_init(dblksize, dassoc, dcachesize);
    if (!dcaches) {
        const char *err = cache_config_error(dblksize, dassoc, dcachesize);
        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    icaches = caches_init(iblksize, iassoc, icachesize);
    if (!icaches) {
        const char *err = cache_config_error(iblksize, iassoc, icachesize);
        fprintf(stderr, "icache cannot be constructed from given parameters\n");
        fprintf(stderr, "%s\n", err);
        return -1;
    }

    dcache_locks = g_new0(GMutex, cores);
    icache_locks = g_new0(GMutex, cores);

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);

    return 0;
}