2 * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
4 * How vectorised is this code?
6 * Attempt to measure the amount of vectorisation that has been done
7 * on some code by counting classes of instruction.
9 * License: GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
#include <string.h>

#include <qemu-plugin.h>
23 QEMU_PLUGIN_EXPORT
int qemu_plugin_version
= QEMU_PLUGIN_VERSION
;
25 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
33 static int limit
= 50;
34 static bool do_inline
;
38 static GHashTable
*insns
;
46 qemu_plugin_u64 count
;
52 qemu_plugin_u64 count
;
53 InsnClassExecCount
*class;
57 * Matchers for classes of instructions, order is important.
59 * Your most precise match must be before looser matches. If no match
60 * is found in the table we can create an individual entry.
62 * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
64 static InsnClassExecCount aarch64_insn_classes
[] = {
66 { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE
},
67 { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS
},
68 { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS
},
69 /* Data Processing Immediate */
70 { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS
},
71 { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS
},
72 { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS
},
73 { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS
},
74 { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS
},
75 { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS
},
76 { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS
},
77 { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS
},
79 { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS
},
80 { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS
},
81 { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE
},
82 { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS
},
83 { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS
},
84 { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS
},
85 { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS
},
86 { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS
},
87 { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS
},
88 { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS
},
89 { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS
},
90 { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS
},
91 { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS
},
92 /* Loads and Stores */
93 { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS
},
94 { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS
},
95 { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS
},
96 { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS
},
97 { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS
},
98 { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS
},
99 { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS
},
100 { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS
},
101 { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS
},
102 { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS
},
103 { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS
},
104 { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS
},
105 { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS
},
106 { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS
},
107 { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS
},
108 /* Data Processing Register */
109 { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS
},
111 { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS
},
113 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS
},
116 static InsnClassExecCount sparc32_insn_classes
[] = {
117 { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS
},
118 { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS
},
119 { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS
},
120 { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS
},
121 { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS
},
122 { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS
},
123 { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS
},
125 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL
},
128 static InsnClassExecCount sparc64_insn_classes
[] = {
129 { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS
},
130 { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS
},
131 { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS
},
132 { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS
},
134 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL
},
137 /* Default matcher for currently unclassified architectures */
138 static InsnClassExecCount default_insn_classes
[] = {
139 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL
},
143 const char *qemu_target
;
144 InsnClassExecCount
*table
;
148 static ClassSelector class_tables
[] = {
149 { "aarch64", aarch64_insn_classes
, ARRAY_SIZE(aarch64_insn_classes
) },
150 { "sparc", sparc32_insn_classes
, ARRAY_SIZE(sparc32_insn_classes
) },
151 { "sparc64", sparc64_insn_classes
, ARRAY_SIZE(sparc64_insn_classes
) },
152 { NULL
, default_insn_classes
, ARRAY_SIZE(default_insn_classes
) },
155 static InsnClassExecCount
*class_table
;
156 static int class_table_sz
;
158 static gint
cmp_exec_count(gconstpointer a
, gconstpointer b
)
160 InsnExecCount
*ea
= (InsnExecCount
*) a
;
161 InsnExecCount
*eb
= (InsnExecCount
*) b
;
162 uint64_t count_a
= qemu_plugin_u64_sum(ea
->count
);
163 uint64_t count_b
= qemu_plugin_u64_sum(eb
->count
);
164 return count_a
> count_b
? -1 : 1;
167 static void free_record(gpointer data
)
169 InsnExecCount
*rec
= (InsnExecCount
*) data
;
170 qemu_plugin_scoreboard_free(rec
->count
.score
);
175 static void plugin_exit(qemu_plugin_id_t id
, void *p
)
177 g_autoptr(GString
) report
= g_string_new("Instruction Classes:\n");
179 uint64_t total_count
;
181 InsnClassExecCount
*class = NULL
;
183 for (i
= 0; i
< class_table_sz
; i
++) {
184 class = &class_table
[i
];
185 switch (class->what
) {
187 total_count
= qemu_plugin_u64_sum(class->count
);
188 if (total_count
|| verbose
) {
189 g_string_append_printf(report
,
190 "Class: %-24s\t(%" PRId64
" hits)\n",
195 case COUNT_INDIVIDUAL
:
196 g_string_append_printf(report
, "Class: %-24s\tcounted individually\n",
200 g_string_append_printf(report
, "Class: %-24s\tnot counted\n",
208 counts
= g_hash_table_get_values(insns
);
209 if (counts
&& g_list_next(counts
)) {
210 g_string_append_printf(report
, "Individual Instructions:\n");
211 counts
= g_list_sort(counts
, cmp_exec_count
);
213 for (i
= 0; i
< limit
&& g_list_next(counts
);
214 i
++, counts
= g_list_next(counts
)) {
215 InsnExecCount
*rec
= (InsnExecCount
*) counts
->data
;
216 g_string_append_printf(report
,
217 "Instr: %-24s\t(%" PRId64
" hits)"
218 "\t(op=0x%08x/%s)\n",
220 qemu_plugin_u64_sum(rec
->count
),
223 rec
->class->class : "un-categorised");
228 g_hash_table_destroy(insns
);
229 for (i
= 0; i
< ARRAY_SIZE(class_tables
); i
++) {
230 for (int j
= 0; j
< class_tables
[i
].table_sz
; ++j
) {
231 qemu_plugin_scoreboard_free(class_tables
[i
].table
[j
].count
.score
);
236 qemu_plugin_outs(report
->str
);
239 static void plugin_init(void)
241 insns
= g_hash_table_new_full(NULL
, g_direct_equal
, NULL
, &free_record
);
244 static void vcpu_insn_exec_before(unsigned int cpu_index
, void *udata
)
246 struct qemu_plugin_scoreboard
*score
= udata
;
247 qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score
), cpu_index
, 1);
250 static struct qemu_plugin_scoreboard
*find_counter(
251 struct qemu_plugin_insn
*insn
)
254 uint64_t *cnt
= NULL
;
256 InsnClassExecCount
*class = NULL
;
259 * We only match the first 32 bits of the instruction which is
260 * fine for most RISCs but a bit limiting for CISC architectures.
261 * They would probably benefit from a more tailored plugin.
262 * However we can fall back to individual instruction counting.
264 opcode
= *((uint32_t *)qemu_plugin_insn_data(insn
));
266 for (i
= 0; !cnt
&& i
< class_table_sz
; i
++) {
267 class = &class_table
[i
];
268 uint32_t masked_bits
= opcode
& class->mask
;
269 if (masked_bits
== class->pattern
) {
276 switch (class->what
) {
280 return class->count
.score
;
281 case COUNT_INDIVIDUAL
:
283 InsnExecCount
*icount
;
286 icount
= (InsnExecCount
*) g_hash_table_lookup(insns
,
287 GUINT_TO_POINTER(opcode
));
290 icount
= g_new0(InsnExecCount
, 1);
291 icount
->opcode
= opcode
;
292 icount
->insn
= qemu_plugin_insn_disas(insn
);
293 icount
->class = class;
294 struct qemu_plugin_scoreboard
*score
=
295 qemu_plugin_scoreboard_new(sizeof(uint64_t));
296 icount
->count
= qemu_plugin_scoreboard_u64(score
);
298 g_hash_table_insert(insns
, GUINT_TO_POINTER(opcode
),
301 g_mutex_unlock(&lock
);
303 return icount
->count
.score
;
306 g_assert_not_reached();
312 static void vcpu_tb_trans(qemu_plugin_id_t id
, struct qemu_plugin_tb
*tb
)
314 size_t n
= qemu_plugin_tb_n_insns(tb
);
317 for (i
= 0; i
< n
; i
++) {
318 struct qemu_plugin_insn
*insn
= qemu_plugin_tb_get_insn(tb
, i
);
319 struct qemu_plugin_scoreboard
*cnt
= find_counter(insn
);
323 qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
324 insn
, QEMU_PLUGIN_INLINE_ADD_U64
,
325 qemu_plugin_scoreboard_u64(cnt
), 1);
327 qemu_plugin_register_vcpu_insn_exec_cb(
328 insn
, vcpu_insn_exec_before
, QEMU_PLUGIN_CB_NO_REGS
, cnt
);
/*
 * Plugin entry point.
 *
 * @id:   plugin id, passed on to the callback registration calls.
 * @info: guest description; only target_name is read here.
 * @argc: number of plugin arguments.
 * @argv: plugin arguments.
 *
 * Visible steps: give every row of every class table a scoreboard, pick
 * the class table for the guest, parse the "inline", "verbose" and
 * "count" arguments, then register the translation and exit callbacks.
 *
 * NOTE(review): this listing omits some lines of the function (local
 * declarations, the fetch of each argument into p, return statements);
 * confirm against the full file before relying on this summary.
 */
334 QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id
,
335 const qemu_info_t
*info
,
336 int argc
, char **argv
)
/* One scoreboard per row in every table, not only the table selected below. */
340 for (i
= 0; i
< ARRAY_SIZE(class_tables
); i
++) {
341 for (int j
= 0; j
< class_tables
[i
].table_sz
; ++j
) {
342 struct qemu_plugin_scoreboard
*score
=
343 qemu_plugin_scoreboard_new(sizeof(uint64_t));
344 class_tables
[i
].table
[j
].count
= qemu_plugin_scoreboard_u64(score
);
348 /* Select a class table appropriate to the guest architecture */
349 for (i
= 0; i
< ARRAY_SIZE(class_tables
); i
++) {
350 ClassSelector
*entry
= &class_tables
[i
];
/* An entry with no target name matches any guest. */
351 if (!entry
->qemu_target
||
352 strcmp(entry
->qemu_target
, info
->target_name
) == 0) {
353 class_table
= entry
->table
;
354 class_table_sz
= entry
->table_sz
;
/* Each argument is split on '=': tokens[0] is the option name. */
359 for (i
= 0; i
< argc
; i
++) {
361 g_auto(GStrv
) tokens
= g_strsplit(p
, "=", -1);
362 if (g_strcmp0(tokens
[0], "inline") == 0) {
363 if (!qemu_plugin_bool_parse(tokens
[0], tokens
[1], &do_inline
)) {
364 fprintf(stderr
, "boolean argument parsing failed: %s\n", p
);
367 } else if (g_strcmp0(tokens
[0], "verbose") == 0) {
368 if (!qemu_plugin_bool_parse(tokens
[0], tokens
[1], &verbose
)) {
369 fprintf(stderr
, "boolean argument parsing failed: %s\n", p
);
372 } else if (g_strcmp0(tokens
[0], "count") == 0) {
/*
 * NOTE(review): tokens[1] is NULL when the argument is a bare "count"
 * with no '='; strcmp(value, ...) below would then dereference NULL
 * unless a guard exists on lines not shown - confirm.
 */
373 char *value
= tokens
[1];
375 CountType type
= COUNT_INDIVIDUAL
;
/* Change the counting mode of the class whose option name equals value. */
380 for (j
= 0; j
< class_table_sz
; j
++) {
381 if (strcmp(value
, class_table
[j
].opt
) == 0) {
382 class_table
[j
].what
= type
;
387 fprintf(stderr
, "option parsing failed: %s\n", p
);
/* Instrument instructions at translation time; report at exit. */
394 qemu_plugin_register_vcpu_tb_trans_cb(id
, vcpu_tb_trans
);
395 qemu_plugin_register_atexit_cb(id
, plugin_exit
, NULL
);