target/arm: Convert SMULL, UMULL, SMLAL, UMLAL, SMLSL, UMLSL to decodetree
[qemu/ar7.git] / tcg / perf.c
blob412a987d9561bfa21f8e961bd9d28d6d564fd5e8
1 /*
2 * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
4 * The jitdump spec can be found at [1].
6 * [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
8 * SPDX-License-Identifier: GPL-2.0-or-later
9 */
11 #include "qemu/osdep.h"
12 #include "elf.h"
13 #include "exec/target_page.h"
14 #include "exec/translation-block.h"
15 #include "qemu/timer.h"
16 #include "tcg/debuginfo.h"
17 #include "tcg/perf.h"
18 #include "tcg/tcg.h"
/*
 * Create PATH from scratch and return a stream opened for writing on it.
 *
 * Whatever currently exists at PATH is unlinked first; the new file is then
 * created with O_CREAT | O_EXCL and owner-only read/write permissions.
 * Returns NULL on failure, with errno as left by the failing open() or
 * fdopen() call.
 */
static FILE *safe_fopen_w(const char *path)
{
    FILE *stream;
    int fd;

    /* Delete the old file, if any; the result is deliberately ignored. */
    unlink(path);

    /* Avoid symlink attacks by using O_CREAT | O_EXCL. */
    fd = open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
    if (fd == -1) {
        return NULL;
    }

    /* Convert fd to FILE*. */
    stream = fdopen(fd, "w");
    if (stream == NULL) {
        /* close() may overwrite errno; hand fdopen()'s value to the caller. */
        int fdopen_errno = errno;

        close(fd);
        errno = fdopen_errno;
    }

    return stream;
}
/* Stream for /tmp/perf-<pid>.map; NULL while perfmap output is not active. */
static FILE *perfmap;

/*
 * Turn on perfmap output by creating /tmp/perf-<pid>.map.
 *
 * If the file cannot be created, a warning is reported and perfmap is left
 * NULL, so later reporting calls skip perfmap output.
 */
void perf_enable_perfmap(void)
{
    char path[32];

    snprintf(path, sizeof(path), "/tmp/perf-%d.map", getpid());
    perfmap = safe_fopen_w(path);
    if (perfmap != NULL) {
        return;
    }

    warn_report("Could not open %s: %s, proceeding without perfmap",
                path, strerror(errno));
}
61 /* Get PC and size of code JITed for guest instruction #INSN. */
62 static void get_host_pc_size(uintptr_t *host_pc, uint16_t *host_size,
63 const void *start, size_t insn)
65 uint16_t start_off = insn ? tcg_ctx->gen_insn_end_off[insn - 1] : 0;
67 if (host_pc) {
68 *host_pc = (uintptr_t)start + start_off;
70 if (host_size) {
71 *host_size = tcg_ctx->gen_insn_end_off[insn] - start_off;
75 static const char *pretty_symbol(const struct debuginfo_query *q, size_t *len)
77 static __thread char buf[64];
78 int tmp;
80 if (!q->symbol) {
81 tmp = snprintf(buf, sizeof(buf), "guest-0x%"PRIx64, q->address);
82 if (len) {
83 *len = MIN(tmp + 1, sizeof(buf));
85 return buf;
88 if (!q->offset) {
89 if (len) {
90 *len = strlen(q->symbol) + 1;
92 return q->symbol;
95 tmp = snprintf(buf, sizeof(buf), "%s+0x%"PRIx64, q->symbol, q->offset);
96 if (len) {
97 *len = MIN(tmp + 1, sizeof(buf));
99 return buf;
102 static void write_perfmap_entry(const void *start, size_t insn,
103 const struct debuginfo_query *q)
105 uint16_t host_size;
106 uintptr_t host_pc;
108 get_host_pc_size(&host_pc, &host_size, start, insn);
109 fprintf(perfmap, "%"PRIxPTR" %"PRIx16" %s\n",
110 host_pc, host_size, pretty_symbol(q, NULL));
/* Stream for jit-<pid>.dump; NULL while jitdump output is not active. */
static FILE *jitdump;

/* Length in bytes of the perf_marker mapping. */
static size_t perf_marker_size;

/* Mapping of the jitdump file; MAP_FAILED while no mapping exists. */
static void *perf_marker = MAP_FAILED;

/* jitdump file header magic: the bytes 0x4A 0x69 0x54 0x44 spell "JiTD". */
#define JITHEADER_MAGIC 0x4A695444

/* jitdump format version written into the file header. */
#define JITHEADER_VERSION 1
/* Header written once at the very start of the jitdump file. */
struct jitheader {
    uint32_t magic;         /* JITHEADER_MAGIC */
    uint32_t version;       /* JITHEADER_VERSION */
    uint32_t total_size;    /* size of this header, in bytes */
    uint32_t elf_mach;      /* ELF e_machine value of the running binary */
    uint32_t pad1;          /* written as 0 */
    uint32_t pid;           /* process id of the writer */
    uint64_t timestamp;     /* clock value when the header was written */
    uint64_t flags;         /* written as 0 */
};
/* Record type identifiers stored in jr_prefix.id. */
enum jit_record_type {
    JIT_CODE_LOAD = 0,          /* record is a struct jr_code_load */
    JIT_CODE_DEBUG_INFO = 2,    /* record is a struct jr_code_debug_info */
};
/* Common prefix placed at the start of every jitdump record. */
struct jr_prefix {
    uint32_t id;            /* one of enum jit_record_type */
    uint32_t total_size;    /* bytes in the whole record, prefix included */
    uint64_t timestamp;     /* clock value when the record was created */
};
142 struct jr_code_load {
143 struct jr_prefix p;
145 uint32_t pid;
146 uint32_t tid;
147 uint64_t vma;
148 uint64_t code_addr;
149 uint64_t code_size;
150 uint64_t code_index;
/*
 * One line-table entry inside a JIT_CODE_DEBUG_INFO record.  The fixed
 * fields are followed in the file by a NUL-terminated file name.
 */
struct debug_entry {
    uint64_t addr;          /* host address the entry applies to */
    int lineno;             /* source line number */
    int discrim;            /* written as 0 */
    const char name[];      /* NUL-terminated file name */
};
160 struct jr_code_debug_info {
161 struct jr_prefix p;
163 uint64_t code_addr;
164 uint64_t nr_entry;
165 struct debug_entry entries[];
168 static uint32_t get_e_machine(void)
170 Elf64_Ehdr elf_header;
171 FILE *exe;
172 size_t n;
174 QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr, e_machine) !=
175 offsetof(Elf64_Ehdr, e_machine));
177 exe = fopen("/proc/self/exe", "r");
178 if (exe == NULL) {
179 return EM_NONE;
182 n = fread(&elf_header, sizeof(elf_header), 1, exe);
183 fclose(exe);
184 if (n != 1) {
185 return EM_NONE;
188 return elf_header.e_machine;
191 void perf_enable_jitdump(void)
193 struct jitheader header;
194 char jitdump_file[32];
196 if (!use_rt_clock) {
197 warn_report("CLOCK_MONOTONIC is not available, proceeding without jitdump");
198 return;
201 snprintf(jitdump_file, sizeof(jitdump_file), "jit-%d.dump", getpid());
202 jitdump = safe_fopen_w(jitdump_file);
203 if (jitdump == NULL) {
204 warn_report("Could not open %s: %s, proceeding without jitdump",
205 jitdump_file, strerror(errno));
206 return;
210 * `perf inject` will see that the mapped file name in the corresponding
211 * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
212 * and will process it as a jitdump file.
214 perf_marker_size = qemu_real_host_page_size();
215 perf_marker = mmap(NULL, perf_marker_size, PROT_READ | PROT_EXEC,
216 MAP_PRIVATE, fileno(jitdump), 0);
217 if (perf_marker == MAP_FAILED) {
218 warn_report("Could not map %s: %s, proceeding without jitdump",
219 jitdump_file, strerror(errno));
220 fclose(jitdump);
221 jitdump = NULL;
222 return;
225 header.magic = JITHEADER_MAGIC;
226 header.version = JITHEADER_VERSION;
227 header.total_size = sizeof(header);
228 header.elf_mach = get_e_machine();
229 header.pad1 = 0;
230 header.pid = getpid();
231 header.timestamp = get_clock();
232 header.flags = 0;
233 fwrite(&header, sizeof(header), 1, jitdump);
236 void perf_report_prologue(const void *start, size_t size)
238 if (perfmap) {
239 fprintf(perfmap, "%"PRIxPTR" %zx tcg-prologue-buffer\n",
240 (uintptr_t)start, size);
244 /* Write a JIT_CODE_DEBUG_INFO jitdump entry. */
245 static void write_jr_code_debug_info(const void *start,
246 const struct debuginfo_query *q,
247 size_t icount)
249 struct jr_code_debug_info rec;
250 struct debug_entry ent;
251 uintptr_t host_pc;
252 int insn;
254 /* Write the header. */
255 rec.p.id = JIT_CODE_DEBUG_INFO;
256 rec.p.total_size = sizeof(rec) + sizeof(ent) + 1;
257 rec.p.timestamp = get_clock();
258 rec.code_addr = (uintptr_t)start;
259 rec.nr_entry = 1;
260 for (insn = 0; insn < icount; insn++) {
261 if (q[insn].file) {
262 rec.p.total_size += sizeof(ent) + strlen(q[insn].file) + 1;
263 rec.nr_entry++;
266 fwrite(&rec, sizeof(rec), 1, jitdump);
268 /* Write the main debug entries. */
269 for (insn = 0; insn < icount; insn++) {
270 if (q[insn].file) {
271 get_host_pc_size(&host_pc, NULL, start, insn);
272 ent.addr = host_pc;
273 ent.lineno = q[insn].line;
274 ent.discrim = 0;
275 fwrite(&ent, sizeof(ent), 1, jitdump);
276 fwrite(q[insn].file, strlen(q[insn].file) + 1, 1, jitdump);
280 /* Write the trailing debug_entry. */
281 ent.addr = (uintptr_t)start + tcg_ctx->gen_insn_end_off[icount - 1];
282 ent.lineno = 0;
283 ent.discrim = 0;
284 fwrite(&ent, sizeof(ent), 1, jitdump);
285 fwrite("", 1, 1, jitdump);
288 /* Write a JIT_CODE_LOAD jitdump entry. */
289 static void write_jr_code_load(const void *start, uint16_t host_size,
290 const struct debuginfo_query *q)
292 static uint64_t code_index;
293 struct jr_code_load rec;
294 const char *symbol;
295 size_t symbol_size;
297 symbol = pretty_symbol(q, &symbol_size);
298 rec.p.id = JIT_CODE_LOAD;
299 rec.p.total_size = sizeof(rec) + symbol_size + host_size;
300 rec.p.timestamp = get_clock();
301 rec.pid = getpid();
302 rec.tid = qemu_get_thread_id();
303 rec.vma = (uintptr_t)start;
304 rec.code_addr = (uintptr_t)start;
305 rec.code_size = host_size;
306 rec.code_index = code_index++;
307 fwrite(&rec, sizeof(rec), 1, jitdump);
308 fwrite(symbol, symbol_size, 1, jitdump);
309 fwrite(start, host_size, 1, jitdump);
312 void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
313 const void *start)
315 struct debuginfo_query *q;
316 size_t insn, start_words;
317 uint64_t *gen_insn_data;
319 if (!perfmap && !jitdump) {
320 return;
323 q = g_try_malloc0_n(tb->icount, sizeof(*q));
324 if (!q) {
325 return;
328 debuginfo_lock();
330 /* Query debuginfo for each guest instruction. */
331 gen_insn_data = tcg_ctx->gen_insn_data;
332 start_words = tcg_ctx->insn_start_words;
334 for (insn = 0; insn < tb->icount; insn++) {
335 /* FIXME: This replicates the restore_state_to_opc() logic. */
336 q[insn].address = gen_insn_data[insn * start_words + 0];
337 if (tb_cflags(tb) & CF_PCREL) {
338 q[insn].address |= (guest_pc & qemu_target_page_mask());
340 q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0);
342 debuginfo_query(q, tb->icount);
344 /* Emit perfmap entries if needed. */
345 if (perfmap) {
346 flockfile(perfmap);
347 for (insn = 0; insn < tb->icount; insn++) {
348 write_perfmap_entry(start, insn, &q[insn]);
350 funlockfile(perfmap);
353 /* Emit jitdump entries if needed. */
354 if (jitdump) {
355 flockfile(jitdump);
356 write_jr_code_debug_info(start, q, tb->icount);
357 write_jr_code_load(start, tcg_ctx->gen_insn_end_off[tb->icount - 1],
359 funlockfile(jitdump);
362 debuginfo_unlock();
363 g_free(q);
366 void perf_exit(void)
368 if (perfmap) {
369 fclose(perfmap);
370 perfmap = NULL;
373 if (perf_marker != MAP_FAILED) {
374 munmap(perf_marker, perf_marker_size);
375 perf_marker = MAP_FAILED;
378 if (jitdump) {
379 fclose(jitdump);
380 jitdump = NULL;