2 * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
4 * The jitdump spec can be found at [1].
6 * [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
8 * SPDX-License-Identifier: GPL-2.0-or-later
11 #include "qemu/osdep.h"
13 #include "exec/target_page.h"
14 #include "exec/translation-block.h"
15 #include "qemu/timer.h"
16 #include "tcg/debuginfo.h"
/*
 * Open PATH as a newly created stdio stream.
 *
 * Per the comments and the visible open() call, the steps are: remove any
 * previous file, create the file exclusively, and wrap the resulting
 * descriptor in a FILE *.
 *
 * Callers in this file compare the result against NULL and then report
 * strerror(errno).
 *
 * NOTE(review): the failure paths and return statements are not visible in
 * this view; confirm that errno is still meaningful when NULL is returned.
 */
20 static FILE *safe_fopen_w(const char *path
)
26 /* Delete the old file, if any. */
29 /* Avoid symlink attacks by using O_CREAT | O_EXCL. */
/*
 * O_EXCL makes open() fail if the path already exists; the mode
 * S_IRUSR | S_IWUSR grants read/write permission to the owner only.
 */
30 fd
= open(path
, O_RDWR
| O_CREAT
| O_EXCL
, S_IRUSR
| S_IWUSR
);
35 /* Convert fd to FILE*. */
/*
 * Enable perf map output.
 *
 * Builds the file name "/tmp/perf-<pid>.map" from getpid(), opens it with
 * safe_fopen_w() and stores the stream in the file-scope variable perfmap.
 * When the file cannot be opened a warning is reported; perfmap stays NULL,
 * and the warning text states that execution proceeds without perfmap.
 */
49 void perf_enable_perfmap(void)
/* snprintf() is bounded by sizeof(map_file), so the name cannot overflow. */
53 snprintf(map_file
, sizeof(map_file
), "/tmp/perf-%d.map", getpid());
54 perfmap
= safe_fopen_w(map_file
);
55 if (perfmap
== NULL
) {
/* errno is read here, right after the failed safe_fopen_w() call. */
56 warn_report("Could not open %s: %s, proceeding without perfmap",
57 map_file
, strerror(errno
));
61 /* Get PC and size of code JITed for guest instruction #INSN. */
/*
 * @host_pc:   receives START plus the offset at which instruction INSN
 *             begins.
 * @host_size: receives the number of host code bytes belonging to INSN.
 * @start:     base address that the offsets are relative to.
 * @insn:      index into tcg_ctx->gen_insn_end_off[].
 *
 * NOTE(review): write_perfmap_entry() passes both outputs, while
 * write_jr_code_debug_info() passes NULL for host_size, so the two stores
 * below are presumably guarded by NULL checks on lines not visible here -
 * confirm.
 */
62 static void get_host_pc_size(uintptr_t *host_pc
, uint16_t *host_size
,
63 const void *start
, size_t insn
)
/*
 * gen_insn_end_off[] holds end offsets, so an instruction starts where the
 * previous one ended; instruction 0 starts at offset 0.
 */
65 uint16_t start_off
= insn
? tcg_ctx
->gen_insn_end_off
[insn
- 1] : 0;
68 *host_pc
= (uintptr_t)start
+ start_off
;
/* Size is this instruction's end offset minus its start offset. */
71 *host_size
= tcg_ctx
->gen_insn_end_off
[insn
] - start_off
;
/*
 * Produce a printable name for the location described by Q.
 *
 * Three forms are visible below:
 *   - "guest-0x<address>", formatted into a static buffer;
 *   - q->symbol itself (only its length computation is visible);
 *   - "<symbol>+0x<offset>", formatted into the same static buffer.
 *
 * @len: receives the string length including the terminating NUL.
 *
 * The buffer is static __thread, so each thread has its own copy and a later
 * call on the same thread overwrites the previous formatted result.
 *
 * NOTE(review): the conditions selecting each form and the return
 * statements are not visible in this view. write_perfmap_entry() passes
 * NULL for LEN, so the stores through LEN are presumably guarded - confirm.
 */
75 static const char *pretty_symbol(const struct debuginfo_query
*q
, size_t *len
)
77 static __thread
char buf
[64];
81 tmp
= snprintf(buf
, sizeof(buf
), "guest-0x%"PRIx64
, q
->address
);
/*
 * snprintf() returns the untruncated length, so the reported size is capped
 * at sizeof(buf) to match what is actually stored in the buffer.
 */
83 *len
= MIN(tmp
+ 1, sizeof(buf
));
90 *len
= strlen(q
->symbol
) + 1;
95 tmp
= snprintf(buf
, sizeof(buf
), "%s+0x%"PRIx64
, q
->symbol
, q
->offset
);
/* Same cap as above: length including NUL, at most sizeof(buf). */
97 *len
= MIN(tmp
+ 1, sizeof(buf
));
/*
 * Append one line describing guest instruction INSN to the perfmap stream.
 *
 * The line has the form "<host pc> <host size> <name>\n", with both numbers
 * printed in hexadecimal and the name taken from pretty_symbol(Q).
 *
 * @start: base address of the generated code, passed to get_host_pc_size().
 * @insn:  instruction index, passed to get_host_pc_size().
 * @q:     debuginfo query result for this instruction.
 */
102 static void write_perfmap_entry(const void *start
, size_t insn
,
103 const struct debuginfo_query
*q
)
108 get_host_pc_size(&host_pc
, &host_size
, start
, insn
);
/* The symbol length is not needed here, hence the NULL second argument. */
109 fprintf(perfmap
, "%"PRIxPTR
" %"PRIx16
" %s\n",
110 host_pc
, host_size
, pretty_symbol(q
, NULL
));
/* Stream of the jitdump file; set by perf_enable_jitdump(). */
113 static FILE *jitdump
;
/*
 * Length and address of the mapping of the jitdump file created by
 * perf_enable_jitdump(). MAP_FAILED is used as the "no mapping" value.
 */
114 static size_t perf_marker_size
;
115 static void *perf_marker
= MAP_FAILED
;
/*
 * Values stored in the magic and version fields of the jitdump file header.
 * The bytes of the magic value (0x4A 0x69 0x54 0x44) are ASCII "JiTD".
 */
117 #define JITHEADER_MAGIC 0x4A695444
118 #define JITHEADER_VERSION 1
/*
 * Record identifiers stored in the id field of each record.
 * NOTE(review): only JIT_CODE_DEBUG_INFO is visible in this view;
 * JIT_CODE_LOAD is used by write_jr_code_load() and is presumably declared
 * here as well.
 */
131 enum jit_record_type
{
133 JIT_CODE_DEBUG_INFO
= 2,
/* Record written by write_jr_code_load(); members not visible here. */
142 struct jr_code_load
{
/*
 * Record written by write_jr_code_debug_info(). It ends in a flexible array
 * member, so sizeof() covers only the fixed part of the record.
 */
160 struct jr_code_debug_info
{
165 struct debug_entry entries
[];
/*
 * Return the e_machine field from the ELF header of the running executable,
 * read from /proc/self/exe.
 *
 * NOTE(review): handling of fopen()/fread() failures is not visible in this
 * view - confirm what is returned in those cases.
 */
168 static uint32_t get_e_machine(void)
170 Elf64_Ehdr elf_header
;
/*
 * e_machine sits at the same offset in 32-bit and 64-bit ELF headers
 * (checked at build time), so reading an Elf64_Ehdr yields the right field
 * for either class.
 */
174 QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr
, e_machine
) !=
175 offsetof(Elf64_Ehdr
, e_machine
));
177 exe
= fopen("/proc/self/exe", "r");
/* Read exactly one header-sized item; n is the number of items read. */
182 n
= fread(&elf_header
, sizeof(elf_header
), 1, exe
);
188 return elf_header
.e_machine
;
/*
 * Enable jitdump output.
 *
 * Visible steps:
 *   1. Warn when CLOCK_MONOTONIC is unavailable.
 *   2. Create "jit-<pid>.dump" (a relative path, so in the current working
 *      directory) with safe_fopen_w() and store the stream in jitdump.
 *   3. Map the start of that file readable and executable so that a perf
 *      mmap event naming the file is generated.
 *   4. Fill in and write the jitdump file header.
 *
 * Each failure reports a warning stating that execution proceeds without
 * jitdump.
 *
 * NOTE(review): the early returns and cleanup after each warning are not
 * visible in this view.
 */
191 void perf_enable_jitdump(void)
193 struct jitheader header
;
194 char jitdump_file
[32];
197 warn_report("CLOCK_MONOTONIC is not available, proceeding without jitdump");
201 snprintf(jitdump_file
, sizeof(jitdump_file
), "jit-%d.dump", getpid());
202 jitdump
= safe_fopen_w(jitdump_file
);
203 if (jitdump
== NULL
) {
204 warn_report("Could not open %s: %s, proceeding without jitdump",
205 jitdump_file
, strerror(errno
));
210 * `perf inject` will see that the mapped file name in the corresponding
211 * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
212 * and will process it as a jitdump file.
/*
 * The mapping starts at file offset 0, is private, and its length is the
 * value returned by qemu_real_host_page_size().
 */
214 perf_marker_size
= qemu_real_host_page_size();
215 perf_marker
= mmap(NULL
, perf_marker_size
, PROT_READ
| PROT_EXEC
,
216 MAP_PRIVATE
, fileno(jitdump
), 0);
217 if (perf_marker
== MAP_FAILED
) {
218 warn_report("Could not map %s: %s, proceeding without jitdump",
219 jitdump_file
, strerror(errno
));
/* File header: total_size covers the header structure only. */
225 header
.magic
= JITHEADER_MAGIC
;
226 header
.version
= JITHEADER_VERSION
;
227 header
.total_size
= sizeof(header
);
228 header
.elf_mach
= get_e_machine();
230 header
.pid
= getpid();
231 header
.timestamp
= get_clock();
233 fwrite(&header
, sizeof(header
), 1, jitdump
);
/*
 * Record the TCG prologue in the perfmap stream.
 *
 * Writes one line of the form "<start> <size> tcg-prologue-buffer\n", with
 * both numbers printed in hexadecimal.
 *
 * @start: address of the prologue buffer.
 * @size:  size of the prologue buffer in bytes.
 *
 * NOTE(review): whether the write is skipped when perfmap is NULL is not
 * visible in this view - confirm.
 */
236 void perf_report_prologue(const void *start
, size_t size
)
239 fprintf(perfmap
, "%"PRIxPTR
" %zx tcg-prologue-buffer\n",
240 (uintptr_t)start
, size
);
244 /* Write a JIT_CODE_DEBUG_INFO jitdump entry. */
/*
 * The output is the record header, then one debug_entry plus a
 * NUL-terminated file name per guest instruction, then a trailing
 * debug_entry with an empty file name.
 *
 * @start: base address of the generated code.
 * @q:     array of debuginfo query results, indexed by instruction.
 *
 * NOTE(review): the rest of the parameter list (icount is used below) and
 * parts of both loop bodies are not visible in this view; in particular,
 * confirm how entries with no file name are handled before strlen().
 */
245 static void write_jr_code_debug_info(const void *start
,
246 const struct debuginfo_query
*q
,
249 struct jr_code_debug_info rec
;
250 struct debug_entry ent
;
254 /* Write the header. */
255 rec
.p
.id
= JIT_CODE_DEBUG_INFO
;
/*
 * Initial size: the fixed record, plus the trailing debug_entry and the one
 * byte of its empty file name, both written at the end of this function.
 */
256 rec
.p
.total_size
= sizeof(rec
) + sizeof(ent
) + 1;
257 rec
.p
.timestamp
= get_clock();
258 rec
.code_addr
= (uintptr_t)start
;
/* First pass: add the size of every entry and its NUL-terminated name. */
260 for (insn
= 0; insn
< icount
; insn
++) {
262 rec
.p
.total_size
+= sizeof(ent
) + strlen(q
[insn
].file
) + 1;
266 fwrite(&rec
, sizeof(rec
), 1, jitdump
);
268 /* Write the main debug entries. */
269 for (insn
= 0; insn
< icount
; insn
++) {
/* Only the host PC is needed here, so the size output is NULL. */
271 get_host_pc_size(&host_pc
, NULL
, start
, insn
);
273 ent
.lineno
= q
[insn
].line
;
275 fwrite(&ent
, sizeof(ent
), 1, jitdump
);
/* The file name is written including its terminating NUL. */
276 fwrite(q
[insn
].file
, strlen(q
[insn
].file
) + 1, 1, jitdump
);
280 /* Write the trailing debug_entry. */
/* Its address is the end of the last instruction's generated code. */
281 ent
.addr
= (uintptr_t)start
+ tcg_ctx
->gen_insn_end_off
[icount
- 1];
284 fwrite(&ent
, sizeof(ent
), 1, jitdump
);
/* Empty file name: a single NUL byte. */
285 fwrite("", 1, 1, jitdump
);
288 /* Write a JIT_CODE_LOAD jitdump entry. */
/*
 * The output is the fixed record, then the symbol name including its
 * terminating NUL, then HOST_SIZE bytes of generated code copied from START.
 *
 * @start:     address of the generated code.
 * @host_size: number of code bytes to record.
 * @q:         debuginfo query result used to name the code.
 *
 * NOTE(review): local declarations and possibly further field assignments
 * are not visible in this view.
 */
289 static void write_jr_code_load(const void *start
, uint16_t host_size
,
290 const struct debuginfo_query
*q
)
/* Static counter: each record written gets the next index value. */
292 static uint64_t code_index
;
293 struct jr_code_load rec
;
/* symbol_size includes the terminating NUL (see pretty_symbol()). */
297 symbol
= pretty_symbol(q
, &symbol_size
);
298 rec
.p
.id
= JIT_CODE_LOAD
;
/* Total size covers all three parts written below. */
299 rec
.p
.total_size
= sizeof(rec
) + symbol_size
+ host_size
;
300 rec
.p
.timestamp
= get_clock();
302 rec
.tid
= qemu_get_thread_id();
/* vma and code_addr are both set to the address of the generated code. */
303 rec
.vma
= (uintptr_t)start
;
304 rec
.code_addr
= (uintptr_t)start
;
305 rec
.code_size
= host_size
;
306 rec
.code_index
= code_index
++;
307 fwrite(&rec
, sizeof(rec
), 1, jitdump
);
308 fwrite(symbol
, symbol_size
, 1, jitdump
);
309 fwrite(start
, host_size
, 1, jitdump
);
/*
 * Report a newly translated block to the enabled perf outputs.
 *
 * Visible steps:
 *   1. Check whether both perfmap and jitdump are disabled.
 *   2. Allocate a zeroed array of tb->icount debuginfo queries.
 *   3. Fill in the guest address and requested flags for every instruction
 *      and resolve them with debuginfo_query().
 *   4. Write one perfmap line per instruction.
 *   5. Write a JIT_CODE_DEBUG_INFO and a JIT_CODE_LOAD jitdump record.
 *
 * @guest_pc: guest program counter of the block; its page bits are used for
 *            CF_PCREL blocks.
 * @tb:       the translation block being reported.
 *
 * NOTE(review): the remaining parameters (start is used below), the
 * allocation failure handling, the guards around steps 4 and 5, the matching
 * lock calls and the release of q are not visible in this view.
 */
312 void perf_report_code(uint64_t guest_pc
, TranslationBlock
*tb
,
315 struct debuginfo_query
*q
;
316 size_t insn
, start_words
;
317 uint64_t *gen_insn_data
;
319 if (!perfmap
&& !jitdump
) {
/* g_try_malloc0_n() zero-fills the array, so unset fields start at 0. */
323 q
= g_try_malloc0_n(tb
->icount
, sizeof(*q
));
330 /* Query debuginfo for each guest instruction. */
331 gen_insn_data
= tcg_ctx
->gen_insn_data
;
332 start_words
= tcg_ctx
->insn_start_words
;
334 for (insn
= 0; insn
< tb
->icount
; insn
++) {
335 /* FIXME: This replicates the restore_state_to_opc() logic. */
/* Word 0 of each instruction's start data is used as its address. */
336 q
[insn
].address
= gen_insn_data
[insn
* start_words
+ 0];
337 if (tb_cflags(tb
) & CF_PCREL
) {
/*
 * For CF_PCREL blocks the page bits of guest_pc are OR-ed in; presumably
 * the recorded value holds only the in-page part - confirm.
 */
338 q
[insn
].address
|= (guest_pc
& qemu_target_page_mask());
/* Symbols are always requested; line info only when jitdump is enabled. */
340 q
[insn
].flags
= DEBUGINFO_SYMBOL
| (jitdump
? DEBUGINFO_LINE
: 0);
342 debuginfo_query(q
, tb
->icount
);
344 /* Emit perfmap entries if needed. */
347 for (insn
= 0; insn
< tb
->icount
; insn
++) {
348 write_perfmap_entry(start
, insn
, &q
[insn
]);
/* Releases the stream lock; the matching lock call is not visible here. */
350 funlockfile(perfmap
);
353 /* Emit jitdump entries if needed. */
356 write_jr_code_debug_info(start
, q
, tb
->icount
);
/* Code size passed is the end offset of the block's last instruction. */
357 write_jr_code_load(start
, tcg_ctx
->gen_insn_end_off
[tb
->icount
- 1],
359 funlockfile(jitdump
);
373 if (perf_marker
!= MAP_FAILED
) {
374 munmap(perf_marker
, perf_marker_size
);
375 perf_marker
= MAP_FAILED
;