Merge tag 'v9.0.0-rc3'
[qemu/ar7.git] / accel / tcg / translate-all.c
blob83cc14fbde4933cd1b54926e1408b0c7b2ef4277
1 /*
2 * Host code generation
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
22 #include "trace.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg.h"
26 #if defined(CONFIG_USER_ONLY)
27 #include "qemu.h"
28 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
29 #include <sys/param.h>
30 #if __FreeBSD_version >= 700104
31 #define HAVE_KINFO_GETVMMAP
32 #define sigqueue sigqueue_freebsd /* avoid redefinition */
33 #include <sys/proc.h>
34 #include <machine/profile.h>
35 #define _KERNEL
36 #include <sys/user.h>
37 #undef _KERNEL
38 #undef sigqueue
39 #include <libutil.h>
40 #endif
41 #endif
42 #else
43 #include "exec/ram_addr.h"
44 #endif
46 #include "exec/cputlb.h"
47 #include "exec/translate-all.h"
48 #include "exec/translator.h"
49 #include "exec/tb-flush.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "qemu/timer.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal-common.h"
65 #include "internal-target.h"
66 #include "tcg/perf.h"
67 #include "tcg/insn-start-words.h"
69 TBContext tb_ctx;
/*
 * Write VAL at P using the signed LEB128 variable-length encoding.
 * Returns P advanced past the last byte written.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int chunk = val & 0x7f;
        int sign_set = (chunk & 0x40) != 0;

        val >>= 7;

        /*
         * Finished once everything still left in VAL is nothing but the
         * sign extension of the 7-bit group just extracted.
         */
        if ((val == 0 && !sign_set) || (val == -1 && sign_set)) {
            *p++ = chunk;
            return p;
        }

        /* Further groups follow: emit with the continuation bit set. */
        *p++ = chunk | 0x80;
    }
}
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The value is accumulated and sign-extended over the full 64 bits of
 * the return type, so every int64_t produced by the encoder decodes back
 * to itself.  The arithmetic is done on an unsigned accumulator so that
 * no shift of a negative value, or into the sign bit, is ever performed.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    uint64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        /* Groups beyond bit 63 cannot contribute; skipping avoids UB. */
        if (shift < 64) {
            val |= (uint64_t)(byte & 0x7f) << shift;
        }
        shift += 7;
    } while (byte & 0x80);

    /* Propagate the sign bit of the last group into the unused high bits. */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
116 /* Encode the data collected about the instructions while compiling TB.
117 Place the data at BLOCK, and return the number of bytes consumed.
119 The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
120 which come from the target's insn_start data, followed by a uintptr_t
121 which comes from the host pc of the end of the code implementing the insn.
123 Each line of the table is encoded as sleb128 deltas from the previous
124 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
125 That is, the first column is seeded with the guest pc, the last column
126 with the host pc, and the middle columns with zeros. */
128 static int encode_search(TranslationBlock *tb, uint8_t *block)
130 uint8_t *highwater = tcg_ctx->code_gen_highwater;
131 uint64_t *insn_data = tcg_ctx->gen_insn_data;
132 uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
133 uint8_t *p = block;
134 int i, j, n;
136 for (i = 0, n = tb->icount; i < n; ++i) {
137 uint64_t prev, curr;
139 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
140 if (i == 0) {
141 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
142 } else {
143 prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
145 curr = insn_data[i * TARGET_INSN_START_WORDS + j];
146 p = encode_sleb128(p, curr - prev);
148 prev = (i == 0 ? 0 : insn_end_off[i - 1]);
149 curr = insn_end_off[i];
150 p = encode_sleb128(p, curr - prev);
152 /* Test for (pending) buffer overflow. The assumption is that any
153 one row beginning below the high water mark cannot overrun
154 the buffer completely. Thus we can test for overflow after
155 encoding a row without having to check during encoding. */
156 if (unlikely(p > highwater)) {
157 return -1;
161 return p - block;
164 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
165 uint64_t *data)
167 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
168 const uint8_t *p = tb->tc.ptr + tb->tc.size;
169 int i, j, num_insns = tb->icount;
171 host_pc -= GETPC_ADJ;
173 if (host_pc < iter_pc) {
174 return -1;
177 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
178 if (!(tb_cflags(tb) & CF_PCREL)) {
179 data[0] = tb->pc;
183 * Reconstruct the stored insn data while looking for the point
184 * at which the end of the insn exceeds host_pc.
186 for (i = 0; i < num_insns; ++i) {
187 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
188 data[j] += decode_sleb128(&p);
190 iter_pc += decode_sleb128(&p);
191 if (iter_pc > host_pc) {
192 return num_insns - i;
195 return -1;
199 * The cpu state corresponding to 'host_pc' is restored in
200 * preparation for exiting the TB.
202 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
203 uintptr_t host_pc)
205 uint64_t data[TARGET_INSN_START_WORDS];
206 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
208 if (insns_left < 0) {
209 return;
212 if (tb_cflags(tb) & CF_USE_ICOUNT) {
213 assert(icount_enabled());
215 * Reset the cycle counter to the start of the block and
216 * shift if to the number of actually executed instructions.
218 cpu->neg.icount_decr.u16.low += insns_left;
221 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
224 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
227 * The host_pc has to be in the rx region of the code buffer.
228 * If it is not we will not be able to resolve it here.
229 * The two cases where host_pc will not be correct are:
231 * - fault during translation (instruction fetch)
232 * - fault from helper (not using GETPC() macro)
234 * Either way we need return early as we can't resolve it here.
236 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
237 TranslationBlock *tb = tcg_tb_lookup(host_pc);
238 if (tb) {
239 cpu_restore_state_from_tb(cpu, tb, host_pc);
240 return true;
243 return false;
246 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
248 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
249 TranslationBlock *tb = tcg_tb_lookup(host_pc);
250 if (tb) {
251 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
254 return false;
/* One-time page-level setup: delegates to page_table_config_init(). */
void page_init(void)
{
    page_table_config_init();
}
263 * Isolate the portion of code gen which can setjmp/longjmp.
264 * Return the size of the generated code, or negative on error.
266 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
267 vaddr pc, void *host_pc,
268 int *max_insns, int64_t *ti)
270 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
271 if (unlikely(ret != 0)) {
272 return ret;
275 tcg_func_start(tcg_ctx);
277 tcg_ctx->cpu = env_cpu(env);
278 gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
279 assert(tb->size != 0);
280 tcg_ctx->cpu = NULL;
281 *max_insns = tb->icount;
283 return tcg_gen_code(tcg_ctx, tb, pc);
286 /* Called with mmap_lock held for user mode emulation. */
287 TranslationBlock *tb_gen_code(CPUState *cpu,
288 vaddr pc, uint64_t cs_base,
289 uint32_t flags, int cflags)
291 CPUArchState *env = cpu_env(cpu);
292 TranslationBlock *tb, *existing_tb;
293 tb_page_addr_t phys_pc, phys_p2;
294 tcg_insn_unit *gen_code_buf;
295 int gen_code_size, search_size, max_insns;
296 int64_t ti;
297 void *host_pc;
299 assert_memory_lock();
300 qemu_thread_jit_write();
302 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
304 if (phys_pc == -1) {
305 /* Generate a one-shot TB with 1 insn in it */
306 cflags = (cflags & ~CF_COUNT_MASK) | 1;
309 max_insns = cflags & CF_COUNT_MASK;
310 if (max_insns == 0) {
311 max_insns = TCG_MAX_INSNS;
313 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
315 buffer_overflow:
316 assert_no_pages_locked();
317 tb = tcg_tb_alloc(tcg_ctx);
318 if (unlikely(!tb)) {
319 /* flush must be done */
320 tb_flush(cpu);
321 mmap_unlock();
322 /* Make the execution loop process the flush as soon as possible. */
323 cpu->exception_index = EXCP_INTERRUPT;
324 cpu_loop_exit(cpu);
327 gen_code_buf = tcg_ctx->code_gen_ptr;
328 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
329 if (!(cflags & CF_PCREL)) {
330 tb->pc = pc;
332 tb->cs_base = cs_base;
333 tb->flags = flags;
334 tb->cflags = cflags;
335 tb_set_page_addr0(tb, phys_pc);
336 tb_set_page_addr1(tb, -1);
337 if (phys_pc != -1) {
338 tb_lock_page0(phys_pc);
341 tcg_ctx->gen_tb = tb;
342 tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
343 #ifdef CONFIG_SOFTMMU
344 tcg_ctx->page_bits = TARGET_PAGE_BITS;
345 tcg_ctx->page_mask = TARGET_PAGE_MASK;
346 tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
347 #endif
348 tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
349 #ifdef TCG_GUEST_DEFAULT_MO
350 tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
351 #else
352 tcg_ctx->guest_mo = TCG_MO_ALL;
353 #endif
355 restart_translate:
356 trace_translate_block(tb, pc, tb->tc.ptr);
358 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
359 if (unlikely(gen_code_size < 0)) {
360 switch (gen_code_size) {
361 case -1:
363 * Overflow of code_gen_buffer, or the current slice of it.
365 * TODO: We don't need to re-do gen_intermediate_code, nor
366 * should we re-do the tcg optimization currently hidden
367 * inside tcg_gen_code. All that should be required is to
368 * flush the TBs, allocate a new TB, re-initialize it per
369 * above, and re-do the actual code generation.
371 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
372 "Restarting code generation for "
373 "code_gen_buffer overflow\n");
374 tb_unlock_pages(tb);
375 tcg_ctx->gen_tb = NULL;
376 goto buffer_overflow;
378 case -2:
380 * The code generated for the TranslationBlock is too large.
381 * The maximum size allowed by the unwind info is 64k.
382 * There may be stricter constraints from relocations
383 * in the tcg backend.
385 * Try again with half as many insns as we attempted this time.
386 * If a single insn overflows, there's a bug somewhere...
388 assert(max_insns > 1);
389 max_insns /= 2;
390 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
391 "Restarting code generation with "
392 "smaller translation block (max %d insns)\n",
393 max_insns);
396 * The half-sized TB may not cross pages.
397 * TODO: Fix all targets that cross pages except with
398 * the first insn, at which point this can't be reached.
400 phys_p2 = tb_page_addr1(tb);
401 if (unlikely(phys_p2 != -1)) {
402 tb_unlock_page1(phys_pc, phys_p2);
403 tb_set_page_addr1(tb, -1);
405 goto restart_translate;
407 case -3:
409 * We had a page lock ordering problem. In order to avoid
410 * deadlock we had to drop the lock on page0, which means
411 * that everything we translated so far is compromised.
412 * Restart with locks held on both pages.
414 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
415 "Restarting code generation with re-locked pages");
416 goto restart_translate;
418 default:
419 g_assert_not_reached();
422 tcg_ctx->gen_tb = NULL;
424 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
425 if (unlikely(search_size < 0)) {
426 tb_unlock_pages(tb);
427 goto buffer_overflow;
429 tb->tc.size = gen_code_size;
432 * For CF_PCREL, attribute all executions of the generated code
433 * to its first mapping.
435 perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
437 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
438 qemu_log_in_addr_range(pc)) {
439 FILE *logfile = qemu_log_trylock();
440 if (logfile) {
441 int code_size, data_size;
442 const tcg_target_ulong *rx_data_gen_ptr;
443 size_t chunk_start;
444 int insn = 0;
446 if (tcg_ctx->data_gen_ptr) {
447 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
448 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
449 data_size = gen_code_size - code_size;
450 } else {
451 rx_data_gen_ptr = 0;
452 code_size = gen_code_size;
453 data_size = 0;
456 /* Dump header and the first instruction */
457 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
458 fprintf(logfile,
459 " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
460 tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
461 chunk_start = tcg_ctx->gen_insn_end_off[insn];
462 disas(logfile, tb->tc.ptr, chunk_start);
465 * Dump each instruction chunk, wrapping up empty chunks into
466 * the next instruction. The whole array is offset so the
467 * first entry is the beginning of the 2nd instruction.
469 while (insn < tb->icount) {
470 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
471 if (chunk_end > chunk_start) {
472 fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
473 tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
474 disas(logfile, tb->tc.ptr + chunk_start,
475 chunk_end - chunk_start);
476 chunk_start = chunk_end;
478 insn++;
481 if (chunk_start < code_size) {
482 fprintf(logfile, " -- tb slow paths + alignment\n");
483 disas(logfile, tb->tc.ptr + chunk_start,
484 code_size - chunk_start);
487 /* Finally dump any data we may have after the block */
488 if (data_size) {
489 int i;
490 fprintf(logfile, " data: [size=%d]\n", data_size);
491 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
492 if (sizeof(tcg_target_ulong) == 8) {
493 fprintf(logfile,
494 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
495 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
496 } else if (sizeof(tcg_target_ulong) == 4) {
497 fprintf(logfile,
498 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
499 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
500 } else {
501 qemu_build_not_reached();
505 fprintf(logfile, "\n");
506 qemu_log_unlock(logfile);
510 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
511 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
512 CODE_GEN_ALIGN));
514 /* init jump list */
515 qemu_spin_init(&tb->jmp_lock);
516 tb->jmp_list_head = (uintptr_t)NULL;
517 tb->jmp_list_next[0] = (uintptr_t)NULL;
518 tb->jmp_list_next[1] = (uintptr_t)NULL;
519 tb->jmp_dest[0] = (uintptr_t)NULL;
520 tb->jmp_dest[1] = (uintptr_t)NULL;
522 /* init original jump addresses which have been set during tcg_gen_code() */
523 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
524 tb_reset_jump(tb, 0);
526 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
527 tb_reset_jump(tb, 1);
531 * If the TB is not associated with a physical RAM page then it must be
532 * a temporary one-insn TB, and we have nothing left to do. Return early
533 * before attempting to link to other TBs or add to the lookup table.
535 if (tb_page_addr0(tb) == -1) {
536 assert_no_pages_locked();
537 return tb;
541 * Insert TB into the corresponding region tree before publishing it
542 * through QHT. Otherwise rewinding happened in the TB might fail to
543 * lookup itself using host PC.
545 tcg_tb_insert(tb);
548 * No explicit memory barrier is required -- tb_link_page() makes the
549 * TB visible in a consistent state.
551 existing_tb = tb_link_page(tb);
552 assert_no_pages_locked();
554 /* if the TB already exists, discard what we just translated */
555 if (unlikely(existing_tb != tb)) {
556 uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
558 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
559 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
560 tcg_tb_remove(tb);
561 return existing_tb;
563 return tb;
566 /* user-mode: call with mmap_lock held */
567 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
569 TranslationBlock *tb;
571 assert_memory_lock();
573 tb = tcg_tb_lookup(retaddr);
574 if (tb) {
575 /* We can use retranslation to find the PC. */
576 cpu_restore_state_from_tb(cpu, tb, retaddr);
577 tb_phys_invalidate(tb, -1);
578 } else {
579 /* The exception probably happened in a helper. The CPU state should
580 have been saved before calling it. Fetch the PC from there. */
581 CPUArchState *env = cpu_env(cpu);
582 vaddr pc;
583 uint64_t cs_base;
584 tb_page_addr_t addr;
585 uint32_t flags;
587 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
588 addr = get_page_addr_code(env, pc);
589 if (addr != -1) {
590 tb_invalidate_phys_range(addr, addr);
595 #ifndef CONFIG_USER_ONLY
597 * In deterministic execution mode, instructions doing device I/Os
598 * must be at the end of the TB.
600 * Called by softmmu_template.h, with iothread mutex not held.
602 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
604 TranslationBlock *tb;
605 CPUClass *cc;
606 uint32_t n;
608 tb = tcg_tb_lookup(retaddr);
609 if (!tb) {
610 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
611 (void *)retaddr);
613 cpu_restore_state_from_tb(cpu, tb, retaddr);
616 * Some guests must re-execute the branch when re-executing a delay
617 * slot instruction. When this is the case, adjust icount and N
618 * to account for the re-execution of the branch.
620 n = 1;
621 cc = CPU_GET_CLASS(cpu);
622 if (cc->tcg_ops->io_recompile_replay_branch &&
623 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
624 cpu->neg.icount_decr.u16.low++;
625 n = 2;
629 * Exit the loop and potentially generate a new TB executing the
630 * just the I/O insns. We also limit instrumentation to memory
631 * operations only (which execute after completion) so we don't
632 * double instrument the instruction.
634 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | n;
636 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
637 vaddr pc = cpu->cc->get_pc(cpu);
638 if (qemu_log_in_addr_range(pc)) {
639 qemu_log("cpu_io_recompile: rewound execution of TB to %016"
640 VADDR_PRIx "\n", pc);
644 cpu_loop_exit_noexc(cpu);
647 #else /* CONFIG_USER_ONLY */
649 void cpu_interrupt(CPUState *cpu, int mask)
651 g_assert(bql_locked());
652 cpu->interrupt_request |= mask;
653 qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
656 #endif /* CONFIG_USER_ONLY */
659 * Called by generic code at e.g. cpu reset after cpu creation,
660 * therefore we must be prepared to allocate the jump cache.
662 void tcg_flush_jmp_cache(CPUState *cpu)
664 CPUJumpCache *jc = cpu->tb_jmp_cache;
666 /* During early initialization, the cache may not yet be allocated. */
667 if (unlikely(jc == NULL)) {
668 return;
671 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
672 qatomic_set(&jc->array[i].tb, NULL);