/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
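
/*
 * Example (illustrative sketch, not part of this file): a backend whose
 * instruction units are 32 bits wide, e.g. a RISC-V host, emits one
 * instruction per tcg_out32() call. The encoding below is RISC-V ADDI
 * (opcode 0x13), shown only to make the emitter's role concrete:
 *
 *     static void tcg_out_opc_addi(TCGContext *s, TCGReg rd,
 *                                  TCGReg rs1, int32_t imm)
 *     {
 *         tcg_out32(s, 0x13 | (rd << 7) | (rs1 << 15) | (imm << 20));
 *     }
 *
 * Each call advances s->code_ptr; the tcg_patchN() helpers above rewrite
 * already-emitted units in place once label values become known.
 */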

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
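
/*
 * Illustrative sketch of the label/relocation flow (hypothetical backend
 * code, not from this file): a forward branch is emitted before its target
 * is known, so the backend records a relocation against the label and the
 * final address is filled in later:
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_branch_insn(s, ...);                   // target unknown yet
 *     tcg_out_reloc(s, s->code_ptr - 1, R_MY_BRANCH, l, 0);
 *     ...
 *     tcg_out_label(s, l);     // binds l->u.value_ptr to s->code_ptr
 *
 * tcg_resolve_relocs() then calls patch_reloc() for every recorded entry;
 * tcg_out_branch_insn and R_MY_BRANCH are assumed names for illustration.
 */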

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
    void *p = tcg_splitwx_to_rw(cp);
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
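
/*
 * Usage sketch (assumed caller, not from this file): exception handling
 * maps a host PC, e.g. one captured in a signal handler, back to the TB
 * whose translated code contains it:
 *
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 *     if (tb) {
 *         // host_pc falls within tb's translated code
 *     }
 *
 * The lookup key is a tb_tc with .size == 0, which tb_tc_cmp() above
 * treats as a point query against [tb->tc.ptr, tb->tc.ptr + tb->tc.size).
 */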

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
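
/*
 * Worked example (numbers assumed for illustration): with a 256 MB
 * code_gen_buffer and max_cpus == 8, the first loop iteration tries
 * 8 regions per thread: 256 MB / (8 * 8) = 4 MB >= 2 MB, so it returns
 * 64 regions. With a 64 MB buffer the same split gives only 1 MB, so the
 * loop steps down until 64 MB / (8 * 4) = 2 MB fits, and returns 32.
 */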

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
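
/*
 * Resulting layout, sketched for illustration (proportions assumed): each
 * stride covers region.size bytes of code plus one guard page (G), with
 * the first region absorbing the bytes before 'aligned' and the last one
 * any left-over pages:
 *
 *   start  start_aligned                                          end
 *   |......|<- size ->|G|<- size ->|G| ... |<- size + extra ->|G|
 *          |<----- stride ----->|
 *
 * The guard pages are mprotect'ed to PROT_NONE so that a runaway emitter
 * faults instead of silently overwriting a neighbouring region.
 */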

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
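
/*
 * For illustration (addresses assumed): with a split-wx buffer the same
 * code is mapped twice, writable at 'rw' and executable at
 * 'rw + tcg_splitwx_diff'. A translator writing at rw = 0x7f0000001000
 * with tcg_splitwx_diff = 0x40000000 executes the result from
 * rx = 0x7f0040001000. When splitwx is disabled the diff is 0 and both
 * conversions above are identity functions.
 */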

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;

    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
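
/*
 * Usage sketch: translation-lifetime allocations go through tcg_malloc(),
 * which carves from the current pool chunk and only falls back to
 * tcg_malloc_internal() when the chunk is exhausted, e.g.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *
 * Nothing is freed individually; tcg_pool_reset() at the start of each
 * translation (see tcg_func_start) recycles every chunk at once.
 */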

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
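
/*
 * Usage sketch (target code, identifiers assumed for illustration):
 * targets create their CPU-state globals through the typed wrappers,
 * e.g. during a target's translation init:
 *
 *     cpu_pc = tcg_global_mem_new(cpu_env,
 *                                 offsetof(CPUMyState, pc), "pc");
 *
 * On a 32-bit host a TCG_TYPE_I64 global expands into the "_0"/"_1"
 * halves built above, with mem_offset adjusted for host endianness.
 */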

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
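
/*
 * Usage sketch: frontends obtain interned, read-only constants via the
 * typed wrappers, e.g.
 *
 *     TCGv_i32 one = tcg_constant_i32(1);
 *     tcg_gen_add_i32(dst, src, one);    // no tcg_temp_free needed
 *
 * Repeated requests for the same (type, value) pair return the same
 * TEMP_CONST temp, which is why tcg_temp_free_internal() above silently
 * ignores frees of TEMP_CONST.
 */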

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
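
/*
 * Contrast with the interned constants above (sketch for illustration):
 *
 *     TCGv_i32 a = tcg_const_i32(5);     // fresh temp; writable; must free
 *     tcg_gen_addi_i32(a, a, 1);         // fine: 'a' is an ordinary temp
 *     tcg_temp_free_i32(a);
 *
 *     TCGv_i32 b = tcg_constant_i32(5);  // shared TEMP_CONST; never written
 */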

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
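
/*
 * Usage sketch (assumed caller): expansion code can test for an optional
 * opcode before emitting it and otherwise fall back to a generic
 * sequence, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit INDEX_op_ctpop_i32 directly
 *     } else {
 *         // expand population count from shifts, masks and adds
 *     }
 */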

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
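
/*
 * Worked example of the sizemask encoding used above (derived from the
 * masks in this function, shown for illustration): bit 0 says the return
 * value is 64-bit and bit 1 that it is signed; argument i uses bits
 * (i+1)*2 and (i+1)*2 + 1 the same way. So for a helper
 *
 *     int64_t helper_foo(int32_t a0, int64_t a1);   // hypothetical
 *
 * sizemask = 1 | (1 << 4): a 64-bit return (bit 0) and a 64-bit a1
 * (bit 4), with a0 left as a 32-bit unsigned argument.
 */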

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
2086 /* Find helper name. */
2087 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2089 const char *ret = NULL;
2090 if (helper_table) {
2091 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2092 if (info) {
2093 ret = info->name;
2096 return ret;
2099 static const char * const cond_name[] =
2101 [TCG_COND_NEVER] = "never",
2102 [TCG_COND_ALWAYS] = "always",
2103 [TCG_COND_EQ] = "eq",
2104 [TCG_COND_NE] = "ne",
2105 [TCG_COND_LT] = "lt",
2106 [TCG_COND_GE] = "ge",
2107 [TCG_COND_LE] = "le",
2108 [TCG_COND_GT] = "gt",
2109 [TCG_COND_LTU] = "ltu",
2110 [TCG_COND_GEU] = "geu",
2111 [TCG_COND_LEU] = "leu",
2112 [TCG_COND_GTU] = "gtu"
2115 static const char * const ldst_name[] =
2117 [MO_UB] = "ub",
2118 [MO_SB] = "sb",
2119 [MO_LEUW] = "leuw",
2120 [MO_LESW] = "lesw",
2121 [MO_LEUL] = "leul",
2122 [MO_LESL] = "lesl",
2123 [MO_LEQ] = "leq",
2124 [MO_BEUW] = "beuw",
2125 [MO_BESW] = "besw",
2126 [MO_BEUL] = "beul",
2127 [MO_BESL] = "besl",
2128 [MO_BEQ] = "beq",
2131 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2132 #ifdef TARGET_ALIGNED_ONLY
2133 [MO_UNALN >> MO_ASHIFT] = "un+",
2134 [MO_ALIGN >> MO_ASHIFT] = "",
2135 #else
2136 [MO_UNALN >> MO_ASHIFT] = "",
2137 [MO_ALIGN >> MO_ASHIFT] = "al+",
2138 #endif
2139 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2140 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2141 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2142 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2143 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2144 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2147 static inline bool tcg_regset_single(TCGRegSet d)
2149 return (d & (d - 1)) == 0;
2152 static inline TCGReg tcg_regset_first(TCGRegSet d)
2154 if (TCG_TARGET_NB_REGS <= 32) {
2155 return ctz32(d);
2156 } else {
2157 return ctz64(d);
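/*
 * Worked example for the two helpers above (illustrative only): a
 * TCGRegSet is a bitmask with one bit per host register.
 *
 *     d = 0b00100: d & (d - 1) == 0 -> single register; ctz32(d) = 2,
 *                  i.e. the set contains exactly host register #2.
 *     d = 0b10100: d & (d - 1) == 0b10000 != 0 -> more than one register.
 *
 * Note that d == 0 also passes tcg_regset_single(); callers only apply
 * tcg_regset_first() to sets known to be non-empty.
 */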
2161 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2163 char buf[128];
2164 TCGOp *op;
2166 QTAILQ_FOREACH(op, &s->ops, link) {
2167 int i, k, nb_oargs, nb_iargs, nb_cargs;
2168 const TCGOpDef *def;
2169 TCGOpcode c;
2170 int col = 0;
2172 c = op->opc;
2173 def = &tcg_op_defs[c];
2175 if (c == INDEX_op_insn_start) {
2176 nb_oargs = 0;
2177 col += qemu_log("\n ----");
2179 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2180 target_ulong a;
2181 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2182 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2183 #else
2184 a = op->args[i];
2185 #endif
2186 col += qemu_log(" " TARGET_FMT_lx, a);
2188 } else if (c == INDEX_op_call) {
2189 /* variable number of arguments */
2190 nb_oargs = TCGOP_CALLO(op);
2191 nb_iargs = TCGOP_CALLI(op);
2192 nb_cargs = def->nb_cargs;
2194 /* function name, flags, out args */
2195 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2196 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2197 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2198 for (i = 0; i < nb_oargs; i++) {
2199 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2200 op->args[i]));
2202 for (i = 0; i < nb_iargs; i++) {
2203 TCGArg arg = op->args[nb_oargs + i];
2204 const char *t = "<dummy>";
2205 if (arg != TCG_CALL_DUMMY_ARG) {
2206 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2208 col += qemu_log(",%s", t);
2210 } else {
2211 col += qemu_log(" %s ", def->name);
2213 nb_oargs = def->nb_oargs;
2214 nb_iargs = def->nb_iargs;
2215 nb_cargs = def->nb_cargs;
2217 if (def->flags & TCG_OPF_VECTOR) {
2218 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2219 8 << TCGOP_VECE(op));
2222 k = 0;
2223 for (i = 0; i < nb_oargs; i++) {
2224 if (k != 0) {
2225 col += qemu_log(",");
2227 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2228 op->args[k++]));
2230 for (i = 0; i < nb_iargs; i++) {
2231 if (k != 0) {
2232 col += qemu_log(",");
2234 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2235 op->args[k++]));
2237 switch (c) {
2238 case INDEX_op_brcond_i32:
2239 case INDEX_op_setcond_i32:
2240 case INDEX_op_movcond_i32:
2241 case INDEX_op_brcond2_i32:
2242 case INDEX_op_setcond2_i32:
2243 case INDEX_op_brcond_i64:
2244 case INDEX_op_setcond_i64:
2245 case INDEX_op_movcond_i64:
2246 case INDEX_op_cmp_vec:
2247 case INDEX_op_cmpsel_vec:
2248 if (op->args[k] < ARRAY_SIZE(cond_name)
2249 && cond_name[op->args[k]]) {
2250 col += qemu_log(",%s", cond_name[op->args[k++]]);
2251 } else {
2252 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2254 i = 1;
2255 break;
2256 case INDEX_op_qemu_ld_i32:
2257 case INDEX_op_qemu_st_i32:
2258 case INDEX_op_qemu_st8_i32:
2259 case INDEX_op_qemu_ld_i64:
2260 case INDEX_op_qemu_st_i64:
2262 TCGMemOpIdx oi = op->args[k++];
2263 MemOp op = get_memop(oi);
2264 unsigned ix = get_mmuidx(oi);
2266 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2267 col += qemu_log(",$0x%x,%u", op, ix);
2268 } else {
2269 const char *s_al, *s_op;
2270 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2271 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2272 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2274 i = 1;
2276 break;
2277 default:
2278 i = 0;
2279 break;
2281 switch (c) {
2282 case INDEX_op_set_label:
2283 case INDEX_op_br:
2284 case INDEX_op_brcond_i32:
2285 case INDEX_op_brcond_i64:
2286 case INDEX_op_brcond2_i32:
2287 col += qemu_log("%s$L%d", k ? "," : "",
2288 arg_label(op->args[k])->id);
2289 i++, k++;
2290 break;
2291 default:
2292 break;
2294 for (; i < nb_cargs; i++, k++) {
2295 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2299 if (have_prefs || op->life) {
2301 QemuLogFile *logfile;
2303 rcu_read_lock();
2304 logfile = qatomic_rcu_read(&qemu_logfile);
2305 if (logfile) {
2306 for (; col < 40; ++col) {
2307 putc(' ', logfile->fd);
2310 rcu_read_unlock();
2313 if (op->life) {
2314 unsigned life = op->life;
2316 if (life & (SYNC_ARG * 3)) {
2317 qemu_log(" sync:");
2318 for (i = 0; i < 2; ++i) {
2319 if (life & (SYNC_ARG << i)) {
2320 qemu_log(" %d", i);
2324 life /= DEAD_ARG;
2325 if (life) {
2326 qemu_log(" dead:");
2327 for (i = 0; life; ++i, life >>= 1) {
2328 if (life & 1) {
2329 qemu_log(" %d", i);
2335 if (have_prefs) {
2336 for (i = 0; i < nb_oargs; ++i) {
2337 TCGRegSet set = op->output_pref[i];
2339 if (i == 0) {
2340 qemu_log(" pref=");
2341 } else {
2342 qemu_log(",");
2344 if (set == 0) {
2345 qemu_log("none");
2346 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2347 qemu_log("all");
2348 #ifdef CONFIG_DEBUG_TCG
2349 } else if (tcg_regset_single(set)) {
2350 TCGReg reg = tcg_regset_first(set);
2351 qemu_log("%s", tcg_target_reg_names[reg]);
2352 #endif
2353 } else if (TCG_TARGET_NB_REGS <= 32) {
2354 qemu_log("%#x", (uint32_t)set);
2355 } else {
2356 qemu_log("%#" PRIx64, (uint64_t)set);
2361 qemu_log("\n");
2365 /* we give more priority to constraints with fewer registers */
2366 static int get_constraint_priority(const TCGOpDef *def, int k)
2368 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2369 int n;
2371 if (arg_ct->oalias) {
2372 /* an alias is equivalent to a single register */
2373 n = 1;
2374 } else {
2375 n = ctpop64(arg_ct->regs);
2377 return TCG_TARGET_NB_REGS - n + 1;
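/*
 * Illustrative example (assuming a host with TCG_TARGET_NB_REGS == 16):
 * a constraint accepting one fixed register (n == 1) gets priority 16,
 * an alias likewise counts as a single register, and a constraint
 * accepting any register (n == 16) gets priority 1.  Sorting on this
 * value below makes the allocator satisfy the tightest constraints
 * first, before looser constraints can take the registers they need.
 */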
2380 /* sort from highest priority to lowest */
2381 static void sort_constraints(TCGOpDef *def, int start, int n)
2383 int i, j;
2384 TCGArgConstraint *a = def->args_ct;
2386 for (i = 0; i < n; i++) {
2387 a[start + i].sort_index = start + i;
2389 if (n <= 1) {
2390 return;
2392 for (i = 0; i < n - 1; i++) {
2393 for (j = i + 1; j < n; j++) {
2394 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2395 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2396 if (p1 < p2) {
2397 int tmp = a[start + i].sort_index;
2398 a[start + i].sort_index = a[start + j].sort_index;
2399 a[start + j].sort_index = tmp;
2405 static void process_op_defs(TCGContext *s)
2407 TCGOpcode op;
2409 for (op = 0; op < NB_OPS; op++) {
2410 TCGOpDef *def = &tcg_op_defs[op];
2411 const TCGTargetOpDef *tdefs;
2412 TCGType type;
2413 int i, nb_args;
2415 if (def->flags & TCG_OPF_NOT_PRESENT) {
2416 continue;
2419 nb_args = def->nb_iargs + def->nb_oargs;
2420 if (nb_args == 0) {
2421 continue;
2424 tdefs = tcg_target_op_def(op);
2425 /* Missing TCGTargetOpDef entry. */
2426 tcg_debug_assert(tdefs != NULL);
2428 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2429 for (i = 0; i < nb_args; i++) {
2430 const char *ct_str = tdefs->args_ct_str[i];
2431 /* Incomplete TCGTargetOpDef entry. */
2432 tcg_debug_assert(ct_str != NULL);
2434 while (*ct_str != '\0') {
2435 switch(*ct_str) {
2436 case '0' ... '9':
2438 int oarg = *ct_str - '0';
2439 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2440 tcg_debug_assert(oarg < def->nb_oargs);
2441 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2442 def->args_ct[i] = def->args_ct[oarg];
2443 /* The output sets oalias. */
2444 def->args_ct[oarg].oalias = true;
2445 def->args_ct[oarg].alias_index = i;
2446 /* The input sets ialias. */
2447 def->args_ct[i].ialias = true;
2448 def->args_ct[i].alias_index = oarg;
2450 ct_str++;
2451 break;
2452 case '&':
2453 def->args_ct[i].newreg = true;
2454 ct_str++;
2455 break;
2456 case 'i':
2457 def->args_ct[i].ct |= TCG_CT_CONST;
2458 ct_str++;
2459 break;
2460 default:
2461 ct_str = target_parse_constraint(&def->args_ct[i],
2462 ct_str, type);
2463 /* Typo in TCGTargetOpDef constraint. */
2464 tcg_debug_assert(ct_str != NULL);
2469 /* TCGTargetOpDef entry with too much information? */
2470 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2472     /* sort the constraints (XXX: this is just a heuristic) */
2473 sort_constraints(def, 0, def->nb_oargs);
2474 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2478 void tcg_op_remove(TCGContext *s, TCGOp *op)
2480 TCGLabel *label;
2482 switch (op->opc) {
2483 case INDEX_op_br:
2484 label = arg_label(op->args[0]);
2485 label->refs--;
2486 break;
2487 case INDEX_op_brcond_i32:
2488 case INDEX_op_brcond_i64:
2489 label = arg_label(op->args[3]);
2490 label->refs--;
2491 break;
2492 case INDEX_op_brcond2_i32:
2493 label = arg_label(op->args[5]);
2494 label->refs--;
2495 break;
2496 default:
2497 break;
2500 QTAILQ_REMOVE(&s->ops, op, link);
2501 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2502 s->nb_ops--;
2504 #ifdef CONFIG_PROFILER
2505 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2506 #endif
2509 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2511 TCGContext *s = tcg_ctx;
2512 TCGOp *op;
2514 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2515 op = tcg_malloc(sizeof(TCGOp));
2516 } else {
2517 op = QTAILQ_FIRST(&s->free_ops);
2518 QTAILQ_REMOVE(&s->free_ops, op, link);
2520 memset(op, 0, offsetof(TCGOp, link));
2521 op->opc = opc;
2522 s->nb_ops++;
2524 return op;
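/*
 * Sketch of the recycling path implemented above (illustrative only):
 *
 *     TCGOp *a = tcg_emit_op(INDEX_op_discard);  // allocated, appended to s->ops
 *     tcg_op_remove(s, a);                       // unlinked, pushed on s->free_ops
 *     TCGOp *b = tcg_op_alloc(INDEX_op_discard); // reuses a's storage
 *
 * memset() clears only the bytes in front of the 'link' member; fields
 * laid out after it (such as the args array in the current TCGOp
 * layout) are left stale and must be rewritten in full by the emitter.
 */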
2527 TCGOp *tcg_emit_op(TCGOpcode opc)
2529 TCGOp *op = tcg_op_alloc(opc);
2530 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2531 return op;
2534 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2536 TCGOp *new_op = tcg_op_alloc(opc);
2537 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2538 return new_op;
2541 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2543 TCGOp *new_op = tcg_op_alloc(opc);
2544 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2545 return new_op;
2548 /* Reachability analysis: remove unreachable code. */
2549 static void reachable_code_pass(TCGContext *s)
2551 TCGOp *op, *op_next;
2552 bool dead = false;
2554 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2555 bool remove = dead;
2556 TCGLabel *label;
2557 int call_flags;
2559 switch (op->opc) {
2560 case INDEX_op_set_label:
2561 label = arg_label(op->args[0]);
2562 if (label->refs == 0) {
2564             /* While there is an occasional backward branch, virtually
2565              * all branches generated by the translators are forward,
2566              * which means that by the time we see a label we will
2567              * generally have removed all references to it already, and
2568              * there is little to be gained by iterating.  */
2570 remove = true;
2571 } else {
2572 /* Once we see a label, insns become live again. */
2573 dead = false;
2574 remove = false;
2577             /* Optimization can fold conditional branches to unconditional.
2578              * If we find a label with one reference which is preceded by
2579              * an unconditional branch to it, remove both.  This had to
2580              * wait until the dead code in between them was removed.  */
2582 if (label->refs == 1) {
2583 TCGOp *op_prev = QTAILQ_PREV(op, link);
2584 if (op_prev->opc == INDEX_op_br &&
2585 label == arg_label(op_prev->args[0])) {
2586 tcg_op_remove(s, op_prev);
2587 remove = true;
2591 break;
2593 case INDEX_op_br:
2594 case INDEX_op_exit_tb:
2595 case INDEX_op_goto_ptr:
2596 /* Unconditional branches; everything following is dead. */
2597 dead = true;
2598 break;
2600 case INDEX_op_call:
2601 /* Notice noreturn helper calls, raising exceptions. */
2602 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2603 if (call_flags & TCG_CALL_NO_RETURN) {
2604 dead = true;
2606 break;
2608 case INDEX_op_insn_start:
2609 /* Never remove -- we need to keep these for unwind. */
2610 remove = false;
2611 break;
2613 default:
2614 break;
2617 if (remove) {
2618 tcg_op_remove(s, op);
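/*
 * Illustrative effect of the pass above:
 *
 *     br $L1           <- unconditional: 'dead' becomes true
 *     mov_i32 t2,t3    <- unreachable, removed
 *     set_label $L0    <- refs == 0, removed
 *     set_label $L1    <- referenced: code is live again
 *
 * If $L1's only remaining reference is that immediately preceding br
 * (once the dead ops between them are gone), both the br and the
 * set_label are removed as well.
 */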
2623 #define TS_DEAD 1
2624 #define TS_MEM 2
2626 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2627 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2629 /* For liveness_pass_1, the register preferences for a given temp. */
2630 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2632 return ts->state_ptr;
2635 /* For liveness_pass_1, reset the preferences for a given temp to the
2636  * maximal regset for its type.  */
2638 static inline void la_reset_pref(TCGTemp *ts)
2640 *la_temp_pref(ts)
2641 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
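/*
 * Each temp's liveness state is a two-bit value combining the flags
 * above (illustrative summary; the pass walks the ops backwards):
 *
 *     0                  : value is used later, register copy suffices
 *     TS_MEM             : used later, and a sync to memory is required
 *     TS_DEAD            : value is not used afterwards
 *     TS_DEAD | TS_MEM   : unused, but the memory copy must be valid
 *
 * When an op (re)defines a temp whose state contains TS_MEM, the output
 * is flagged with SYNC_ARG; when it contains TS_DEAD, with DEAD_ARG.
 */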
2644 /* liveness analysis: end of function: all temps are dead, and globals
2645 should be in memory. */
2646 static void la_func_end(TCGContext *s, int ng, int nt)
2648 int i;
2650 for (i = 0; i < ng; ++i) {
2651 s->temps[i].state = TS_DEAD | TS_MEM;
2652 la_reset_pref(&s->temps[i]);
2654 for (i = ng; i < nt; ++i) {
2655 s->temps[i].state = TS_DEAD;
2656 la_reset_pref(&s->temps[i]);
2660 /* liveness analysis: end of basic block: all temps are dead, globals
2661 and local temps should be in memory. */
2662 static void la_bb_end(TCGContext *s, int ng, int nt)
2664 int i;
2666 for (i = 0; i < nt; ++i) {
2667 TCGTemp *ts = &s->temps[i];
2668 int state;
2670 switch (ts->kind) {
2671 case TEMP_FIXED:
2672 case TEMP_GLOBAL:
2673 case TEMP_LOCAL:
2674 state = TS_DEAD | TS_MEM;
2675 break;
2676 case TEMP_NORMAL:
2677 case TEMP_CONST:
2678 state = TS_DEAD;
2679 break;
2680 default:
2681 g_assert_not_reached();
2683 ts->state = state;
2684 la_reset_pref(ts);
2688 /* liveness analysis: sync globals back to memory. */
2689 static void la_global_sync(TCGContext *s, int ng)
2691 int i;
2693 for (i = 0; i < ng; ++i) {
2694 int state = s->temps[i].state;
2695 s->temps[i].state = state | TS_MEM;
2696 if (state == TS_DEAD) {
2697 /* If the global was previously dead, reset prefs. */
2698 la_reset_pref(&s->temps[i]);
2704 /* liveness analysis: conditional branch: all temps are dead,
2705  * globals and local temps should be synced.  */
2707 static void la_bb_sync(TCGContext *s, int ng, int nt)
2709 la_global_sync(s, ng);
2711 for (int i = ng; i < nt; ++i) {
2712 TCGTemp *ts = &s->temps[i];
2713 int state;
2715 switch (ts->kind) {
2716 case TEMP_LOCAL:
2717 state = ts->state;
2718 ts->state = state | TS_MEM;
2719 if (state != TS_DEAD) {
2720 continue;
2722 break;
2723 case TEMP_NORMAL:
2724 s->temps[i].state = TS_DEAD;
2725 break;
2726 case TEMP_CONST:
2727 continue;
2728 default:
2729 g_assert_not_reached();
2731 la_reset_pref(&s->temps[i]);
2735 /* liveness analysis: sync globals back to memory and kill. */
2736 static void la_global_kill(TCGContext *s, int ng)
2738 int i;
2740 for (i = 0; i < ng; i++) {
2741 s->temps[i].state = TS_DEAD | TS_MEM;
2742 la_reset_pref(&s->temps[i]);
2746 /* liveness analysis: note live globals crossing calls. */
2747 static void la_cross_call(TCGContext *s, int nt)
2749 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2750 int i;
2752 for (i = 0; i < nt; i++) {
2753 TCGTemp *ts = &s->temps[i];
2754 if (!(ts->state & TS_DEAD)) {
2755 TCGRegSet *pset = la_temp_pref(ts);
2756 TCGRegSet set = *pset;
2758 set &= mask;
2759 /* If the combination is not possible, restart. */
2760 if (set == 0) {
2761 set = tcg_target_available_regs[ts->type] & mask;
2763 *pset = set;
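/*
 * Illustrative example for la_cross_call(): assume a host where r0-r7
 * are call-clobbered (the register names are made up).  A live temp
 * preferring { r3, r9 } shrinks to { r9 }; one preferring { r2, r3 }
 * would become empty, so its preference restarts from all call-saved
 * registers available for its type.  Either way, a temp that is live
 * across the call ends up preferring registers that survive it.
 */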
2768 /* Liveness analysis: update the opc_arg_life array to tell whether a
2769    given input argument is dead.  Instructions updating dead
2770    temporaries are removed. */
2771 static void liveness_pass_1(TCGContext *s)
2773 int nb_globals = s->nb_globals;
2774 int nb_temps = s->nb_temps;
2775 TCGOp *op, *op_prev;
2776 TCGRegSet *prefs;
2777 int i;
2779 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2780 for (i = 0; i < nb_temps; ++i) {
2781 s->temps[i].state_ptr = prefs + i;
2784 /* ??? Should be redundant with the exit_tb that ends the TB. */
2785 la_func_end(s, nb_globals, nb_temps);
2787 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2788 int nb_iargs, nb_oargs;
2789 TCGOpcode opc_new, opc_new2;
2790 bool have_opc_new2;
2791 TCGLifeData arg_life = 0;
2792 TCGTemp *ts;
2793 TCGOpcode opc = op->opc;
2794 const TCGOpDef *def = &tcg_op_defs[opc];
2796 switch (opc) {
2797 case INDEX_op_call:
2799 int call_flags;
2800 int nb_call_regs;
2802 nb_oargs = TCGOP_CALLO(op);
2803 nb_iargs = TCGOP_CALLI(op);
2804 call_flags = op->args[nb_oargs + nb_iargs + 1];
2806 /* pure functions can be removed if their result is unused */
2807 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2808 for (i = 0; i < nb_oargs; i++) {
2809 ts = arg_temp(op->args[i]);
2810 if (ts->state != TS_DEAD) {
2811 goto do_not_remove_call;
2814 goto do_remove;
2816 do_not_remove_call:
2818 /* Output args are dead. */
2819 for (i = 0; i < nb_oargs; i++) {
2820 ts = arg_temp(op->args[i]);
2821 if (ts->state & TS_DEAD) {
2822 arg_life |= DEAD_ARG << i;
2824 if (ts->state & TS_MEM) {
2825 arg_life |= SYNC_ARG << i;
2827 ts->state = TS_DEAD;
2828 la_reset_pref(ts);
2830 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2831 op->output_pref[i] = 0;
2834 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2835 TCG_CALL_NO_READ_GLOBALS))) {
2836 la_global_kill(s, nb_globals);
2837 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2838 la_global_sync(s, nb_globals);
2841 /* Record arguments that die in this helper. */
2842 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2843 ts = arg_temp(op->args[i]);
2844 if (ts && ts->state & TS_DEAD) {
2845 arg_life |= DEAD_ARG << i;
2849 /* For all live registers, remove call-clobbered prefs. */
2850 la_cross_call(s, nb_temps);
2852 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2854 /* Input arguments are live for preceding opcodes. */
2855 for (i = 0; i < nb_iargs; i++) {
2856 ts = arg_temp(op->args[i + nb_oargs]);
2857 if (ts && ts->state & TS_DEAD) {
2858 /* For those arguments that die, and will be allocated
2859 * in registers, clear the register set for that arg,
2860 * to be filled in below. For args that will be on
2861                  * the stack, reset to any available reg.  */
2863 *la_temp_pref(ts)
2864 = (i < nb_call_regs ? 0 :
2865 tcg_target_available_regs[ts->type]);
2866 ts->state &= ~TS_DEAD;
2870 /* For each input argument, add its input register to prefs.
2871 If a temp is used once, this produces a single set bit. */
2872 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2873 ts = arg_temp(op->args[i + nb_oargs]);
2874 if (ts) {
2875 tcg_regset_set_reg(*la_temp_pref(ts),
2876 tcg_target_call_iarg_regs[i]);
2880 break;
2881 case INDEX_op_insn_start:
2882 break;
2883 case INDEX_op_discard:
2884 /* mark the temporary as dead */
2885 ts = arg_temp(op->args[0]);
2886 ts->state = TS_DEAD;
2887 la_reset_pref(ts);
2888 break;
2890 case INDEX_op_add2_i32:
2891 opc_new = INDEX_op_add_i32;
2892 goto do_addsub2;
2893 case INDEX_op_sub2_i32:
2894 opc_new = INDEX_op_sub_i32;
2895 goto do_addsub2;
2896 case INDEX_op_add2_i64:
2897 opc_new = INDEX_op_add_i64;
2898 goto do_addsub2;
2899 case INDEX_op_sub2_i64:
2900 opc_new = INDEX_op_sub_i64;
2901 do_addsub2:
2902 nb_iargs = 4;
2903 nb_oargs = 2;
2904             /* Test if the high part of the operation is dead, but not
2905                the low part.  The result can be optimized to a simple
2906                add or sub.  This happens often for an x86_64 guest when
2907                the CPU mode is set to 32 bit. */
2908 if (arg_temp(op->args[1])->state == TS_DEAD) {
2909 if (arg_temp(op->args[0])->state == TS_DEAD) {
2910 goto do_remove;
2912 /* Replace the opcode and adjust the args in place,
2913 leaving 3 unused args at the end. */
2914 op->opc = opc = opc_new;
2915 op->args[1] = op->args[2];
2916 op->args[2] = op->args[4];
2917 /* Fall through and mark the single-word operation live. */
2918 nb_iargs = 2;
2919 nb_oargs = 1;
2921 goto do_not_remove;
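            /*
             * Illustrative rewrite performed by do_addsub2 above: for
             *
             *     add2_i32 lo,hi,al,ah,bl,bh
             *
             * with 'hi' dead but 'lo' still used, the op is rewritten
             * in place to
             *
             *     add_i32 lo,al,bl
             *
             * by shifting args[2] and args[4] into the two input slots;
             * the three trailing args are simply left unused.
             */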
2923 case INDEX_op_mulu2_i32:
2924 opc_new = INDEX_op_mul_i32;
2925 opc_new2 = INDEX_op_muluh_i32;
2926 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2927 goto do_mul2;
2928 case INDEX_op_muls2_i32:
2929 opc_new = INDEX_op_mul_i32;
2930 opc_new2 = INDEX_op_mulsh_i32;
2931 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2932 goto do_mul2;
2933 case INDEX_op_mulu2_i64:
2934 opc_new = INDEX_op_mul_i64;
2935 opc_new2 = INDEX_op_muluh_i64;
2936 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2937 goto do_mul2;
2938 case INDEX_op_muls2_i64:
2939 opc_new = INDEX_op_mul_i64;
2940 opc_new2 = INDEX_op_mulsh_i64;
2941 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2942 goto do_mul2;
2943 do_mul2:
2944 nb_iargs = 2;
2945 nb_oargs = 2;
2946 if (arg_temp(op->args[1])->state == TS_DEAD) {
2947 if (arg_temp(op->args[0])->state == TS_DEAD) {
2948 /* Both parts of the operation are dead. */
2949 goto do_remove;
2951 /* The high part of the operation is dead; generate the low. */
2952 op->opc = opc = opc_new;
2953 op->args[1] = op->args[2];
2954 op->args[2] = op->args[3];
2955 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2956 /* The low part of the operation is dead; generate the high. */
2957 op->opc = opc = opc_new2;
2958 op->args[0] = op->args[1];
2959 op->args[1] = op->args[2];
2960 op->args[2] = op->args[3];
2961 } else {
2962 goto do_not_remove;
2964 /* Mark the single-word operation live. */
2965 nb_oargs = 1;
2966 goto do_not_remove;
2968 default:
2969 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2970 nb_iargs = def->nb_iargs;
2971 nb_oargs = def->nb_oargs;
2973 /* Test if the operation can be removed because all
2974 its outputs are dead. We assume that nb_oargs == 0
2975 implies side effects */
2976 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2977 for (i = 0; i < nb_oargs; i++) {
2978 if (arg_temp(op->args[i])->state != TS_DEAD) {
2979 goto do_not_remove;
2982 goto do_remove;
2984 goto do_not_remove;
2986 do_remove:
2987 tcg_op_remove(s, op);
2988 break;
2990 do_not_remove:
2991 for (i = 0; i < nb_oargs; i++) {
2992 ts = arg_temp(op->args[i]);
2994 /* Remember the preference of the uses that followed. */
2995 op->output_pref[i] = *la_temp_pref(ts);
2997 /* Output args are dead. */
2998 if (ts->state & TS_DEAD) {
2999 arg_life |= DEAD_ARG << i;
3001 if (ts->state & TS_MEM) {
3002 arg_life |= SYNC_ARG << i;
3004 ts->state = TS_DEAD;
3005 la_reset_pref(ts);
3008 /* If end of basic block, update. */
3009 if (def->flags & TCG_OPF_BB_EXIT) {
3010 la_func_end(s, nb_globals, nb_temps);
3011 } else if (def->flags & TCG_OPF_COND_BRANCH) {
3012 la_bb_sync(s, nb_globals, nb_temps);
3013 } else if (def->flags & TCG_OPF_BB_END) {
3014 la_bb_end(s, nb_globals, nb_temps);
3015 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3016 la_global_sync(s, nb_globals);
3017 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3018 la_cross_call(s, nb_temps);
3022 /* Record arguments that die in this opcode. */
3023 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3024 ts = arg_temp(op->args[i]);
3025 if (ts->state & TS_DEAD) {
3026 arg_life |= DEAD_ARG << i;
3030 /* Input arguments are live for preceding opcodes. */
3031 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3032 ts = arg_temp(op->args[i]);
3033 if (ts->state & TS_DEAD) {
3034 /* For operands that were dead, initially allow
3035 all regs for the type. */
3036 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3037 ts->state &= ~TS_DEAD;
3041 /* Incorporate constraints for this operand. */
3042 switch (opc) {
3043 case INDEX_op_mov_i32:
3044 case INDEX_op_mov_i64:
3045 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3046 have proper constraints. That said, special case
3047 moves to propagate preferences backward. */
3048 if (IS_DEAD_ARG(1)) {
3049 *la_temp_pref(arg_temp(op->args[0]))
3050 = *la_temp_pref(arg_temp(op->args[1]));
3052 break;
3054 default:
3055 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3056 const TCGArgConstraint *ct = &def->args_ct[i];
3057 TCGRegSet set, *pset;
3059 ts = arg_temp(op->args[i]);
3060 pset = la_temp_pref(ts);
3061 set = *pset;
3063 set &= ct->regs;
3064 if (ct->ialias) {
3065 set &= op->output_pref[ct->alias_index];
3067 /* If the combination is not possible, restart. */
3068 if (set == 0) {
3069 set = ct->regs;
3071 *pset = set;
3073 break;
3075 break;
3077 op->life = arg_life;
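/*
 * The arg_life word stored above packs two bitmasks (illustrative):
 * SYNC_ARG << i marks output i as needing a write-back to its canonical
 * memory slot, and DEAD_ARG << i marks argument i as dying at this op.
 * For example, in "add_i32 t0,t0,t1" where t1 has no later use, bit
 * DEAD_ARG << 2 is set, telling the register allocator that t1's
 * register may be reused immediately.
 */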
3081 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3082 static bool liveness_pass_2(TCGContext *s)
3084 int nb_globals = s->nb_globals;
3085 int nb_temps, i;
3086 bool changes = false;
3087 TCGOp *op, *op_next;
3089 /* Create a temporary for each indirect global. */
3090 for (i = 0; i < nb_globals; ++i) {
3091 TCGTemp *its = &s->temps[i];
3092 if (its->indirect_reg) {
3093 TCGTemp *dts = tcg_temp_alloc(s);
3094 dts->type = its->type;
3095 dts->base_type = its->base_type;
3096 its->state_ptr = dts;
3097 } else {
3098 its->state_ptr = NULL;
3100 /* All globals begin dead. */
3101 its->state = TS_DEAD;
3103 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3104 TCGTemp *its = &s->temps[i];
3105 its->state_ptr = NULL;
3106 its->state = TS_DEAD;
3109 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3110 TCGOpcode opc = op->opc;
3111 const TCGOpDef *def = &tcg_op_defs[opc];
3112 TCGLifeData arg_life = op->life;
3113 int nb_iargs, nb_oargs, call_flags;
3114 TCGTemp *arg_ts, *dir_ts;
3116 if (opc == INDEX_op_call) {
3117 nb_oargs = TCGOP_CALLO(op);
3118 nb_iargs = TCGOP_CALLI(op);
3119 call_flags = op->args[nb_oargs + nb_iargs + 1];
3120 } else {
3121 nb_iargs = def->nb_iargs;
3122 nb_oargs = def->nb_oargs;
3124 /* Set flags similar to how calls require. */
3125 if (def->flags & TCG_OPF_COND_BRANCH) {
3126 /* Like reading globals: sync_globals */
3127 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3128 } else if (def->flags & TCG_OPF_BB_END) {
3129 /* Like writing globals: save_globals */
3130 call_flags = 0;
3131 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3132 /* Like reading globals: sync_globals */
3133 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3134 } else {
3135 /* No effect on globals. */
3136 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3137 TCG_CALL_NO_WRITE_GLOBALS);
3141 /* Make sure that input arguments are available. */
3142 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3143 arg_ts = arg_temp(op->args[i]);
3144 if (arg_ts) {
3145 dir_ts = arg_ts->state_ptr;
3146 if (dir_ts && arg_ts->state == TS_DEAD) {
3147 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3148 ? INDEX_op_ld_i32
3149 : INDEX_op_ld_i64);
3150 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3152 lop->args[0] = temp_arg(dir_ts);
3153 lop->args[1] = temp_arg(arg_ts->mem_base);
3154 lop->args[2] = arg_ts->mem_offset;
3156 /* Loaded, but synced with memory. */
3157 arg_ts->state = TS_MEM;
3162 /* Perform input replacement, and mark inputs that became dead.
3163 No action is required except keeping temp_state up to date
3164 so that we reload when needed. */
3165 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3166 arg_ts = arg_temp(op->args[i]);
3167 if (arg_ts) {
3168 dir_ts = arg_ts->state_ptr;
3169 if (dir_ts) {
3170 op->args[i] = temp_arg(dir_ts);
3171 changes = true;
3172 if (IS_DEAD_ARG(i)) {
3173 arg_ts->state = TS_DEAD;
3179 /* Liveness analysis should ensure that the following are
3180 all correct, for call sites and basic block end points. */
3181 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3182 /* Nothing to do */
3183 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3184 for (i = 0; i < nb_globals; ++i) {
3185 /* Liveness should see that globals are synced back,
3186 that is, either TS_DEAD or TS_MEM. */
3187 arg_ts = &s->temps[i];
3188 tcg_debug_assert(arg_ts->state_ptr == 0
3189 || arg_ts->state != 0);
3191 } else {
3192 for (i = 0; i < nb_globals; ++i) {
3193 /* Liveness should see that globals are saved back,
3194 that is, TS_DEAD, waiting to be reloaded. */
3195 arg_ts = &s->temps[i];
3196 tcg_debug_assert(arg_ts->state_ptr == 0
3197 || arg_ts->state == TS_DEAD);
3201 /* Outputs become available. */
3202 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3203 arg_ts = arg_temp(op->args[0]);
3204 dir_ts = arg_ts->state_ptr;
3205 if (dir_ts) {
3206 op->args[0] = temp_arg(dir_ts);
3207 changes = true;
3209 /* The output is now live and modified. */
3210 arg_ts->state = 0;
3212 if (NEED_SYNC_ARG(0)) {
3213 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3214 ? INDEX_op_st_i32
3215 : INDEX_op_st_i64);
3216 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3217 TCGTemp *out_ts = dir_ts;
3219 if (IS_DEAD_ARG(0)) {
3220 out_ts = arg_temp(op->args[1]);
3221 arg_ts->state = TS_DEAD;
3222 tcg_op_remove(s, op);
3223 } else {
3224 arg_ts->state = TS_MEM;
3227 sop->args[0] = temp_arg(out_ts);
3228 sop->args[1] = temp_arg(arg_ts->mem_base);
3229 sop->args[2] = arg_ts->mem_offset;
3230 } else {
3231 tcg_debug_assert(!IS_DEAD_ARG(0));
3234 } else {
3235 for (i = 0; i < nb_oargs; i++) {
3236 arg_ts = arg_temp(op->args[i]);
3237 dir_ts = arg_ts->state_ptr;
3238 if (!dir_ts) {
3239 continue;
3241 op->args[i] = temp_arg(dir_ts);
3242 changes = true;
3244 /* The output is now live and modified. */
3245 arg_ts->state = 0;
3247 /* Sync outputs upon their last write. */
3248 if (NEED_SYNC_ARG(i)) {
3249 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3250 ? INDEX_op_st_i32
3251 : INDEX_op_st_i64);
3252 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3254 sop->args[0] = temp_arg(dir_ts);
3255 sop->args[1] = temp_arg(arg_ts->mem_base);
3256 sop->args[2] = arg_ts->mem_offset;
3258 arg_ts->state = TS_MEM;
3260 /* Drop outputs that are dead. */
3261 if (IS_DEAD_ARG(i)) {
3262 arg_ts->state = TS_DEAD;
3268 return changes;
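/*
 * Illustrative effect of liveness_pass_2() on an indirect global 'g'
 * (one whose memory slot is reached through another temporary), with
 * 'dg' the direct temp allocated for it above:
 *
 *     add_i32 g,g,t0
 *
 * becomes
 *
 *     ld_i32  dg,<g's base>,<g's offset>   <- inserted; g was TS_DEAD
 *     add_i32 dg,dg,t0
 *     st_i32  dg,<g's base>,<g's offset>   <- inserted when a sync is due
 *
 * and later reads in the same block reuse 'dg' without reloading.
 */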
3271 #ifdef CONFIG_DEBUG_TCG
3272 static void dump_regs(TCGContext *s)
3274 TCGTemp *ts;
3275 int i;
3276 char buf[64];
3278 for(i = 0; i < s->nb_temps; i++) {
3279 ts = &s->temps[i];
3280 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3281 switch(ts->val_type) {
3282 case TEMP_VAL_REG:
3283 printf("%s", tcg_target_reg_names[ts->reg]);
3284 break;
3285 case TEMP_VAL_MEM:
3286 printf("%d(%s)", (int)ts->mem_offset,
3287 tcg_target_reg_names[ts->mem_base->reg]);
3288 break;
3289 case TEMP_VAL_CONST:
3290 printf("$0x%" PRIx64, ts->val);
3291 break;
3292 case TEMP_VAL_DEAD:
3293 printf("D");
3294 break;
3295 default:
3296 printf("???");
3297 break;
3299 printf("\n");
3302 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3303 if (s->reg_to_temp[i] != NULL) {
3304 printf("%s: %s\n",
3305 tcg_target_reg_names[i],
3306 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3311 static void check_regs(TCGContext *s)
3313 int reg;
3314 int k;
3315 TCGTemp *ts;
3316 char buf[64];
3318 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3319 ts = s->reg_to_temp[reg];
3320 if (ts != NULL) {
3321 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3322 printf("Inconsistency for register %s:\n",
3323 tcg_target_reg_names[reg]);
3324 goto fail;
3328 for (k = 0; k < s->nb_temps; k++) {
3329 ts = &s->temps[k];
3330 if (ts->val_type == TEMP_VAL_REG
3331 && ts->kind != TEMP_FIXED
3332 && s->reg_to_temp[ts->reg] != ts) {
3333 printf("Inconsistency for temp %s:\n",
3334 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3335 fail:
3336 printf("reg state:\n");
3337 dump_regs(s);
3338 tcg_abort();
3342 #endif
3344 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3346 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3347     /* The Sparc64 stack is accessed with an offset of 2047 */
3348 s->current_frame_offset = (s->current_frame_offset +
3349 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3350 ~(sizeof(tcg_target_long) - 1);
3351 #endif
3352 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3353 s->frame_end) {
3354 tcg_abort();
3356 ts->mem_offset = s->current_frame_offset;
3357 ts->mem_base = s->frame_temp;
3358 ts->mem_allocated = 1;
3359 s->current_frame_offset += sizeof(tcg_target_long);
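/*
 * Worked example for the rounding above (illustrative, 64-bit host):
 * with sizeof(tcg_target_long) == 8 and current_frame_offset == 21,
 * (21 + 7) & ~7 == 24, so the temp is assigned the slot [24, 32) and
 * the offset advances to 32.  tcg_abort() fires if the slot would
 * overrun frame_end.
 */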
3362 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3364 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3365 mark it free; otherwise mark it dead. */
3366 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3368 TCGTempVal new_type;
3370 switch (ts->kind) {
3371 case TEMP_FIXED:
3372 return;
3373 case TEMP_GLOBAL:
3374 case TEMP_LOCAL:
3375 new_type = TEMP_VAL_MEM;
3376 break;
3377 case TEMP_NORMAL:
3378 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3379 break;
3380 case TEMP_CONST:
3381 new_type = TEMP_VAL_CONST;
3382 break;
3383 default:
3384 g_assert_not_reached();
3386 if (ts->val_type == TEMP_VAL_REG) {
3387 s->reg_to_temp[ts->reg] = NULL;
3389 ts->val_type = new_type;
3392 /* Mark a temporary as dead. */
3393 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3395 temp_free_or_dead(s, ts, 1);
3398 /* Sync a temporary to memory.  'allocated_regs' is used in case a temporary
3399    register needs to be allocated to store a constant.  If 'free_or_dead'
3400 is non-zero, subsequently release the temporary; if it is positive, the
3401 temp is dead; if it is negative, the temp is free. */
3402 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3403 TCGRegSet preferred_regs, int free_or_dead)
3405 if (!temp_readonly(ts) && !ts->mem_coherent) {
3406 if (!ts->mem_allocated) {
3407 temp_allocate_frame(s, ts);
3409 switch (ts->val_type) {
3410 case TEMP_VAL_CONST:
3411 /* If we're going to free the temp immediately, then we won't
3412 require it later in a register, so attempt to store the
3413 constant to memory directly. */
3414 if (free_or_dead
3415 && tcg_out_sti(s, ts->type, ts->val,
3416 ts->mem_base->reg, ts->mem_offset)) {
3417 break;
3419 temp_load(s, ts, tcg_target_available_regs[ts->type],
3420 allocated_regs, preferred_regs);
3421 /* fallthrough */
3423 case TEMP_VAL_REG:
3424 tcg_out_st(s, ts->type, ts->reg,
3425 ts->mem_base->reg, ts->mem_offset);
3426 break;
3428 case TEMP_VAL_MEM:
3429 break;
3431 case TEMP_VAL_DEAD:
3432 default:
3433 tcg_abort();
3435 ts->mem_coherent = 1;
3437 if (free_or_dead) {
3438 temp_free_or_dead(s, ts, free_or_dead);
3442 /* free register 'reg' by spilling the corresponding temporary if necessary */
3443 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3445 TCGTemp *ts = s->reg_to_temp[reg];
3446 if (ts != NULL) {
3447 temp_sync(s, ts, allocated_regs, 0, -1);
3452 /** tcg_reg_alloc:
3453  * @required_regs: Set of registers in which we must allocate.
3454  * @allocated_regs: Set of registers which must be avoided.
3455  * @preferred_regs: Set of registers we should prefer.
3456  * @rev: True if we search the registers in "indirect" order.
3458  * The allocated register must be in @required_regs & ~@allocated_regs,
3459  * but if we can put it in @preferred_regs we may save a move later.  */
3461 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3462 TCGRegSet allocated_regs,
3463 TCGRegSet preferred_regs, bool rev)
3465 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3466 TCGRegSet reg_ct[2];
3467 const int *order;
3469 reg_ct[1] = required_regs & ~allocated_regs;
3470 tcg_debug_assert(reg_ct[1] != 0);
3471 reg_ct[0] = reg_ct[1] & preferred_regs;
3473 /* Skip the preferred_regs option if it cannot be satisfied,
3474 or if the preference made no difference. */
3475 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3477 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3479 /* Try free registers, preferences first. */
3480 for (j = f; j < 2; j++) {
3481 TCGRegSet set = reg_ct[j];
3483 if (tcg_regset_single(set)) {
3484 /* One register in the set. */
3485 TCGReg reg = tcg_regset_first(set);
3486 if (s->reg_to_temp[reg] == NULL) {
3487 return reg;
3489 } else {
3490 for (i = 0; i < n; i++) {
3491 TCGReg reg = order[i];
3492 if (s->reg_to_temp[reg] == NULL &&
3493 tcg_regset_test_reg(set, reg)) {
3494 return reg;
3500 /* We must spill something. */
3501 for (j = f; j < 2; j++) {
3502 TCGRegSet set = reg_ct[j];
3504 if (tcg_regset_single(set)) {
3505 /* One register in the set. */
3506 TCGReg reg = tcg_regset_first(set);
3507 tcg_reg_free(s, reg, allocated_regs);
3508 return reg;
3509 } else {
3510 for (i = 0; i < n; i++) {
3511 TCGReg reg = order[i];
3512 if (tcg_regset_test_reg(set, reg)) {
3513 tcg_reg_free(s, reg, allocated_regs);
3514 return reg;
3520 tcg_abort();
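/*
 * Illustrative walk through tcg_reg_alloc(): with required_regs =
 * { r0, r1, r2 }, allocated_regs = { r0 } and preferred_regs = { r2 }
 * (made-up names), reg_ct[1] = { r1, r2 } and reg_ct[0] = { r2 }.
 * The two loops then try, in order: a free register in { r2 }, a free
 * register in { r1, r2 }, spilling a register from { r2 }, and finally
 * spilling from { r1, r2 }.  Preferences only change the search order,
 * never the legality of the result.
 */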
3523 /* Make sure the temporary is in a register. If needed, allocate the register
3524 from DESIRED while avoiding ALLOCATED. */
3525 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3526 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3528 TCGReg reg;
3530 switch (ts->val_type) {
3531 case TEMP_VAL_REG:
3532 return;
3533 case TEMP_VAL_CONST:
3534 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3535 preferred_regs, ts->indirect_base);
3536 if (ts->type <= TCG_TYPE_I64) {
3537 tcg_out_movi(s, ts->type, reg, ts->val);
3538 } else {
3539 uint64_t val = ts->val;
3540 MemOp vece = MO_64;
3543             /* Find the minimal vector element that matches the constant.
3544              * The targets will, in general, have to do this search anyway;
3545              * do it generically here.  */
3547 if (val == dup_const(MO_8, val)) {
3548 vece = MO_8;
3549 } else if (val == dup_const(MO_16, val)) {
3550 vece = MO_16;
3551 } else if (val == dup_const(MO_32, val)) {
3552 vece = MO_32;
3555 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3557 ts->mem_coherent = 0;
3558 break;
3559 case TEMP_VAL_MEM:
3560 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3561 preferred_regs, ts->indirect_base);
3562 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3563 ts->mem_coherent = 1;
3564 break;
3565 case TEMP_VAL_DEAD:
3566 default:
3567 tcg_abort();
3569 ts->reg = reg;
3570 ts->val_type = TEMP_VAL_REG;
3571 s->reg_to_temp[reg] = ts;
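/*
 * Illustrative example of the dup_const() search above: the constant
 * 0x4242424242424242 equals dup_const(MO_8, 0x42), so a vector temp
 * holding it is materialized with vece == MO_8, letting the backend use
 * its cheapest byte broadcast; 0x0001000100010001 first matches at
 * MO_16, and an arbitrary 64-bit pattern falls through to MO_64.
 */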
3574 /* Save a temporary to memory.  'allocated_regs' is used in case a
3575    temporary register needs to be allocated to store a constant. */
3576 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3578     /* The liveness analysis already ensures that globals are back
3579        in memory.  Keep a tcg_debug_assert for safety. */
3580 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3583 /* save globals to their canonical location and assume they can be
3584    modified by the following code.  'allocated_regs' is used in case a
3585    temporary register needs to be allocated to store a constant. */
3586 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3588 int i, n;
3590 for (i = 0, n = s->nb_globals; i < n; i++) {
3591 temp_save(s, &s->temps[i], allocated_regs);
3595 /* sync globals to their canonical location and assume they can be
3596    read by the following code.  'allocated_regs' is used in case a
3597    temporary register needs to be allocated to store a constant. */
3598 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3600 int i, n;
3602 for (i = 0, n = s->nb_globals; i < n; i++) {
3603 TCGTemp *ts = &s->temps[i];
3604 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3605 || ts->kind == TEMP_FIXED
3606 || ts->mem_coherent);
3610 /* at the end of a basic block, we assume all temporaries are dead and
3611 all globals are stored at their canonical location. */
3612 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3614 int i;
3616 for (i = s->nb_globals; i < s->nb_temps; i++) {
3617 TCGTemp *ts = &s->temps[i];
3619 switch (ts->kind) {
3620 case TEMP_LOCAL:
3621 temp_save(s, ts, allocated_regs);
3622 break;
3623 case TEMP_NORMAL:
3624             /* The liveness analysis already ensures that temps are dead.
3625                Keep a tcg_debug_assert for safety. */
3626 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3627 break;
3628 case TEMP_CONST:
3629 /* Similarly, we should have freed any allocated register. */
3630 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3631 break;
3632 default:
3633 g_assert_not_reached();
3637 save_globals(s, allocated_regs);
3641 /* At a conditional branch, we assume all temporaries are dead and
3642  * all globals and local temps are synced to their location.  */
3644 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3646 sync_globals(s, allocated_regs);
3648 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3649 TCGTemp *ts = &s->temps[i];
3651         /* The liveness analysis already ensures that temps are dead.
3652          * Keep tcg_debug_asserts for safety.  */
3654 switch (ts->kind) {
3655 case TEMP_LOCAL:
3656 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3657 break;
3658 case TEMP_NORMAL:
3659 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3660 break;
3661 case TEMP_CONST:
3662 break;
3663 default:
3664 g_assert_not_reached();
3670 /* Specialized code generation for INDEX_op_mov_* with a constant. */
3672 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3673 tcg_target_ulong val, TCGLifeData arg_life,
3674 TCGRegSet preferred_regs)
3676 /* ENV should not be modified. */
3677 tcg_debug_assert(!temp_readonly(ots));
3679 /* The movi is not explicitly generated here. */
3680 if (ots->val_type == TEMP_VAL_REG) {
3681 s->reg_to_temp[ots->reg] = NULL;
3683 ots->val_type = TEMP_VAL_CONST;
3684 ots->val = val;
3685 ots->mem_coherent = 0;
3686 if (NEED_SYNC_ARG(0)) {
3687 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3688 } else if (IS_DEAD_ARG(0)) {
3689 temp_dead(s, ots);
3694 /* Specialized code generation for INDEX_op_mov_*. */
3696 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3698 const TCGLifeData arg_life = op->life;
3699 TCGRegSet allocated_regs, preferred_regs;
3700 TCGTemp *ts, *ots;
3701 TCGType otype, itype;
3703 allocated_regs = s->reserved_regs;
3704 preferred_regs = op->output_pref[0];
3705 ots = arg_temp(op->args[0]);
3706 ts = arg_temp(op->args[1]);
3708 /* ENV should not be modified. */
3709 tcg_debug_assert(!temp_readonly(ots));
3711 /* Note that otype != itype for no-op truncation. */
3712 otype = ots->type;
3713 itype = ts->type;
3715 if (ts->val_type == TEMP_VAL_CONST) {
3716 /* propagate constant or generate sti */
3717 tcg_target_ulong val = ts->val;
3718 if (IS_DEAD_ARG(1)) {
3719 temp_dead(s, ts);
3721 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3722 return;
3725 /* If the source value is in memory we're going to be forced
3726 to have it in a register in order to perform the copy. Copy
3727 the SOURCE value into its own register first, that way we
3728 don't have to reload SOURCE the next time it is used. */
3729 if (ts->val_type == TEMP_VAL_MEM) {
3730 temp_load(s, ts, tcg_target_available_regs[itype],
3731 allocated_regs, preferred_regs);
3734 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3735 if (IS_DEAD_ARG(0)) {
3736 /* mov to a non-saved dead register makes no sense (even with
3737 liveness analysis disabled). */
3738 tcg_debug_assert(NEED_SYNC_ARG(0));
3739 if (!ots->mem_allocated) {
3740 temp_allocate_frame(s, ots);
3742 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3743 if (IS_DEAD_ARG(1)) {
3744 temp_dead(s, ts);
3746 temp_dead(s, ots);
3747 } else {
3748 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3749 /* the mov can be suppressed */
3750 if (ots->val_type == TEMP_VAL_REG) {
3751 s->reg_to_temp[ots->reg] = NULL;
3753 ots->reg = ts->reg;
3754 temp_dead(s, ts);
3755 } else {
3756 if (ots->val_type != TEMP_VAL_REG) {
3757 /* When allocating a new register, make sure to not spill the
3758 input one. */
3759 tcg_regset_set_reg(allocated_regs, ts->reg);
3760 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3761 allocated_regs, preferred_regs,
3762 ots->indirect_base);
3764 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3766                 /* Cross register class move not supported.
3767                  * Store the source register into the destination slot
3768                  * and leave the destination temp as TEMP_VAL_MEM.  */
3770 assert(!temp_readonly(ots));
3771 if (!ts->mem_allocated) {
3772 temp_allocate_frame(s, ots);
3774 tcg_out_st(s, ts->type, ts->reg,
3775 ots->mem_base->reg, ots->mem_offset);
3776 ots->mem_coherent = 1;
3777 temp_free_or_dead(s, ots, -1);
3778 return;
3781 ots->val_type = TEMP_VAL_REG;
3782 ots->mem_coherent = 0;
3783 s->reg_to_temp[ots->reg] = ots;
3784 if (NEED_SYNC_ARG(0)) {
3785 temp_sync(s, ots, allocated_regs, 0, 0);
3791 /* Specialized code generation for INDEX_op_dup_vec. */
3793 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3795 const TCGLifeData arg_life = op->life;
3796 TCGRegSet dup_out_regs, dup_in_regs;
3797 TCGTemp *its, *ots;
3798 TCGType itype, vtype;
3799 intptr_t endian_fixup;
3800 unsigned vece;
3801 bool ok;
3803 ots = arg_temp(op->args[0]);
3804 its = arg_temp(op->args[1]);
3806 /* ENV should not be modified. */
3807 tcg_debug_assert(!temp_readonly(ots));
3809 itype = its->type;
3810 vece = TCGOP_VECE(op);
3811 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3813 if (its->val_type == TEMP_VAL_CONST) {
3814 /* Propagate constant via movi -> dupi. */
3815 tcg_target_ulong val = its->val;
3816 if (IS_DEAD_ARG(1)) {
3817 temp_dead(s, its);
3819 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3820 return;
3823 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3824 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3826 /* Allocate the output register now. */
3827 if (ots->val_type != TEMP_VAL_REG) {
3828 TCGRegSet allocated_regs = s->reserved_regs;
3830 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3831 /* Make sure to not spill the input register. */
3832 tcg_regset_set_reg(allocated_regs, its->reg);
3834 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3835 op->output_pref[0], ots->indirect_base);
3836 ots->val_type = TEMP_VAL_REG;
3837 ots->mem_coherent = 0;
3838 s->reg_to_temp[ots->reg] = ots;
3841 switch (its->val_type) {
3842 case TEMP_VAL_REG:
3844         /* The dup constraints must be broad, covering all possible VECE.
3845          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3846          * to fail, indicating that extra moves are required for that case.  */
3848 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3849 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3850 goto done;
3852 /* Try again from memory or a vector input register. */
3854 if (!its->mem_coherent) {
3856                 /* The input register is not synced, and so an extra store
3857                  * would be required to use memory.  Attempt an integer-vector
3858                  * register move first.  We do not have a TCGRegSet for this.  */
3860 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3861 break;
3863 /* Sync the temp back to its slot and load from there. */
3864 temp_sync(s, its, s->reserved_regs, 0, 0);
3866 /* fall through */
3868 case TEMP_VAL_MEM:
3869 #ifdef HOST_WORDS_BIGENDIAN
3870 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3871 endian_fixup -= 1 << vece;
3872 #else
3873 endian_fixup = 0;
3874 #endif
3875 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3876 its->mem_offset + endian_fixup)) {
3877 goto done;
3879 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3880 break;
3882 default:
3883 g_assert_not_reached();
3886 /* We now have a vector input register, so dup must succeed. */
3887 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3888 tcg_debug_assert(ok);
3890 done:
3891 if (IS_DEAD_ARG(1)) {
3892 temp_dead(s, its);
3894 if (NEED_SYNC_ARG(0)) {
3895 temp_sync(s, ots, s->reserved_regs, 0, 0);
3897 if (IS_DEAD_ARG(0)) {
3898 temp_dead(s, ots);
3902 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3904 const TCGLifeData arg_life = op->life;
3905 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3906 TCGRegSet i_allocated_regs;
3907 TCGRegSet o_allocated_regs;
3908 int i, k, nb_iargs, nb_oargs;
3909 TCGReg reg;
3910 TCGArg arg;
3911 const TCGArgConstraint *arg_ct;
3912 TCGTemp *ts;
3913 TCGArg new_args[TCG_MAX_OP_ARGS];
3914 int const_args[TCG_MAX_OP_ARGS];
3916 nb_oargs = def->nb_oargs;
3917 nb_iargs = def->nb_iargs;
3919 /* copy constants */
3920 memcpy(new_args + nb_oargs + nb_iargs,
3921 op->args + nb_oargs + nb_iargs,
3922 sizeof(TCGArg) * def->nb_cargs);
3924 i_allocated_regs = s->reserved_regs;
3925 o_allocated_regs = s->reserved_regs;
3927 /* satisfy input constraints */
3928 for (k = 0; k < nb_iargs; k++) {
3929 TCGRegSet i_preferred_regs, o_preferred_regs;
3931 i = def->args_ct[nb_oargs + k].sort_index;
3932 arg = op->args[i];
3933 arg_ct = &def->args_ct[i];
3934 ts = arg_temp(arg);
3936 if (ts->val_type == TEMP_VAL_CONST
3937 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3938 /* constant is OK for instruction */
3939 const_args[i] = 1;
3940 new_args[i] = ts->val;
3941 continue;
3944 i_preferred_regs = o_preferred_regs = 0;
3945 if (arg_ct->ialias) {
3946 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3949             /* If the input is readonly, then it cannot also be an
3950              * output and aliased to itself.  If the input is not
3951              * dead after the instruction, we must allocate a new
3952              * register and move it.  */
3954 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3955 goto allocate_in_reg;
3959         /* Check if the current register has already been allocated
3960          * for another input aliased to an output.  */
3962 if (ts->val_type == TEMP_VAL_REG) {
3963 reg = ts->reg;
3964 for (int k2 = 0; k2 < k; k2++) {
3965 int i2 = def->args_ct[nb_oargs + k2].sort_index;
3966 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3967 goto allocate_in_reg;
3971 i_preferred_regs = o_preferred_regs;
3974 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3975 reg = ts->reg;
3977 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3978 allocate_in_reg:
3980             /* Allocate a new register matching the constraint
3981              * and move the temporary register into it.  */
3983 temp_load(s, ts, tcg_target_available_regs[ts->type],
3984 i_allocated_regs, 0);
3985 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3986 o_preferred_regs, ts->indirect_base);
3987 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3989                 /* Cross register class move not supported.  Sync the
3990                  * temp back to its slot and load from there.  */
3992 temp_sync(s, ts, i_allocated_regs, 0, 0);
3993 tcg_out_ld(s, ts->type, reg,
3994 ts->mem_base->reg, ts->mem_offset);
3997 new_args[i] = reg;
3998 const_args[i] = 0;
3999 tcg_regset_set_reg(i_allocated_regs, reg);
4002 /* mark dead temporaries and free the associated registers */
4003 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4004 if (IS_DEAD_ARG(i)) {
4005 temp_dead(s, arg_temp(op->args[i]));
4009 if (def->flags & TCG_OPF_COND_BRANCH) {
4010 tcg_reg_alloc_cbranch(s, i_allocated_regs);
4011 } else if (def->flags & TCG_OPF_BB_END) {
4012 tcg_reg_alloc_bb_end(s, i_allocated_regs);
4013 } else {
4014 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4015             /* XXX: permit generic clobber register list? */
4016 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4017 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4018 tcg_reg_free(s, i, i_allocated_regs);
4022 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4023 /* sync globals if the op has side effects and might trigger
4024 an exception. */
4025 sync_globals(s, i_allocated_regs);
4028 /* satisfy the output constraints */
4029 for(k = 0; k < nb_oargs; k++) {
4030 i = def->args_ct[k].sort_index;
4031 arg = op->args[i];
4032 arg_ct = &def->args_ct[i];
4033 ts = arg_temp(arg);
4035 /* ENV should not be modified. */
4036 tcg_debug_assert(!temp_readonly(ts));
4038 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4039 reg = new_args[arg_ct->alias_index];
4040 } else if (arg_ct->newreg) {
4041 reg = tcg_reg_alloc(s, arg_ct->regs,
4042 i_allocated_regs | o_allocated_regs,
4043 op->output_pref[k], ts->indirect_base);
4044 } else {
4045 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4046 op->output_pref[k], ts->indirect_base);
4048 tcg_regset_set_reg(o_allocated_regs, reg);
4049 if (ts->val_type == TEMP_VAL_REG) {
4050 s->reg_to_temp[ts->reg] = NULL;
4052 ts->val_type = TEMP_VAL_REG;
4053 ts->reg = reg;
4055         /* Temp value is modified, so the value kept in memory is
4056          * potentially not the same.  */
4058 ts->mem_coherent = 0;
4059 s->reg_to_temp[reg] = ts;
4060 new_args[i] = reg;
4064 /* emit instruction */
4065 if (def->flags & TCG_OPF_VECTOR) {
4066 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4067 new_args, const_args);
4068 } else {
4069 tcg_out_op(s, op->opc, new_args, const_args);
4072 /* move the outputs in the correct register if needed */
4073 for(i = 0; i < nb_oargs; i++) {
4074 ts = arg_temp(op->args[i]);
4076 /* ENV should not be modified. */
4077 tcg_debug_assert(!temp_readonly(ts));
4079 if (NEED_SYNC_ARG(i)) {
4080 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4081 } else if (IS_DEAD_ARG(i)) {
4082 temp_dead(s, ts);
4087 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4089 const TCGLifeData arg_life = op->life;
4090 TCGTemp *ots, *itsl, *itsh;
4091 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4093 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4094 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4095 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4097 ots = arg_temp(op->args[0]);
4098 itsl = arg_temp(op->args[1]);
4099 itsh = arg_temp(op->args[2]);
4101 /* ENV should not be modified. */
4102 tcg_debug_assert(!temp_readonly(ots));
4104 /* Allocate the output register now. */
4105 if (ots->val_type != TEMP_VAL_REG) {
4106 TCGRegSet allocated_regs = s->reserved_regs;
4107 TCGRegSet dup_out_regs =
4108 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4110 /* Make sure to not spill the input registers. */
4111 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4112 tcg_regset_set_reg(allocated_regs, itsl->reg);
4114 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4115 tcg_regset_set_reg(allocated_regs, itsh->reg);
4118 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4119 op->output_pref[0], ots->indirect_base);
4120 ots->val_type = TEMP_VAL_REG;
4121 ots->mem_coherent = 0;
4122 s->reg_to_temp[ots->reg] = ots;
4125 /* Promote dup2 of immediates to dupi_vec. */
4126 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4127 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4128 MemOp vece = MO_64;
4130 if (val == dup_const(MO_8, val)) {
4131 vece = MO_8;
4132 } else if (val == dup_const(MO_16, val)) {
4133 vece = MO_16;
4134 } else if (val == dup_const(MO_32, val)) {
4135 vece = MO_32;
4138 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4139 goto done;
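        /*
         * Illustrative example for the promotion above: with itsl->val == 1
         * and itsh->val == 1, deposit64() forms 0x0000000100000001, which
         * matches dup_const(MO_32, 1); the dup2 of two constants therefore
         * becomes a single dupi_vec with vece == MO_32 instead of being
         * assembled from two 32-bit halves.
         */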
4142 /* If the two inputs form one 64-bit value, try dupm_vec. */
4143 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4144 if (!itsl->mem_coherent) {
4145 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4147 if (!itsh->mem_coherent) {
4148 temp_sync(s, itsh, s->reserved_regs, 0, 0);
4150 #ifdef HOST_WORDS_BIGENDIAN
4151 TCGTemp *its = itsh;
4152 #else
4153 TCGTemp *its = itsl;
4154 #endif
4155 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4156 its->mem_base->reg, its->mem_offset)) {
4157 goto done;
4161 /* Fall back to generic expansion. */
4162 return false;
4164 done:
4165 if (IS_DEAD_ARG(1)) {
4166 temp_dead(s, itsl);
4168 if (IS_DEAD_ARG(2)) {
4169 temp_dead(s, itsh);
4171 if (NEED_SYNC_ARG(0)) {
4172 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4173 } else if (IS_DEAD_ARG(0)) {
4174 temp_dead(s, ots);
4176 return true;

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
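
/*
 * Allocate registers and emit code for a helper call.  Arguments that do
 * not fit in the host's argument registers are stored to stack slots
 * first; the remainder are loaded into the call argument registers.
 * Dead input temporaries are then released, call-clobbered registers are
 * freed, and globals are saved or synced as the call flags require,
 * before the call itself is emitted and the outputs are bound to the
 * host's return registers.
 */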
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for (i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
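
/*
 * For example, PROF_ADD(prof, orig, tb_count) atomically reads
 * orig->tb_count and accumulates it into prof->tb_count, while PROF_MAX
 * keeps the running maximum instead of the sum, as is appropriate for
 * the *_max counters below.
 */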

/* Pass in a zero'ed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(void)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif
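
/*
 * Generate host code for the ops queued in s->ops, writing into @tb's
 * code buffer.  Returns the generated code size in bytes on success,
 * -1 if the buffer high-water mark was crossed, or -2 if the TB outgrew
 * what gen_insn_end_off can record or relocations failed to resolve;
 * a negative value tells the caller to restart translation.
 */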
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: it would be much faster to have specialized register
               allocator functions for some common argument patterns. */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
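
/*
 * As a sketch only (the real per-host implementations live in the
 * tcg-target.c.inc files and emit genuine CFI data for their prologue),
 * a backend would do roughly:
 *
 *     static const DebugFrame debug_frame = {
 *         ... CIE and FDE contents describing the prologue ...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */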

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */
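
/*
 * Return the offset of @str within @strtab.  All strings looked up here
 * are known to be present in the table, so the loop has no not-found
 * exit.
 */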
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
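
/*
 * Build a single in-memory ELF image: a NOBITS .text section covering the
 * code buffer, a minimal DWARF compile unit naming code_gen_buffer, and
 * the caller-supplied .debug_frame appended at the end.  The image is
 * then handed to GDB through the __jit_debug_* hooks above.
 */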
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            },
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            },
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            /* With nmemb == 1, fwrite returns 1 on a complete write. */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif