roms: add the edk2 project as a git submodule
[qemu/ar7.git] / tcg / tcg.c
blob9b2bf7f43976e60023197dafcab6dff8241a627d
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
/* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/timer.h"
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39 CPU definitions. Currently they are used for qemu_ld/st
40 instructions */
41 #define NO_CPU_IO_DEFS
42 #include "cpu.h"
44 #include "exec/cpu-common.h"
45 #include "exec/exec-all.h"
47 #include "tcg-op.h"
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS ELFCLASS32
51 #else
52 # define ELF_CLASS ELFCLASS64
53 #endif
54 #ifdef HOST_WORDS_BIGENDIAN
55 # define ELF_DATA ELFDATA2MSB
56 #else
57 # define ELF_DATA ELFDATA2LSB
58 #endif
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "sysemu/sysemu.h"
64 /* Forward declarations for functions declared in tcg-target.inc.c and
65 used here. */
66 static void tcg_target_init(TCGContext *s);
67 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70 intptr_t value, intptr_t addend);
72 /* The CIE and FDE header definitions will be common to all hosts. */
/* Header of the CIE part of the debug frame handed to tcg_register_jit_int. */
typedef struct {
    /* The length word is aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
/*
 * Header of the FDE part of the debug frame: length, offset of the CIE it
 * refers to, and the start/length of the function it covers.
 */
typedef struct QEMU_PACKED {
    /* The length word is aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
90 typedef struct QEMU_PACKED {
91 DebugFrameCIE cie;
92 DebugFrameFDEHeader fde;
93 } DebugFrameHeader;
95 static void tcg_register_jit_int(void *buf, size_t size,
96 const void *debug_frame,
97 size_t debug_frame_size)
98 __attribute__((unused));
100 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
101 static const char *target_parse_constraint(TCGArgConstraint *ct,
102 const char *ct_str, TCGType type);
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
104 intptr_t arg2);
105 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107 TCGReg ret, tcg_target_long arg);
108 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
109 const int *const_args);
110 #if TCG_TARGET_MAYBE_vec
111 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
112 unsigned vece, const TCGArg *args,
113 const int *const_args);
114 #else
115 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
116 unsigned vece, const TCGArg *args,
117 const int *const_args)
119 g_assert_not_reached();
121 #endif
122 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
123 intptr_t arg2);
124 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
125 TCGReg base, intptr_t ofs);
126 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
127 static int tcg_target_const_match(tcg_target_long val, TCGType type,
128 const TCGArgConstraint *arg_ct);
129 #ifdef TCG_TARGET_NEED_LDST_LABELS
130 static bool tcg_out_ldst_finalize(TCGContext *s);
131 #endif
133 #define TCG_HIGHWATER 1024
135 static TCGContext **tcg_ctxs;
136 static unsigned int n_tcg_ctxs;
137 TCGv_env cpu_env = 0;
139 struct tcg_region_tree {
140 QemuMutex lock;
141 GTree *tree;
142 /* padding to avoid false sharing is computed at run-time */
146 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
147 * dynamically allocate from as demand dictates. Given appropriate region
148 * sizing, this minimizes flushes even when some TCG threads generate a lot
149 * more code than others.
151 struct tcg_region_state {
152 QemuMutex lock;
154 /* fields set at init time */
155 void *start;
156 void *start_aligned;
157 void *end;
158 size_t n;
159 size_t size; /* size of one region */
160 size_t stride; /* .size + guard size */
162 /* fields protected by the lock */
163 size_t current; /* current region index */
164 size_t agg_size_full; /* aggregate size of full regions */
167 static struct tcg_region_state region;
169 * This is an array of struct tcg_region_tree's, with padding.
170 * We use void * to simplify the computation of region_trees[i]; each
171 * struct is found every tree_size bytes.
173 static void *region_trees;
174 static size_t tree_size;
175 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
176 static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store @v at the current output position and advance by one unit. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    *dst = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the unit at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
191 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
192 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
194 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
195 *s->code_ptr++ = v;
196 } else {
197 tcg_insn_unit *p = s->code_ptr;
198 memcpy(p, &v, sizeof(v));
199 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
203 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
204 uint16_t v)
206 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
207 *p = v;
208 } else {
209 memcpy(p, &v, sizeof(v));
212 #endif
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
215 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
217 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
218 *s->code_ptr++ = v;
219 } else {
220 tcg_insn_unit *p = s->code_ptr;
221 memcpy(p, &v, sizeof(v));
222 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
226 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
227 uint32_t v)
229 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
230 *p = v;
231 } else {
232 memcpy(p, &v, sizeof(v));
235 #endif
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
238 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
240 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
241 *s->code_ptr++ = v;
242 } else {
243 tcg_insn_unit *p = s->code_ptr;
244 memcpy(p, &v, sizeof(v));
245 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
249 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
250 uint64_t v)
252 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
253 *p = v;
254 } else {
255 memcpy(p, &v, sizeof(v));
258 #endif
260 /* label relocation processing */
262 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
263 TCGLabel *l, intptr_t addend)
265 TCGRelocation *r;
267 if (l->has_value) {
268 /* FIXME: This may break relocations on RISC targets that
269 modify instruction fields in place. The caller may not have
270 written the initial value. */
271 bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
272 tcg_debug_assert(ok);
273 } else {
274 /* add a new relocation entry */
275 r = tcg_malloc(sizeof(TCGRelocation));
276 r->type = type;
277 r->ptr = code_ptr;
278 r->addend = addend;
279 r->next = l->u.first_reloc;
280 l->u.first_reloc = r;
284 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
286 intptr_t value = (intptr_t)ptr;
287 TCGRelocation *r;
289 tcg_debug_assert(!l->has_value);
291 for (r = l->u.first_reloc; r != NULL; r = r->next) {
292 bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
293 tcg_debug_assert(ok);
296 l->has_value = 1;
297 l->u.value_ptr = ptr;
300 TCGLabel *gen_new_label(void)
302 TCGContext *s = tcg_ctx;
303 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
305 *l = (TCGLabel){
306 .id = s->nb_labels++
308 #ifdef CONFIG_DEBUG_TCG
309 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
310 #endif
312 return l;
315 static void set_jmp_reset_offset(TCGContext *s, int which)
317 size_t off = tcg_current_code_size(s);
318 s->tb_jmp_reset_offset[which] = off;
319 /* Make sure that we didn't overflow the stored offset. */
320 assert(s->tb_jmp_reset_offset[which] == off);
323 #include "tcg-target.inc.c"
325 /* compare a pointer @ptr and a tb_tc @s */
326 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
328 if (ptr >= s->ptr + s->size) {
329 return 1;
330 } else if (ptr < s->ptr) {
331 return -1;
333 return 0;
336 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
338 const struct tb_tc *a = ap;
339 const struct tb_tc *b = bp;
342 * When both sizes are set, we know this isn't a lookup.
343 * This is the most likely case: every TB must be inserted; lookups
344 * are a lot less frequent.
346 if (likely(a->size && b->size)) {
347 if (a->ptr > b->ptr) {
348 return 1;
349 } else if (a->ptr < b->ptr) {
350 return -1;
352 /* a->ptr == b->ptr should happen only on deletions */
353 g_assert(a->size == b->size);
354 return 0;
357 * All lookups have either .size field set to 0.
358 * From the glib sources we see that @ap is always the lookup key. However
359 * the docs provide no guarantee, so we just mark this case as likely.
361 if (likely(a->size == 0)) {
362 return ptr_cmp_tb_tc(a->ptr, b);
364 return ptr_cmp_tb_tc(b->ptr, a);
367 static void tcg_region_trees_init(void)
369 size_t i;
371 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
372 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
373 for (i = 0; i < region.n; i++) {
374 struct tcg_region_tree *rt = region_trees + i * tree_size;
376 qemu_mutex_init(&rt->lock);
377 rt->tree = g_tree_new(tb_tc_cmp);
381 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
383 size_t region_idx;
385 if (p < region.start_aligned) {
386 region_idx = 0;
387 } else {
388 ptrdiff_t offset = p - region.start_aligned;
390 if (offset > region.stride * (region.n - 1)) {
391 region_idx = region.n - 1;
392 } else {
393 region_idx = offset / region.stride;
396 return region_trees + region_idx * tree_size;
399 void tcg_tb_insert(TranslationBlock *tb)
401 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
403 qemu_mutex_lock(&rt->lock);
404 g_tree_insert(rt->tree, &tb->tc, tb);
405 qemu_mutex_unlock(&rt->lock);
408 void tcg_tb_remove(TranslationBlock *tb)
410 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
412 qemu_mutex_lock(&rt->lock);
413 g_tree_remove(rt->tree, &tb->tc);
414 qemu_mutex_unlock(&rt->lock);
418 * Find the TB 'tb' such that
419 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
420 * Return NULL if not found.
422 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
424 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
425 TranslationBlock *tb;
426 struct tb_tc s = { .ptr = (void *)tc_ptr };
428 qemu_mutex_lock(&rt->lock);
429 tb = g_tree_lookup(rt->tree, &s);
430 qemu_mutex_unlock(&rt->lock);
431 return tb;
434 static void tcg_region_tree_lock_all(void)
436 size_t i;
438 for (i = 0; i < region.n; i++) {
439 struct tcg_region_tree *rt = region_trees + i * tree_size;
441 qemu_mutex_lock(&rt->lock);
445 static void tcg_region_tree_unlock_all(void)
447 size_t i;
449 for (i = 0; i < region.n; i++) {
450 struct tcg_region_tree *rt = region_trees + i * tree_size;
452 qemu_mutex_unlock(&rt->lock);
456 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
458 size_t i;
460 tcg_region_tree_lock_all();
461 for (i = 0; i < region.n; i++) {
462 struct tcg_region_tree *rt = region_trees + i * tree_size;
464 g_tree_foreach(rt->tree, func, user_data);
466 tcg_region_tree_unlock_all();
469 size_t tcg_nb_tbs(void)
471 size_t nb_tbs = 0;
472 size_t i;
474 tcg_region_tree_lock_all();
475 for (i = 0; i < region.n; i++) {
476 struct tcg_region_tree *rt = region_trees + i * tree_size;
478 nb_tbs += g_tree_nnodes(rt->tree);
480 tcg_region_tree_unlock_all();
481 return nb_tbs;
484 static void tcg_region_tree_reset_all(void)
486 size_t i;
488 tcg_region_tree_lock_all();
489 for (i = 0; i < region.n; i++) {
490 struct tcg_region_tree *rt = region_trees + i * tree_size;
492 /* Increment the refcount first so that destroy acts as a reset */
493 g_tree_ref(rt->tree);
494 g_tree_destroy(rt->tree);
496 tcg_region_tree_unlock_all();
499 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
501 void *start, *end;
503 start = region.start_aligned + curr_region * region.stride;
504 end = start + region.size;
506 if (curr_region == 0) {
507 start = region.start;
509 if (curr_region == region.n - 1) {
510 end = region.end;
513 *pstart = start;
514 *pend = end;
517 static void tcg_region_assign(TCGContext *s, size_t curr_region)
519 void *start, *end;
521 tcg_region_bounds(curr_region, &start, &end);
523 s->code_gen_buffer = start;
524 s->code_gen_ptr = start;
525 s->code_gen_buffer_size = end - start;
526 s->code_gen_highwater = end - TCG_HIGHWATER;
529 static bool tcg_region_alloc__locked(TCGContext *s)
531 if (region.current == region.n) {
532 return true;
534 tcg_region_assign(s, region.current);
535 region.current++;
536 return false;
540 * Request a new region once the one in use has filled up.
541 * Returns true on error.
543 static bool tcg_region_alloc(TCGContext *s)
545 bool err;
546 /* read the region size now; alloc__locked will overwrite it on success */
547 size_t size_full = s->code_gen_buffer_size;
549 qemu_mutex_lock(&region.lock);
550 err = tcg_region_alloc__locked(s);
551 if (!err) {
552 region.agg_size_full += size_full - TCG_HIGHWATER;
554 qemu_mutex_unlock(&region.lock);
555 return err;
559 * Perform a context's first region allocation.
560 * This function does _not_ increment region.agg_size_full.
562 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
564 return tcg_region_alloc__locked(s);
567 /* Call from a safe-work context */
568 void tcg_region_reset_all(void)
570 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
571 unsigned int i;
573 qemu_mutex_lock(&region.lock);
574 region.current = 0;
575 region.agg_size_full = 0;
577 for (i = 0; i < n_ctxs; i++) {
578 TCGContext *s = atomic_read(&tcg_ctxs[i]);
579 bool err = tcg_region_initial_alloc__locked(s);
581 g_assert(!err);
583 qemu_mutex_unlock(&region.lock);
585 tcg_region_tree_reset_all();
588 #ifdef CONFIG_USER_ONLY
589 static size_t tcg_n_regions(void)
591 return 1;
593 #else
595 * It is likely that some vCPUs will translate more code than others, so we
596 * first try to set more regions than max_cpus, with those regions being of
597 * reasonable size. If that's not possible we make do by evenly dividing
598 * the code_gen_buffer among the vCPUs.
600 static size_t tcg_n_regions(void)
602 size_t i;
604 /* Use a single region if all we have is one vCPU thread */
605 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
606 return 1;
609 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
610 for (i = 8; i > 0; i--) {
611 size_t regions_per_thread = i;
612 size_t region_size;
614 region_size = tcg_init_ctx.code_gen_buffer_size;
615 region_size /= max_cpus * regions_per_thread;
617 if (region_size >= 2 * 1024u * 1024) {
618 return max_cpus * regions_per_thread;
621 /* If we can't, then just allocate one region per vCPU thread */
622 return max_cpus;
624 #endif
627 * Initializes region partitioning.
629 * Called at init time from the parent thread (i.e. the one calling
630 * tcg_context_init), after the target's TCG globals have been set.
632 * Region partitioning works by splitting code_gen_buffer into separate regions,
633 * and then assigning regions to TCG threads so that the threads can translate
634 * code in parallel without synchronization.
636 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
637 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
638 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
639 * must have been parsed before calling this function, since it calls
640 * qemu_tcg_mttcg_enabled().
642 * In user-mode we use a single region. Having multiple regions in user-mode
643 * is not supported, because the number of vCPU threads (recall that each thread
644 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
645 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
650 * However, this user-mode limitation is unlikely to be a significant problem
651 * in practice. Multi-threaded guests share most if not all of their translated
652 * code, which makes parallel code generation less appealing than in softmmu.
654 void tcg_region_init(void)
656 void *buf = tcg_init_ctx.code_gen_buffer;
657 void *aligned;
658 size_t size = tcg_init_ctx.code_gen_buffer_size;
659 size_t page_size = qemu_real_host_page_size;
660 size_t region_size;
661 size_t n_regions;
662 size_t i;
664 n_regions = tcg_n_regions();
666 /* The first region will be 'aligned - buf' bytes larger than the others */
667 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
668 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
670 * Make region_size a multiple of page_size, using aligned as the start.
671 * As a result of this we might end up with a few extra pages at the end of
672 * the buffer; we will assign those to the last region.
674 region_size = (size - (aligned - buf)) / n_regions;
675 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
677 /* A region must have at least 2 pages; one code, one guard */
678 g_assert(region_size >= 2 * page_size);
680 /* init the region struct */
681 qemu_mutex_init(&region.lock);
682 region.n = n_regions;
683 region.size = region_size - page_size;
684 region.stride = region_size;
685 region.start = buf;
686 region.start_aligned = aligned;
687 /* page-align the end, since its last page will be a guard page */
688 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
689 /* account for that last guard page */
690 region.end -= page_size;
692 /* set guard pages */
693 for (i = 0; i < region.n; i++) {
694 void *start, *end;
695 int rc;
697 tcg_region_bounds(i, &start, &end);
698 rc = qemu_mprotect_none(end, page_size);
699 g_assert(!rc);
702 tcg_region_trees_init();
704 /* In user-mode we support only one ctx, so do the initial allocation now */
705 #ifdef CONFIG_USER_ONLY
707 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
709 g_assert(!err);
711 #endif
715 * All TCG threads except the parent (i.e. the one that called tcg_context_init
716 * and registered the target's TCG globals) must register with this function
717 * before initiating translation.
719 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
720 * of tcg_region_init() for the reasoning behind this.
722 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
723 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
724 * is not used anymore for translation once this function is called.
726 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
727 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
729 #ifdef CONFIG_USER_ONLY
730 void tcg_register_thread(void)
732 tcg_ctx = &tcg_init_ctx;
734 #else
735 void tcg_register_thread(void)
737 TCGContext *s = g_malloc(sizeof(*s));
738 unsigned int i, n;
739 bool err;
741 *s = tcg_init_ctx;
743 /* Relink mem_base. */
744 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
745 if (tcg_init_ctx.temps[i].mem_base) {
746 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
747 tcg_debug_assert(b >= 0 && b < n);
748 s->temps[i].mem_base = &s->temps[b];
752 /* Claim an entry in tcg_ctxs */
753 n = atomic_fetch_inc(&n_tcg_ctxs);
754 g_assert(n < max_cpus);
755 atomic_set(&tcg_ctxs[n], s);
757 tcg_ctx = s;
758 qemu_mutex_lock(&region.lock);
759 err = tcg_region_initial_alloc__locked(tcg_ctx);
760 g_assert(!err);
761 qemu_mutex_unlock(&region.lock);
763 #endif /* !CONFIG_USER_ONLY */
766 * Returns the size (in bytes) of all translated code (i.e. from all regions)
767 * currently in the cache.
768 * See also: tcg_code_capacity()
769 * Do not confuse with tcg_current_code_size(); that one applies to a single
770 * TCG context.
772 size_t tcg_code_size(void)
774 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
775 unsigned int i;
776 size_t total;
778 qemu_mutex_lock(&region.lock);
779 total = region.agg_size_full;
780 for (i = 0; i < n_ctxs; i++) {
781 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
782 size_t size;
784 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
785 g_assert(size <= s->code_gen_buffer_size);
786 total += size;
788 qemu_mutex_unlock(&region.lock);
789 return total;
793 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
794 * regions.
795 * See also: tcg_code_size()
797 size_t tcg_code_capacity(void)
799 size_t guard_size, capacity;
801 /* no need for synchronization; these variables are set at init time */
802 guard_size = region.stride - region.size;
803 capacity = region.end + guard_size - region.start;
804 capacity -= region.n * (guard_size + TCG_HIGHWATER);
805 return capacity;
808 size_t tcg_tb_phys_invalidate_count(void)
810 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
811 unsigned int i;
812 size_t total = 0;
814 for (i = 0; i < n_ctxs; i++) {
815 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
817 total += atomic_read(&s->tb_phys_invalidate_count);
819 return total;
822 /* pool based memory allocation */
823 void *tcg_malloc_internal(TCGContext *s, int size)
825 TCGPool *p;
826 int pool_size;
828 if (size > TCG_POOL_CHUNK_SIZE) {
829 /* big malloc: insert a new pool (XXX: could optimize) */
830 p = g_malloc(sizeof(TCGPool) + size);
831 p->size = size;
832 p->next = s->pool_first_large;
833 s->pool_first_large = p;
834 return p->data;
835 } else {
836 p = s->pool_current;
837 if (!p) {
838 p = s->pool_first;
839 if (!p)
840 goto new_pool;
841 } else {
842 if (!p->next) {
843 new_pool:
844 pool_size = TCG_POOL_CHUNK_SIZE;
845 p = g_malloc(sizeof(TCGPool) + pool_size);
846 p->size = pool_size;
847 p->next = NULL;
848 if (s->pool_current)
849 s->pool_current->next = p;
850 else
851 s->pool_first = p;
852 } else {
853 p = p->next;
857 s->pool_current = p;
858 s->pool_cur = p->data + size;
859 s->pool_end = p->data + p->size;
860 return p->data;
863 void tcg_pool_reset(TCGContext *s)
865 TCGPool *p, *t;
866 for (p = s->pool_first_large; p; p = t) {
867 t = p->next;
868 g_free(p);
870 s->pool_first_large = NULL;
871 s->pool_cur = s->pool_end = NULL;
872 s->pool_current = NULL;
/* Description of one helper; entries are keyed by .func in helper_table. */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
882 #include "exec/helper-proto.h"
884 static const TCGHelperInfo all_helpers[] = {
885 #include "exec/helper-tcg.h"
887 static GHashTable *helper_table;
889 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
890 static void process_op_defs(TCGContext *s);
891 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
892 TCGReg reg, const char *name);
894 void tcg_context_init(TCGContext *s)
896 int op, total_args, n, i;
897 TCGOpDef *def;
898 TCGArgConstraint *args_ct;
899 int *sorted_args;
900 TCGTemp *ts;
902 memset(s, 0, sizeof(*s));
903 s->nb_globals = 0;
905 /* Count total number of arguments and allocate the corresponding
906 space */
907 total_args = 0;
908 for(op = 0; op < NB_OPS; op++) {
909 def = &tcg_op_defs[op];
910 n = def->nb_iargs + def->nb_oargs;
911 total_args += n;
914 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
915 sorted_args = g_malloc(sizeof(int) * total_args);
917 for(op = 0; op < NB_OPS; op++) {
918 def = &tcg_op_defs[op];
919 def->args_ct = args_ct;
920 def->sorted_args = sorted_args;
921 n = def->nb_iargs + def->nb_oargs;
922 sorted_args += n;
923 args_ct += n;
926 /* Register helpers. */
927 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
928 helper_table = g_hash_table_new(NULL, NULL);
930 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
931 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
932 (gpointer)&all_helpers[i]);
935 tcg_target_init(s);
936 process_op_defs(s);
938 /* Reverse the order of the saved registers, assuming they're all at
939 the start of tcg_target_reg_alloc_order. */
940 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
941 int r = tcg_target_reg_alloc_order[n];
942 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
943 break;
946 for (i = 0; i < n; ++i) {
947 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
949 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
950 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
953 tcg_ctx = s;
955 * In user-mode we simply share the init context among threads, since we
956 * use a single region. See the documentation tcg_region_init() for the
957 * reasoning behind this.
958 * In softmmu we will have at most max_cpus TCG threads.
960 #ifdef CONFIG_USER_ONLY
961 tcg_ctxs = &tcg_ctx;
962 n_tcg_ctxs = 1;
963 #else
964 tcg_ctxs = g_new(TCGContext *, max_cpus);
965 #endif
967 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
968 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
969 cpu_env = temp_tcgv_ptr(ts);
973 * Allocate TBs right before their corresponding translated code, making
974 * sure that TBs and code are on different cache lines.
976 TranslationBlock *tcg_tb_alloc(TCGContext *s)
978 uintptr_t align = qemu_icache_linesize;
979 TranslationBlock *tb;
980 void *next;
982 retry:
983 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
984 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
986 if (unlikely(next > s->code_gen_highwater)) {
987 if (tcg_region_alloc(s)) {
988 return NULL;
990 goto retry;
992 atomic_set(&s->code_gen_ptr, next);
993 s->data_gen_ptr = NULL;
994 return tb;
997 void tcg_prologue_init(TCGContext *s)
999 size_t prologue_size, total_size;
1000 void *buf0, *buf1;
1002 /* Put the prologue at the beginning of code_gen_buffer. */
1003 buf0 = s->code_gen_buffer;
1004 total_size = s->code_gen_buffer_size;
1005 s->code_ptr = buf0;
1006 s->code_buf = buf0;
1007 s->data_gen_ptr = NULL;
1008 s->code_gen_prologue = buf0;
1010 /* Compute a high-water mark, at which we voluntarily flush the buffer
1011 and start over. The size here is arbitrary, significantly larger
1012 than we expect the code generation for any one opcode to require. */
1013 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1015 #ifdef TCG_TARGET_NEED_POOL_LABELS
1016 s->pool_labels = NULL;
1017 #endif
1019 /* Generate the prologue. */
1020 tcg_target_qemu_prologue(s);
1022 #ifdef TCG_TARGET_NEED_POOL_LABELS
1023 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1025 bool ok = tcg_out_pool_finalize(s);
1026 tcg_debug_assert(ok);
1028 #endif
1030 buf1 = s->code_ptr;
1031 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1033 /* Deduct the prologue from the buffer. */
1034 prologue_size = tcg_current_code_size(s);
1035 s->code_gen_ptr = buf1;
1036 s->code_gen_buffer = buf1;
1037 s->code_buf = buf1;
1038 total_size -= prologue_size;
1039 s->code_gen_buffer_size = total_size;
1041 tcg_register_jit(s->code_gen_buffer, total_size);
1043 #ifdef DEBUG_DISAS
1044 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1045 qemu_log_lock();
1046 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1047 if (s->data_gen_ptr) {
1048 size_t code_size = s->data_gen_ptr - buf0;
1049 size_t data_size = prologue_size - code_size;
1050 size_t i;
1052 log_disas(buf0, code_size);
1054 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1055 if (sizeof(tcg_target_ulong) == 8) {
1056 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1057 (uintptr_t)s->data_gen_ptr + i,
1058 *(uint64_t *)(s->data_gen_ptr + i));
1059 } else {
1060 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1061 (uintptr_t)s->data_gen_ptr + i,
1062 *(uint32_t *)(s->data_gen_ptr + i));
1065 } else {
1066 log_disas(buf0, prologue_size);
1068 qemu_log("\n");
1069 qemu_log_flush();
1070 qemu_log_unlock();
1072 #endif
1074 /* Assert that goto_ptr is implemented completely. */
1075 if (TCG_TARGET_HAS_goto_ptr) {
1076 tcg_debug_assert(s->code_gen_epilogue != NULL);
1080 void tcg_func_start(TCGContext *s)
1082 tcg_pool_reset(s);
1083 s->nb_temps = s->nb_globals;
1085 /* No temps have been previously allocated for size or locality. */
1086 memset(s->free_temps, 0, sizeof(s->free_temps));
1088 s->nb_ops = 0;
1089 s->nb_labels = 0;
1090 s->current_frame_offset = s->frame_start;
1092 #ifdef CONFIG_DEBUG_TCG
1093 s->goto_tb_issue_mask = 0;
1094 #endif
1096 QTAILQ_INIT(&s->ops);
1097 QTAILQ_INIT(&s->free_ops);
1098 #ifdef CONFIG_DEBUG_TCG
1099 QSIMPLEQ_INIT(&s->labels);
1100 #endif
1103 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1105 int n = s->nb_temps++;
1106 tcg_debug_assert(n < TCG_MAX_TEMPS);
1107 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1110 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1112 TCGTemp *ts;
1114 tcg_debug_assert(s->nb_globals == s->nb_temps);
1115 s->nb_globals++;
1116 ts = tcg_temp_alloc(s);
1117 ts->temp_global = 1;
1119 return ts;
1122 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1123 TCGReg reg, const char *name)
1125 TCGTemp *ts;
1127 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1128 tcg_abort();
1131 ts = tcg_global_alloc(s);
1132 ts->base_type = type;
1133 ts->type = type;
1134 ts->fixed_reg = 1;
1135 ts->reg = reg;
1136 ts->name = name;
1137 tcg_regset_set_reg(s->reserved_regs, reg);
1139 return ts;
1142 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1144 s->frame_start = start;
1145 s->frame_end = start + size;
1146 s->frame_temp
1147 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1150 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1151 intptr_t offset, const char *name)
1153 TCGContext *s = tcg_ctx;
1154 TCGTemp *base_ts = tcgv_ptr_temp(base);
1155 TCGTemp *ts = tcg_global_alloc(s);
1156 int indirect_reg = 0, bigendian = 0;
1157 #ifdef HOST_WORDS_BIGENDIAN
1158 bigendian = 1;
1159 #endif
1161 if (!base_ts->fixed_reg) {
1162 /* We do not support double-indirect registers. */
1163 tcg_debug_assert(!base_ts->indirect_reg);
1164 base_ts->indirect_base = 1;
1165 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1166 ? 2 : 1);
1167 indirect_reg = 1;
1170 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1171 TCGTemp *ts2 = tcg_global_alloc(s);
1172 char buf[64];
1174 ts->base_type = TCG_TYPE_I64;
1175 ts->type = TCG_TYPE_I32;
1176 ts->indirect_reg = indirect_reg;
1177 ts->mem_allocated = 1;
1178 ts->mem_base = base_ts;
1179 ts->mem_offset = offset + bigendian * 4;
1180 pstrcpy(buf, sizeof(buf), name);
1181 pstrcat(buf, sizeof(buf), "_0");
1182 ts->name = strdup(buf);
1184 tcg_debug_assert(ts2 == ts + 1);
1185 ts2->base_type = TCG_TYPE_I64;
1186 ts2->type = TCG_TYPE_I32;
1187 ts2->indirect_reg = indirect_reg;
1188 ts2->mem_allocated = 1;
1189 ts2->mem_base = base_ts;
1190 ts2->mem_offset = offset + (1 - bigendian) * 4;
1191 pstrcpy(buf, sizeof(buf), name);
1192 pstrcat(buf, sizeof(buf), "_1");
1193 ts2->name = strdup(buf);
1194 } else {
1195 ts->base_type = type;
1196 ts->type = type;
1197 ts->indirect_reg = indirect_reg;
1198 ts->mem_allocated = 1;
1199 ts->mem_base = base_ts;
1200 ts->mem_offset = offset;
1201 ts->name = name;
1203 return ts;
1206 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1208 TCGContext *s = tcg_ctx;
1209 TCGTemp *ts;
1210 int idx, k;
1212 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1213 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1214 if (idx < TCG_MAX_TEMPS) {
1215 /* There is already an available temp with the right type. */
1216 clear_bit(idx, s->free_temps[k].l);
1218 ts = &s->temps[idx];
1219 ts->temp_allocated = 1;
1220 tcg_debug_assert(ts->base_type == type);
1221 tcg_debug_assert(ts->temp_local == temp_local);
1222 } else {
1223 ts = tcg_temp_alloc(s);
1224 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1225 TCGTemp *ts2 = tcg_temp_alloc(s);
1227 ts->base_type = type;
1228 ts->type = TCG_TYPE_I32;
1229 ts->temp_allocated = 1;
1230 ts->temp_local = temp_local;
1232 tcg_debug_assert(ts2 == ts + 1);
1233 ts2->base_type = TCG_TYPE_I64;
1234 ts2->type = TCG_TYPE_I32;
1235 ts2->temp_allocated = 1;
1236 ts2->temp_local = temp_local;
1237 } else {
1238 ts->base_type = type;
1239 ts->type = type;
1240 ts->temp_allocated = 1;
1241 ts->temp_local = temp_local;
1245 #if defined(CONFIG_DEBUG_TCG)
1246 s->temps_in_use++;
1247 #endif
1248 return ts;
1251 TCGv_vec tcg_temp_new_vec(TCGType type)
1253 TCGTemp *t;
1255 #ifdef CONFIG_DEBUG_TCG
1256 switch (type) {
1257 case TCG_TYPE_V64:
1258 assert(TCG_TARGET_HAS_v64);
1259 break;
1260 case TCG_TYPE_V128:
1261 assert(TCG_TARGET_HAS_v128);
1262 break;
1263 case TCG_TYPE_V256:
1264 assert(TCG_TARGET_HAS_v256);
1265 break;
1266 default:
1267 g_assert_not_reached();
1269 #endif
1271 t = tcg_temp_new_internal(type, 0);
1272 return temp_tcgv_vec(t);
1275 /* Create a new temp of the same type as an existing temp. */
1276 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1278 TCGTemp *t = tcgv_vec_temp(match);
1280 tcg_debug_assert(t->temp_allocated != 0);
1282 t = tcg_temp_new_internal(t->base_type, 0);
1283 return temp_tcgv_vec(t);
1286 void tcg_temp_free_internal(TCGTemp *ts)
1288 TCGContext *s = tcg_ctx;
1289 int k, idx;
1291 #if defined(CONFIG_DEBUG_TCG)
1292 s->temps_in_use--;
1293 if (s->temps_in_use < 0) {
1294 fprintf(stderr, "More temporaries freed than allocated!\n");
1296 #endif
1298 tcg_debug_assert(ts->temp_global == 0);
1299 tcg_debug_assert(ts->temp_allocated != 0);
1300 ts->temp_allocated = 0;
1302 idx = temp_idx(ts);
1303 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1304 set_bit(idx, s->free_temps[k].l);
1307 TCGv_i32 tcg_const_i32(int32_t val)
1309 TCGv_i32 t0;
1310 t0 = tcg_temp_new_i32();
1311 tcg_gen_movi_i32(t0, val);
1312 return t0;
1315 TCGv_i64 tcg_const_i64(int64_t val)
1317 TCGv_i64 t0;
1318 t0 = tcg_temp_new_i64();
1319 tcg_gen_movi_i64(t0, val);
1320 return t0;
1323 TCGv_i32 tcg_const_local_i32(int32_t val)
1325 TCGv_i32 t0;
1326 t0 = tcg_temp_local_new_i32();
1327 tcg_gen_movi_i32(t0, val);
1328 return t0;
1331 TCGv_i64 tcg_const_local_i64(int64_t val)
1333 TCGv_i64 t0;
1334 t0 = tcg_temp_local_new_i64();
1335 tcg_gen_movi_i64(t0, val);
1336 return t0;
1339 #if defined(CONFIG_DEBUG_TCG)
1340 void tcg_clear_temp_count(void)
1342 TCGContext *s = tcg_ctx;
1343 s->temps_in_use = 0;
1346 int tcg_check_temp_count(void)
1348 TCGContext *s = tcg_ctx;
1349 if (s->temps_in_use) {
1350 /* Clear the count so that we don't give another
1351 * warning immediately next time around.
1353 s->temps_in_use = 0;
1354 return 1;
1356 return 0;
1358 #endif
1360 /* Return true if OP may appear in the opcode stream.
1361 Test the runtime variable that controls each opcode. */
1362 bool tcg_op_supported(TCGOpcode op)
/* True when at least one of the three vector widths is enabled; every
   *_vec case below is additionally gated on it. */
1364 const bool have_vec
1365 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1367 switch (op) {
/* Ops reported as supported unconditionally. */
1368 case INDEX_op_discard:
1369 case INDEX_op_set_label:
1370 case INDEX_op_call:
1371 case INDEX_op_br:
1372 case INDEX_op_mb:
1373 case INDEX_op_insn_start:
1374 case INDEX_op_exit_tb:
1375 case INDEX_op_goto_tb:
1376 case INDEX_op_qemu_ld_i32:
1377 case INDEX_op_qemu_st_i32:
1378 case INDEX_op_qemu_ld_i64:
1379 case INDEX_op_qemu_st_i64:
1380 return true;
1382 case INDEX_op_goto_ptr:
1383 return TCG_TARGET_HAS_goto_ptr;
/* Basic 32-bit integer ops: reported as supported unconditionally. */
1385 case INDEX_op_mov_i32:
1386 case INDEX_op_movi_i32:
1387 case INDEX_op_setcond_i32:
1388 case INDEX_op_brcond_i32:
1389 case INDEX_op_ld8u_i32:
1390 case INDEX_op_ld8s_i32:
1391 case INDEX_op_ld16u_i32:
1392 case INDEX_op_ld16s_i32:
1393 case INDEX_op_ld_i32:
1394 case INDEX_op_st8_i32:
1395 case INDEX_op_st16_i32:
1396 case INDEX_op_st_i32:
1397 case INDEX_op_add_i32:
1398 case INDEX_op_sub_i32:
1399 case INDEX_op_mul_i32:
1400 case INDEX_op_and_i32:
1401 case INDEX_op_or_i32:
1402 case INDEX_op_xor_i32:
1403 case INDEX_op_shl_i32:
1404 case INDEX_op_shr_i32:
1405 case INDEX_op_sar_i32:
1406 return true;
/* Optional 32-bit ops: each group is gated by its TCG_TARGET_HAS_* flag. */
1408 case INDEX_op_movcond_i32:
1409 return TCG_TARGET_HAS_movcond_i32;
1410 case INDEX_op_div_i32:
1411 case INDEX_op_divu_i32:
1412 return TCG_TARGET_HAS_div_i32;
1413 case INDEX_op_rem_i32:
1414 case INDEX_op_remu_i32:
1415 return TCG_TARGET_HAS_rem_i32;
1416 case INDEX_op_div2_i32:
1417 case INDEX_op_divu2_i32:
1418 return TCG_TARGET_HAS_div2_i32;
1419 case INDEX_op_rotl_i32:
1420 case INDEX_op_rotr_i32:
1421 return TCG_TARGET_HAS_rot_i32;
1422 case INDEX_op_deposit_i32:
1423 return TCG_TARGET_HAS_deposit_i32;
1424 case INDEX_op_extract_i32:
1425 return TCG_TARGET_HAS_extract_i32;
1426 case INDEX_op_sextract_i32:
1427 return TCG_TARGET_HAS_sextract_i32;
1428 case INDEX_op_add2_i32:
1429 return TCG_TARGET_HAS_add2_i32;
1430 case INDEX_op_sub2_i32:
1431 return TCG_TARGET_HAS_sub2_i32;
1432 case INDEX_op_mulu2_i32:
1433 return TCG_TARGET_HAS_mulu2_i32;
1434 case INDEX_op_muls2_i32:
1435 return TCG_TARGET_HAS_muls2_i32;
1436 case INDEX_op_muluh_i32:
1437 return TCG_TARGET_HAS_muluh_i32;
1438 case INDEX_op_mulsh_i32:
1439 return TCG_TARGET_HAS_mulsh_i32;
1440 case INDEX_op_ext8s_i32:
1441 return TCG_TARGET_HAS_ext8s_i32;
1442 case INDEX_op_ext16s_i32:
1443 return TCG_TARGET_HAS_ext16s_i32;
1444 case INDEX_op_ext8u_i32:
1445 return TCG_TARGET_HAS_ext8u_i32;
1446 case INDEX_op_ext16u_i32:
1447 return TCG_TARGET_HAS_ext16u_i32;
1448 case INDEX_op_bswap16_i32:
1449 return TCG_TARGET_HAS_bswap16_i32;
1450 case INDEX_op_bswap32_i32:
1451 return TCG_TARGET_HAS_bswap32_i32;
1452 case INDEX_op_not_i32:
1453 return TCG_TARGET_HAS_not_i32;
1454 case INDEX_op_neg_i32:
1455 return TCG_TARGET_HAS_neg_i32;
1456 case INDEX_op_andc_i32:
1457 return TCG_TARGET_HAS_andc_i32;
1458 case INDEX_op_orc_i32:
1459 return TCG_TARGET_HAS_orc_i32;
1460 case INDEX_op_eqv_i32:
1461 return TCG_TARGET_HAS_eqv_i32;
1462 case INDEX_op_nand_i32:
1463 return TCG_TARGET_HAS_nand_i32;
1464 case INDEX_op_nor_i32:
1465 return TCG_TARGET_HAS_nor_i32;
1466 case INDEX_op_clz_i32:
1467 return TCG_TARGET_HAS_clz_i32;
1468 case INDEX_op_ctz_i32:
1469 return TCG_TARGET_HAS_ctz_i32;
1470 case INDEX_op_ctpop_i32:
1471 return TCG_TARGET_HAS_ctpop_i32;
/* The "2" compare ops are reported only when host registers are 32 bits. */
1473 case INDEX_op_brcond2_i32:
1474 case INDEX_op_setcond2_i32:
1475 return TCG_TARGET_REG_BITS == 32;
/* Basic 64-bit integer ops: reported only when host registers are 64 bits. */
1477 case INDEX_op_mov_i64:
1478 case INDEX_op_movi_i64:
1479 case INDEX_op_setcond_i64:
1480 case INDEX_op_brcond_i64:
1481 case INDEX_op_ld8u_i64:
1482 case INDEX_op_ld8s_i64:
1483 case INDEX_op_ld16u_i64:
1484 case INDEX_op_ld16s_i64:
1485 case INDEX_op_ld32u_i64:
1486 case INDEX_op_ld32s_i64:
1487 case INDEX_op_ld_i64:
1488 case INDEX_op_st8_i64:
1489 case INDEX_op_st16_i64:
1490 case INDEX_op_st32_i64:
1491 case INDEX_op_st_i64:
1492 case INDEX_op_add_i64:
1493 case INDEX_op_sub_i64:
1494 case INDEX_op_mul_i64:
1495 case INDEX_op_and_i64:
1496 case INDEX_op_or_i64:
1497 case INDEX_op_xor_i64:
1498 case INDEX_op_shl_i64:
1499 case INDEX_op_shr_i64:
1500 case INDEX_op_sar_i64:
1501 case INDEX_op_ext_i32_i64:
1502 case INDEX_op_extu_i32_i64:
1503 return TCG_TARGET_REG_BITS == 64;
/* Optional 64-bit ops: each group is gated by its TCG_TARGET_HAS_* flag. */
1505 case INDEX_op_movcond_i64:
1506 return TCG_TARGET_HAS_movcond_i64;
1507 case INDEX_op_div_i64:
1508 case INDEX_op_divu_i64:
1509 return TCG_TARGET_HAS_div_i64;
1510 case INDEX_op_rem_i64:
1511 case INDEX_op_remu_i64:
1512 return TCG_TARGET_HAS_rem_i64;
1513 case INDEX_op_div2_i64:
1514 case INDEX_op_divu2_i64:
1515 return TCG_TARGET_HAS_div2_i64;
1516 case INDEX_op_rotl_i64:
1517 case INDEX_op_rotr_i64:
1518 return TCG_TARGET_HAS_rot_i64;
1519 case INDEX_op_deposit_i64:
1520 return TCG_TARGET_HAS_deposit_i64;
1521 case INDEX_op_extract_i64:
1522 return TCG_TARGET_HAS_extract_i64;
1523 case INDEX_op_sextract_i64:
1524 return TCG_TARGET_HAS_sextract_i64;
1525 case INDEX_op_extrl_i64_i32:
1526 return TCG_TARGET_HAS_extrl_i64_i32;
1527 case INDEX_op_extrh_i64_i32:
1528 return TCG_TARGET_HAS_extrh_i64_i32;
1529 case INDEX_op_ext8s_i64:
1530 return TCG_TARGET_HAS_ext8s_i64;
1531 case INDEX_op_ext16s_i64:
1532 return TCG_TARGET_HAS_ext16s_i64;
1533 case INDEX_op_ext32s_i64:
1534 return TCG_TARGET_HAS_ext32s_i64;
1535 case INDEX_op_ext8u_i64:
1536 return TCG_TARGET_HAS_ext8u_i64;
1537 case INDEX_op_ext16u_i64:
1538 return TCG_TARGET_HAS_ext16u_i64;
1539 case INDEX_op_ext32u_i64:
1540 return TCG_TARGET_HAS_ext32u_i64;
1541 case INDEX_op_bswap16_i64:
1542 return TCG_TARGET_HAS_bswap16_i64;
1543 case INDEX_op_bswap32_i64:
1544 return TCG_TARGET_HAS_bswap32_i64;
1545 case INDEX_op_bswap64_i64:
1546 return TCG_TARGET_HAS_bswap64_i64;
1547 case INDEX_op_not_i64:
1548 return TCG_TARGET_HAS_not_i64;
1549 case INDEX_op_neg_i64:
1550 return TCG_TARGET_HAS_neg_i64;
1551 case INDEX_op_andc_i64:
1552 return TCG_TARGET_HAS_andc_i64;
1553 case INDEX_op_orc_i64:
1554 return TCG_TARGET_HAS_orc_i64;
1555 case INDEX_op_eqv_i64:
1556 return TCG_TARGET_HAS_eqv_i64;
1557 case INDEX_op_nand_i64:
1558 return TCG_TARGET_HAS_nand_i64;
1559 case INDEX_op_nor_i64:
1560 return TCG_TARGET_HAS_nor_i64;
1561 case INDEX_op_clz_i64:
1562 return TCG_TARGET_HAS_clz_i64;
1563 case INDEX_op_ctz_i64:
1564 return TCG_TARGET_HAS_ctz_i64;
1565 case INDEX_op_ctpop_i64:
1566 return TCG_TARGET_HAS_ctpop_i64;
1567 case INDEX_op_add2_i64:
1568 return TCG_TARGET_HAS_add2_i64;
1569 case INDEX_op_sub2_i64:
1570 return TCG_TARGET_HAS_sub2_i64;
1571 case INDEX_op_mulu2_i64:
1572 return TCG_TARGET_HAS_mulu2_i64;
1573 case INDEX_op_muls2_i64:
1574 return TCG_TARGET_HAS_muls2_i64;
1575 case INDEX_op_muluh_i64:
1576 return TCG_TARGET_HAS_muluh_i64;
1577 case INDEX_op_mulsh_i64:
1578 return TCG_TARGET_HAS_mulsh_i64;
/* Vector ops: the basic set needs only have_vec; the rest also need
   their own TCG_TARGET_HAS_*_vec flag. */
1580 case INDEX_op_mov_vec:
1581 case INDEX_op_dup_vec:
1582 case INDEX_op_dupi_vec:
1583 case INDEX_op_ld_vec:
1584 case INDEX_op_st_vec:
1585 case INDEX_op_add_vec:
1586 case INDEX_op_sub_vec:
1587 case INDEX_op_and_vec:
1588 case INDEX_op_or_vec:
1589 case INDEX_op_xor_vec:
1590 case INDEX_op_cmp_vec:
1591 return have_vec;
1592 case INDEX_op_dup2_vec:
1593 return have_vec && TCG_TARGET_REG_BITS == 32;
1594 case INDEX_op_not_vec:
1595 return have_vec && TCG_TARGET_HAS_not_vec;
1596 case INDEX_op_neg_vec:
1597 return have_vec && TCG_TARGET_HAS_neg_vec;
1598 case INDEX_op_andc_vec:
1599 return have_vec && TCG_TARGET_HAS_andc_vec;
1600 case INDEX_op_orc_vec:
1601 return have_vec && TCG_TARGET_HAS_orc_vec;
1602 case INDEX_op_mul_vec:
1603 return have_vec && TCG_TARGET_HAS_mul_vec;
1604 case INDEX_op_shli_vec:
1605 case INDEX_op_shri_vec:
1606 case INDEX_op_sari_vec:
1607 return have_vec && TCG_TARGET_HAS_shi_vec;
1608 case INDEX_op_shls_vec:
1609 case INDEX_op_shrs_vec:
1610 case INDEX_op_sars_vec:
1611 return have_vec && TCG_TARGET_HAS_shs_vec;
1612 case INDEX_op_shlv_vec:
1613 case INDEX_op_shrv_vec:
1614 case INDEX_op_sarv_vec:
1615 return have_vec && TCG_TARGET_HAS_shv_vec;
1616 case INDEX_op_ssadd_vec:
1617 case INDEX_op_usadd_vec:
1618 case INDEX_op_sssub_vec:
1619 case INDEX_op_ussub_vec:
1620 return have_vec && TCG_TARGET_HAS_sat_vec;
1621 case INDEX_op_smin_vec:
1622 case INDEX_op_umin_vec:
1623 case INDEX_op_smax_vec:
1624 case INDEX_op_umax_vec:
1625 return have_vec && TCG_TARGET_HAS_minmax_vec;
/* Any opcode not listed above must lie strictly between
   INDEX_op_last_generic and NB_OPS (checked in debug builds only) and is
   reported as supported.  Presumably these are backend-specific opcodes
   -- TODO confirm against the opcode enum. */
1627 default:
1628 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1629 return true;
1633 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1634 and endian swap. Maybe it would be better to do the alignment
1635 and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op that invokes helper FUNC.
 *   func:  helper address; also the key used to look up its TCGHelperInfo.
 *   ret:   temp that receives the result, or NULL when there is none.
 *   nargs: number of entries in ARGS.
 *   args:  input temps, in call order.
 * The op's args are filled in this order: return temps, input temps
 * (with dummy padding where alignment requires it), the function
 * address, and finally the call flags.
 */
1636 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1638 int i, real_args, nb_rets, pi;
1639 unsigned sizemask, flags;
1640 TCGHelperInfo *info;
1641 TCGOp *op;
/* NOTE(review): info is dereferenced without a NULL check, so FUNC must
   already be present in helper_table. */
1643 info = g_hash_table_lookup(helper_table, (gpointer)func);
1644 flags = info->flags;
1645 sizemask = info->sizemask;
/* sizemask layout as used below: bit 0 flags a 64-bit return value;
   for argument i, bit (i+1)*2 flags it as 64-bit and the next bit up
   flags it as signed. */
1647 #if defined(__sparc__) && !defined(__arch64__) \
1648 && !defined(CONFIG_TCG_INTERPRETER)
1649 /* We have 64-bit values in one register, but need to pass as two
1650 separate parameters. Split them. */
1651 int orig_sizemask = sizemask;
1652 int orig_nargs = nargs;
1653 TCGv_i64 retl, reth;
1654 TCGTemp *split_args[MAX_OPC_PARAM];
1656 retl = NULL;
1657 reth = NULL;
1658 if (sizemask != 0) {
1659 for (i = real_args = 0; i < nargs; ++i) {
1660 int is_64bit = sizemask & (1 << (i+1)*2);
1661 if (is_64bit) {
1662 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1663 TCGv_i32 h = tcg_temp_new_i32();
1664 TCGv_i32 l = tcg_temp_new_i32();
1665 tcg_gen_extr_i64_i32(l, h, orig);
1666 split_args[real_args++] = tcgv_i32_temp(h);
1667 split_args[real_args++] = tcgv_i32_temp(l);
1668 } else {
1669 split_args[real_args++] = args[i];
/* From here on the call is described by the split argument list, and
   sizemask is cleared so the generic code below sees only 32-bit args. */
1672 nargs = real_args;
1673 args = split_args;
1674 sizemask = 0;
1676 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
/* Replace every 32-bit argument with a new i64 temp holding its sign- or
   zero-extended value; these temps are freed after the call is emitted. */
1677 for (i = 0; i < nargs; ++i) {
1678 int is_64bit = sizemask & (1 << (i+1)*2);
1679 int is_signed = sizemask & (2 << (i+1)*2);
1680 if (!is_64bit) {
1681 TCGv_i64 temp = tcg_temp_new_i64();
1682 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1683 if (is_signed) {
1684 tcg_gen_ext32s_i64(temp, orig);
1685 } else {
1686 tcg_gen_ext32u_i64(temp, orig);
1688 args[i] = tcgv_i64_temp(temp);
1691 #endif /* TCG_TARGET_EXTEND_ARGS */
1693 op = tcg_emit_op(INDEX_op_call);
/* pi is the write cursor into op->args; the return temps go first. */
1695 pi = 0;
1696 if (ret != NULL) {
1697 #if defined(__sparc__) && !defined(__arch64__) \
1698 && !defined(CONFIG_TCG_INTERPRETER)
1699 if (orig_sizemask & 1) {
1700 /* The 32-bit ABI is going to return the 64-bit value in
1701 the %o0/%o1 register pair. Prepare for this by using
1702 two return temporaries, and reassemble below. */
1703 retl = tcg_temp_new_i64();
1704 reth = tcg_temp_new_i64();
1705 op->args[pi++] = tcgv_i64_arg(reth);
1706 op->args[pi++] = tcgv_i64_arg(retl);
1707 nb_rets = 2;
1708 } else {
1709 op->args[pi++] = temp_arg(ret);
1710 nb_rets = 1;
1712 #else
/* A 64-bit result on a narrower host uses the temp pair ret, ret + 1;
   which of the two is listed first depends on host endianness. */
1713 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1714 #ifdef HOST_WORDS_BIGENDIAN
1715 op->args[pi++] = temp_arg(ret + 1);
1716 op->args[pi++] = temp_arg(ret);
1717 #else
1718 op->args[pi++] = temp_arg(ret);
1719 op->args[pi++] = temp_arg(ret + 1);
1720 #endif
1721 nb_rets = 2;
1722 } else {
1723 op->args[pi++] = temp_arg(ret);
1724 nb_rets = 1;
1726 #endif
1727 } else {
1728 nb_rets = 0;
1730 TCGOP_CALLO(op) = nb_rets;
/* Input arguments.  real_args counts the slots actually emitted,
   including dummy padding and both halves of a split 64-bit value. */
1732 real_args = 0;
1733 for (i = 0; i < nargs; i++) {
1734 int is_64bit = sizemask & (1 << (i+1)*2);
1735 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1736 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1737 /* some targets want aligned 64 bit args */
1738 if (real_args & 1) {
1739 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1740 real_args++;
1742 #endif
1743 /* If stack grows up, then we will be placing successive
1744 arguments at lower addresses, which means we need to
1745 reverse the order compared to how we would normally
1746 treat either big or little-endian. For those arguments
1747 that will wind up in registers, this still works for
1748 HPPA (the only current STACK_GROWSUP target) since the
1749 argument registers are *also* allocated in decreasing
1750 order. If another such target is added, this logic may
1751 have to get more complicated to differentiate between
1752 stack arguments and register arguments. */
1753 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1754 op->args[pi++] = temp_arg(args[i] + 1);
1755 op->args[pi++] = temp_arg(args[i]);
1756 #else
1757 op->args[pi++] = temp_arg(args[i]);
1758 op->args[pi++] = temp_arg(args[i] + 1);
1759 #endif
1760 real_args += 2;
1761 continue;
1764 op->args[pi++] = temp_arg(args[i]);
1765 real_args++;
/* Trailing constant arguments: the helper address, then its flags. */
1767 op->args[pi++] = (uintptr_t)func;
1768 op->args[pi++] = flags;
1769 TCGOP_CALLI(op) = real_args;
1771 /* Make sure the fields didn't overflow. */
1772 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1773 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1775 #if defined(__sparc__) && !defined(__arch64__) \
1776 && !defined(CONFIG_TCG_INTERPRETER)
1777 /* Free all of the parts we allocated above. */
1778 for (i = real_args = 0; i < orig_nargs; ++i) {
1779 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1780 if (is_64bit) {
1781 tcg_temp_free_internal(args[real_args++]);
1782 tcg_temp_free_internal(args[real_args++]);
1783 } else {
1784 real_args++;
1787 if (orig_sizemask & 1) {
1788 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1789 Note that describing these as TCGv_i64 eliminates an unnecessary
1790 zero-extension that tcg_gen_concat_i32_i64 would create. */
1791 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1792 tcg_temp_free_i64(retl);
1793 tcg_temp_free_i64(reth);
1795 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
/* Release the extension temps created before the call was emitted. */
1796 for (i = 0; i < nargs; ++i) {
1797 int is_64bit = sizemask & (1 << (i+1)*2);
1798 if (!is_64bit) {
1799 tcg_temp_free_internal(args[i]);
1802 #endif /* TCG_TARGET_EXTEND_ARGS */
1805 static void tcg_reg_alloc_start(TCGContext *s)
1807 int i, n;
1808 TCGTemp *ts;
1810 for (i = 0, n = s->nb_globals; i < n; i++) {
1811 ts = &s->temps[i];
1812 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1814 for (n = s->nb_temps; i < n; i++) {
1815 ts = &s->temps[i];
1816 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1817 ts->mem_allocated = 0;
1818 ts->fixed_reg = 0;
1821 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1824 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1825 TCGTemp *ts)
1827 int idx = temp_idx(ts);
1829 if (ts->temp_global) {
1830 pstrcpy(buf, buf_size, ts->name);
1831 } else if (ts->temp_local) {
1832 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1833 } else {
1834 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1836 return buf;
1839 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1840 int buf_size, TCGArg arg)
1842 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1845 /* Find helper name. */
1846 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1848 const char *ret = NULL;
1849 if (helper_table) {
1850 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1851 if (info) {
1852 ret = info->name;
1855 return ret;
/* Printable names for comparison conditions, indexed by TCG_COND_* value.
   tcg_dump_ops() looks conditions up here and falls back to printing the
   raw value when the index is out of range or the entry is NULL. */
1858 static const char * const cond_name[] =
1860 [TCG_COND_NEVER] = "never",
1861 [TCG_COND_ALWAYS] = "always",
1862 [TCG_COND_EQ] = "eq",
1863 [TCG_COND_NE] = "ne",
1864 [TCG_COND_LT] = "lt",
1865 [TCG_COND_GE] = "ge",
1866 [TCG_COND_LE] = "le",
1867 [TCG_COND_GT] = "gt",
1868 [TCG_COND_LTU] = "ltu",
1869 [TCG_COND_GEU] = "geu",
1870 [TCG_COND_LEU] = "leu",
1871 [TCG_COND_GTU] = "gtu"
/* Printable names for guest load/store memory operations.  tcg_dump_ops()
   indexes this table with (memop & (MO_BSWAP | MO_SSIZE)).  Only the
   encodings listed here have an entry; any other index inside the array
   is NULL. */
1874 static const char * const ldst_name[] =
1876 [MO_UB] = "ub",
1877 [MO_SB] = "sb",
1878 [MO_LEUW] = "leuw",
1879 [MO_LESW] = "lesw",
1880 [MO_LEUL] = "leul",
1881 [MO_LESL] = "lesl",
1882 [MO_LEQ] = "leq",
1883 [MO_BEUW] = "beuw",
1884 [MO_BESW] = "besw",
1885 [MO_BEUL] = "beul",
1886 [MO_BESL] = "besl",
1887 [MO_BEQ] = "beq",
/* Printable prefix for the alignment field of a memory operation, indexed
   by (memop & MO_AMASK) >> MO_ASHIFT.  With ALIGNED_ONLY defined the
   MO_ALIGN entry is the empty string and MO_UNALN prints "un+"; without
   it MO_UNALN is the empty string and MO_ALIGN prints "al+". */
1890 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1891 #ifdef ALIGNED_ONLY
1892 [MO_UNALN >> MO_ASHIFT] = "un+",
1893 [MO_ALIGN >> MO_ASHIFT] = "",
1894 #else
1895 [MO_UNALN >> MO_ASHIFT] = "",
1896 [MO_ALIGN >> MO_ASHIFT] = "al+",
1897 #endif
1898 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1899 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1900 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1901 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1902 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1903 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1906 static inline bool tcg_regset_single(TCGRegSet d)
1908 return (d & (d - 1)) == 0;
1911 static inline TCGReg tcg_regset_first(TCGRegSet d)
1913 if (TCG_TARGET_NB_REGS <= 32) {
1914 return ctz32(d);
1915 } else {
1916 return ctz64(d);
/*
 * Write every op in s->ops to the QEMU log, each followed by a newline.
 * After the op text, non-zero liveness information (op->life) is printed
 * as "sync:" / "dead:" lists; when have_prefs is true the output
 * register preference of each output argument is printed as well.
 */
1920 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1922 char buf[128];
1923 TCGOp *op;
1925 QTAILQ_FOREACH(op, &s->ops, link) {
1926 int i, k, nb_oargs, nb_iargs, nb_cargs;
1927 const TCGOpDef *def;
1928 TCGOpcode c;
/* col accumulates the return values of the qemu_log() calls for this op
   and drives the padding to column 40 further down. */
1929 int col = 0;
1931 c = op->opc;
1932 def = &tcg_op_defs[c];
/* insn_start: print TARGET_INSN_START_WORDS values and nothing else. */
1934 if (c == INDEX_op_insn_start) {
1935 nb_oargs = 0;
1936 col += qemu_log("\n ----");
1938 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1939 target_ulong a;
/* When a target word is wider than a host register it occupies two
   consecutive args: low 32 bits first, then the high 32 bits. */
1940 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1941 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1942 #else
1943 a = op->args[i];
1944 #endif
1945 col += qemu_log(" " TARGET_FMT_lx, a);
1947 } else if (c == INDEX_op_call) {
1948 /* variable number of arguments */
1949 nb_oargs = TCGOP_CALLO(op);
1950 nb_iargs = TCGOP_CALLI(op);
1951 nb_cargs = def->nb_cargs;
/* The helper address and the call flags follow the output and input
   args in op->args. */
1953 /* function name, flags, out args */
1954 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1955 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1956 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1957 for (i = 0; i < nb_oargs; i++) {
1958 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1959 op->args[i]));
1961 for (i = 0; i < nb_iargs; i++) {
1962 TCGArg arg = op->args[nb_oargs + i];
1963 const char *t = "<dummy>";
1964 if (arg != TCG_CALL_DUMMY_ARG) {
1965 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1967 col += qemu_log(",%s", t);
1969 } else {
1970 col += qemu_log(" %s ", def->name);
1972 nb_oargs = def->nb_oargs;
1973 nb_iargs = def->nb_iargs;
1974 nb_cargs = def->nb_cargs;
1976 if (def->flags & TCG_OPF_VECTOR) {
1977 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1978 8 << TCGOP_VECE(op));
/* k walks op->args across outputs, inputs and then constants; i is
   reused below to count how many constant args were already printed. */
1981 k = 0;
1982 for (i = 0; i < nb_oargs; i++) {
1983 if (k != 0) {
1984 col += qemu_log(",");
1986 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1987 op->args[k++]));
1989 for (i = 0; i < nb_iargs; i++) {
1990 if (k != 0) {
1991 col += qemu_log(",");
1993 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1994 op->args[k++]));
/* First constant arg with a symbolic form: a condition code or a
   memory-operation descriptor. */
1996 switch (c) {
1997 case INDEX_op_brcond_i32:
1998 case INDEX_op_setcond_i32:
1999 case INDEX_op_movcond_i32:
2000 case INDEX_op_brcond2_i32:
2001 case INDEX_op_setcond2_i32:
2002 case INDEX_op_brcond_i64:
2003 case INDEX_op_setcond_i64:
2004 case INDEX_op_movcond_i64:
2005 case INDEX_op_cmp_vec:
2006 if (op->args[k] < ARRAY_SIZE(cond_name)
2007 && cond_name[op->args[k]]) {
2008 col += qemu_log(",%s", cond_name[op->args[k++]]);
2009 } else {
2010 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2012 i = 1;
2013 break;
2014 case INDEX_op_qemu_ld_i32:
2015 case INDEX_op_qemu_st_i32:
2016 case INDEX_op_qemu_ld_i64:
2017 case INDEX_op_qemu_st_i64:
2019 TCGMemOpIdx oi = op->args[k++];
/* NOTE(review): this local 'op' shadows the loop's TCGOp *op until the
   end of the enclosing brace block. */
2020 TCGMemOp op = get_memop(oi);
2021 unsigned ix = get_mmuidx(oi);
/* Bits outside alignment/bswap/size-sign cannot be named: print raw. */
2023 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2024 col += qemu_log(",$0x%x,%u", op, ix);
2025 } else {
2026 const char *s_al, *s_op;
2027 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
/* NOTE(review): ldst_name only names the encodings listed in its
   initializer; confirm that an index without an entry (or past the end
   of the table) cannot reach this lookup. */
2028 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2029 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2031 i = 1;
2033 break;
2034 default:
2035 i = 0;
2036 break;
/* Ops whose next constant arg is a label: print it as $L<id>. */
2038 switch (c) {
2039 case INDEX_op_set_label:
2040 case INDEX_op_br:
2041 case INDEX_op_brcond_i32:
2042 case INDEX_op_brcond_i64:
2043 case INDEX_op_brcond2_i32:
2044 col += qemu_log("%s$L%d", k ? "," : "",
2045 arg_label(op->args[k])->id);
2046 i++, k++;
2047 break;
2048 default:
2049 break;
/* Any remaining constant args are printed as raw hex values. */
2051 for (; i < nb_cargs; i++, k++) {
2052 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
/* Pad with spaces up to column 40 before the annotations.  Note that
   this writes to qemu_logfile directly instead of through qemu_log(). */
2056 if (have_prefs || op->life) {
2057 for (; col < 40; ++col) {
2058 putc(' ', qemu_logfile);
2062 if (op->life) {
2063 unsigned life = op->life;
/* SYNC_ARG * 3 selects the SYNC_ARG bit and the bit above it, i.e. the
   sync flags of arguments 0 and 1 tested by the loop below. */
2065 if (life & (SYNC_ARG * 3)) {
2066 qemu_log(" sync:");
2067 for (i = 0; i < 2; ++i) {
2068 if (life & (SYNC_ARG << i)) {
2069 qemu_log(" %d", i);
/* Dividing by DEAD_ARG moves the dead-argument flags down so that bit i
   refers to argument i (DEAD_ARG is presumably a power of two -- TODO
   confirm). */
2073 life /= DEAD_ARG;
2074 if (life) {
2075 qemu_log(" dead:");
2076 for (i = 0; life; ++i, life >>= 1) {
2077 if (life & 1) {
2078 qemu_log(" %d", i);
/* Preferences print as "none", "all", a single register name (debug
   builds only), or the raw register mask in hex. */
2084 if (have_prefs) {
2085 for (i = 0; i < nb_oargs; ++i) {
2086 TCGRegSet set = op->output_pref[i];
2088 if (i == 0) {
2089 qemu_log(" pref=");
2090 } else {
2091 qemu_log(",");
2093 if (set == 0) {
2094 qemu_log("none");
2095 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2096 qemu_log("all");
2097 #ifdef CONFIG_DEBUG_TCG
2098 } else if (tcg_regset_single(set)) {
2099 TCGReg reg = tcg_regset_first(set);
2100 qemu_log("%s", tcg_target_reg_names[reg]);
2101 #endif
2102 } else if (TCG_TARGET_NB_REGS <= 32) {
2103 qemu_log("%#x", (uint32_t)set);
2104 } else {
2105 qemu_log("%#" PRIx64, (uint64_t)set);
2110 qemu_log("\n");
2114 /* we give more priority to constraints with less registers */
2115 static int get_constraint_priority(const TCGOpDef *def, int k)
2117 const TCGArgConstraint *arg_ct;
2119 int i, n;
2120 arg_ct = &def->args_ct[k];
2121 if (arg_ct->ct & TCG_CT_ALIAS) {
2122 /* an alias is equivalent to a single register */
2123 n = 1;
2124 } else {
2125 if (!(arg_ct->ct & TCG_CT_REG))
2126 return 0;
2127 n = 0;
2128 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2129 if (tcg_regset_test_reg(arg_ct->u.regs, i))
2130 n++;
2133 return TCG_TARGET_NB_REGS - n + 1;
2136 /* sort from highest priority to lowest */
2137 static void sort_constraints(TCGOpDef *def, int start, int n)
2139 int i, j, p1, p2, tmp;
2141 for(i = 0; i < n; i++)
2142 def->sorted_args[start + i] = start + i;
2143 if (n <= 1)
2144 return;
2145 for(i = 0; i < n - 1; i++) {
2146 for(j = i + 1; j < n; j++) {
2147 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2148 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2149 if (p1 < p2) {
2150 tmp = def->sorted_args[start + i];
2151 def->sorted_args[start + i] = def->sorted_args[start + j];
2152 def->sorted_args[start + j] = tmp;
/*
 * For every opcode that is present and has at least one input or output
 * argument, parse the backend's constraint strings into def->args_ct and
 * then sort the output and the input constraints by priority.
 */
2158 static void process_op_defs(TCGContext *s)
2160 TCGOpcode op;
2162 for (op = 0; op < NB_OPS; op++) {
2163 TCGOpDef *def = &tcg_op_defs[op];
2164 const TCGTargetOpDef *tdefs;
2165 TCGType type;
2166 int i, nb_args;
2168 if (def->flags & TCG_OPF_NOT_PRESENT) {
2169 continue;
/* Only input and output arguments carry constraints. */
2172 nb_args = def->nb_iargs + def->nb_oargs;
2173 if (nb_args == 0) {
2174 continue;
2177 tdefs = tcg_target_op_def(op);
2178 /* Missing TCGTargetOpDef entry. */
2179 tcg_debug_assert(tdefs != NULL);
/* The operand type handed to the backend's constraint parser. */
2181 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2182 for (i = 0; i < nb_args; i++) {
2183 const char *ct_str = tdefs->args_ct_str[i];
2184 /* Incomplete TCGTargetOpDef entry. */
2185 tcg_debug_assert(ct_str != NULL);
2187 def->args_ct[i].u.regs = 0;
2188 def->args_ct[i].ct = 0;
2189 while (*ct_str != '\0') {
2190 switch(*ct_str) {
/* A digit aliases this input to the output argument with that index.
   It must be the first character of the string, and the referenced
   output must already have a register constraint.  The 'case lo ... hi'
   range is a GNU C extension. */
2191 case '0' ... '9':
2193 int oarg = *ct_str - '0';
2194 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2195 tcg_debug_assert(oarg < def->nb_oargs);
2196 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2197 /* TCG_CT_ALIAS is for the output arguments.
2198 The input is tagged with TCG_CT_IALIAS. */
/* The input copies the output's constraint before the output is tagged,
   so the copy does not carry TCG_CT_ALIAS. */
2199 def->args_ct[i] = def->args_ct[oarg];
2200 def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2201 def->args_ct[oarg].alias_index = i;
2202 def->args_ct[i].ct |= TCG_CT_IALIAS;
2203 def->args_ct[i].alias_index = oarg;
2205 ct_str++;
2206 break;
/* '&' sets TCG_CT_NEWREG on this argument. */
2207 case '&':
2208 def->args_ct[i].ct |= TCG_CT_NEWREG;
2209 ct_str++;
2210 break;
/* 'i' sets TCG_CT_CONST on this argument. */
2211 case 'i':
2212 def->args_ct[i].ct |= TCG_CT_CONST;
2213 ct_str++;
2214 break;
/* Any other character is handed to the backend parser, which returns
   the position to continue from. */
2215 default:
2216 ct_str = target_parse_constraint(&def->args_ct[i],
2217 ct_str, type);
2218 /* Typo in TCGTargetOpDef constraint. */
2219 tcg_debug_assert(ct_str != NULL);
2224 /* TCGTargetOpDef entry with too much information? */
2225 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2227 /* sort the constraints (XXX: this is just an heuristic) */
2228 sort_constraints(def, 0, def->nb_oargs);
2229 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2233 void tcg_op_remove(TCGContext *s, TCGOp *op)
2235 TCGLabel *label;
2237 switch (op->opc) {
2238 case INDEX_op_br:
2239 label = arg_label(op->args[0]);
2240 label->refs--;
2241 break;
2242 case INDEX_op_brcond_i32:
2243 case INDEX_op_brcond_i64:
2244 label = arg_label(op->args[3]);
2245 label->refs--;
2246 break;
2247 case INDEX_op_brcond2_i32:
2248 label = arg_label(op->args[5]);
2249 label->refs--;
2250 break;
2251 default:
2252 break;
2255 QTAILQ_REMOVE(&s->ops, op, link);
2256 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2257 s->nb_ops--;
2259 #ifdef CONFIG_PROFILER
2260 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2261 #endif
2264 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2266 TCGContext *s = tcg_ctx;
2267 TCGOp *op;
2269 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2270 op = tcg_malloc(sizeof(TCGOp));
2271 } else {
2272 op = QTAILQ_FIRST(&s->free_ops);
2273 QTAILQ_REMOVE(&s->free_ops, op, link);
2275 memset(op, 0, offsetof(TCGOp, link));
2276 op->opc = opc;
2277 s->nb_ops++;
2279 return op;
2282 TCGOp *tcg_emit_op(TCGOpcode opc)
2284 TCGOp *op = tcg_op_alloc(opc);
2285 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2286 return op;
2289 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2291 TCGOp *new_op = tcg_op_alloc(opc);
2292 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2293 return new_op;
2296 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2298 TCGOp *new_op = tcg_op_alloc(opc);
2299 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2300 return new_op;
2303 /* Reachable analysis : remove unreachable code. */
2304 static void reachable_code_pass(TCGContext *s)
2306 TCGOp *op, *op_next;
2307 bool dead = false;
2309 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2310 bool remove = dead;
2311 TCGLabel *label;
2312 int call_flags;
2314 switch (op->opc) {
2315 case INDEX_op_set_label:
2316 label = arg_label(op->args[0]);
2317 if (label->refs == 0) {
2319 * While there is an occasional backward branch, virtually
2320 * all branches generated by the translators are forward.
2321 * Which means that generally we will have already removed
2322 * all references to the label that will be, and there is
2323 * little to be gained by iterating.
2325 remove = true;
2326 } else {
2327 /* Once we see a label, insns become live again. */
2328 dead = false;
2329 remove = false;
2332 * Optimization can fold conditional branches to unconditional.
2333 * If we find a label with one reference which is preceded by
2334 * an unconditional branch to it, remove both. This needed to
2335 * wait until the dead code in between them was removed.
2337 if (label->refs == 1) {
2338 TCGOp *op_prev = QTAILQ_PREV(op, link);
2339 if (op_prev->opc == INDEX_op_br &&
2340 label == arg_label(op_prev->args[0])) {
2341 tcg_op_remove(s, op_prev);
2342 remove = true;
2346 break;
2348 case INDEX_op_br:
2349 case INDEX_op_exit_tb:
2350 case INDEX_op_goto_ptr:
2351 /* Unconditional branches; everything following is dead. */
2352 dead = true;
2353 break;
2355 case INDEX_op_call:
2356 /* Notice noreturn helper calls, raising exceptions. */
2357 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2358 if (call_flags & TCG_CALL_NO_RETURN) {
2359 dead = true;
2361 break;
2363 case INDEX_op_insn_start:
2364 /* Never remove -- we need to keep these for unwind. */
2365 remove = false;
2366 break;
2368 default:
2369 break;
2372 if (remove) {
2373 tcg_op_remove(s, op);
/*
 * Liveness state bits kept in TCGTemp.state by the liveness passes:
 * TS_DEAD marks a temp as dead, TS_MEM marks it as in (or synced with)
 * memory.  A state of 0 is used for a temp that is live and modified.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD / SYNC bit for argument @n.  Both macros read a variable
 * named 'arg_life' that must be in scope where they are expanded.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2384 /* For liveness_pass_1, the register preferences for a given temp. */
2385 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2387 return ts->state_ptr;
2390 /* For liveness_pass_1, reset the preferences for a given temp to the
2391 * maximal regset for its type.
2393 static inline void la_reset_pref(TCGTemp *ts)
2395 *la_temp_pref(ts)
2396 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2399 /* liveness analysis: end of function: all temps are dead, and globals
2400 should be in memory. */
2401 static void la_func_end(TCGContext *s, int ng, int nt)
2403 int i;
2405 for (i = 0; i < ng; ++i) {
2406 s->temps[i].state = TS_DEAD | TS_MEM;
2407 la_reset_pref(&s->temps[i]);
2409 for (i = ng; i < nt; ++i) {
2410 s->temps[i].state = TS_DEAD;
2411 la_reset_pref(&s->temps[i]);
2415 /* liveness analysis: end of basic block: all temps are dead, globals
2416 and local temps should be in memory. */
2417 static void la_bb_end(TCGContext *s, int ng, int nt)
2419 int i;
2421 for (i = 0; i < ng; ++i) {
2422 s->temps[i].state = TS_DEAD | TS_MEM;
2423 la_reset_pref(&s->temps[i]);
2425 for (i = ng; i < nt; ++i) {
2426 s->temps[i].state = (s->temps[i].temp_local
2427 ? TS_DEAD | TS_MEM
2428 : TS_DEAD);
2429 la_reset_pref(&s->temps[i]);
2433 /* liveness analysis: sync globals back to memory. */
2434 static void la_global_sync(TCGContext *s, int ng)
2436 int i;
2438 for (i = 0; i < ng; ++i) {
2439 int state = s->temps[i].state;
2440 s->temps[i].state = state | TS_MEM;
2441 if (state == TS_DEAD) {
2442 /* If the global was previously dead, reset prefs. */
2443 la_reset_pref(&s->temps[i]);
2448 /* liveness analysis: sync globals back to memory and kill. */
2449 static void la_global_kill(TCGContext *s, int ng)
2451 int i;
2453 for (i = 0; i < ng; i++) {
2454 s->temps[i].state = TS_DEAD | TS_MEM;
2455 la_reset_pref(&s->temps[i]);
2459 /* liveness analysis: note live globals crossing calls. */
2460 static void la_cross_call(TCGContext *s, int nt)
2462 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2463 int i;
2465 for (i = 0; i < nt; i++) {
2466 TCGTemp *ts = &s->temps[i];
2467 if (!(ts->state & TS_DEAD)) {
2468 TCGRegSet *pset = la_temp_pref(ts);
2469 TCGRegSet set = *pset;
2471 set &= mask;
2472 /* If the combination is not possible, restart. */
2473 if (set == 0) {
2474 set = tcg_target_available_regs[ts->type] & mask;
2476 *pset = set;
2481 /* Liveness analysis : update the opc_arg_life array to tell if a
2482 given input arguments is dead. Instructions updating dead
2483 temporaries are removed. */
2484 static void liveness_pass_1(TCGContext *s)
2486 int nb_globals = s->nb_globals;
2487 int nb_temps = s->nb_temps;
2488 TCGOp *op, *op_prev;
2489 TCGRegSet *prefs;
2490 int i;
2492 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2493 for (i = 0; i < nb_temps; ++i) {
2494 s->temps[i].state_ptr = prefs + i;
2497 /* ??? Should be redundant with the exit_tb that ends the TB. */
2498 la_func_end(s, nb_globals, nb_temps);
2500 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2501 int nb_iargs, nb_oargs;
2502 TCGOpcode opc_new, opc_new2;
2503 bool have_opc_new2;
2504 TCGLifeData arg_life = 0;
2505 TCGTemp *ts;
2506 TCGOpcode opc = op->opc;
2507 const TCGOpDef *def = &tcg_op_defs[opc];
2509 switch (opc) {
2510 case INDEX_op_call:
2512 int call_flags;
2513 int nb_call_regs;
2515 nb_oargs = TCGOP_CALLO(op);
2516 nb_iargs = TCGOP_CALLI(op);
2517 call_flags = op->args[nb_oargs + nb_iargs + 1];
2519 /* pure functions can be removed if their result is unused */
2520 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2521 for (i = 0; i < nb_oargs; i++) {
2522 ts = arg_temp(op->args[i]);
2523 if (ts->state != TS_DEAD) {
2524 goto do_not_remove_call;
2527 goto do_remove;
2529 do_not_remove_call:
2531 /* Output args are dead. */
2532 for (i = 0; i < nb_oargs; i++) {
2533 ts = arg_temp(op->args[i]);
2534 if (ts->state & TS_DEAD) {
2535 arg_life |= DEAD_ARG << i;
2537 if (ts->state & TS_MEM) {
2538 arg_life |= SYNC_ARG << i;
2540 ts->state = TS_DEAD;
2541 la_reset_pref(ts);
2543 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2544 op->output_pref[i] = 0;
2547 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2548 TCG_CALL_NO_READ_GLOBALS))) {
2549 la_global_kill(s, nb_globals);
2550 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2551 la_global_sync(s, nb_globals);
2554 /* Record arguments that die in this helper. */
2555 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2556 ts = arg_temp(op->args[i]);
2557 if (ts && ts->state & TS_DEAD) {
2558 arg_life |= DEAD_ARG << i;
2562 /* For all live registers, remove call-clobbered prefs. */
2563 la_cross_call(s, nb_temps);
2565 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2567 /* Input arguments are live for preceding opcodes. */
2568 for (i = 0; i < nb_iargs; i++) {
2569 ts = arg_temp(op->args[i + nb_oargs]);
2570 if (ts && ts->state & TS_DEAD) {
2571 /* For those arguments that die, and will be allocated
2572 * in registers, clear the register set for that arg,
2573 * to be filled in below. For args that will be on
2574 * the stack, reset to any available reg.
2576 *la_temp_pref(ts)
2577 = (i < nb_call_regs ? 0 :
2578 tcg_target_available_regs[ts->type]);
2579 ts->state &= ~TS_DEAD;
2583 /* For each input argument, add its input register to prefs.
2584 If a temp is used once, this produces a single set bit. */
2585 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2586 ts = arg_temp(op->args[i + nb_oargs]);
2587 if (ts) {
2588 tcg_regset_set_reg(*la_temp_pref(ts),
2589 tcg_target_call_iarg_regs[i]);
2593 break;
2594 case INDEX_op_insn_start:
2595 break;
2596 case INDEX_op_discard:
2597 /* mark the temporary as dead */
2598 ts = arg_temp(op->args[0]);
2599 ts->state = TS_DEAD;
2600 la_reset_pref(ts);
2601 break;
2603 case INDEX_op_add2_i32:
2604 opc_new = INDEX_op_add_i32;
2605 goto do_addsub2;
2606 case INDEX_op_sub2_i32:
2607 opc_new = INDEX_op_sub_i32;
2608 goto do_addsub2;
2609 case INDEX_op_add2_i64:
2610 opc_new = INDEX_op_add_i64;
2611 goto do_addsub2;
2612 case INDEX_op_sub2_i64:
2613 opc_new = INDEX_op_sub_i64;
2614 do_addsub2:
2615 nb_iargs = 4;
2616 nb_oargs = 2;
2617 /* Test if the high part of the operation is dead, but not
2618 the low part. The result can be optimized to a simple
2619 add or sub. This happens often for x86_64 guest when the
2620 cpu mode is set to 32 bit. */
2621 if (arg_temp(op->args[1])->state == TS_DEAD) {
2622 if (arg_temp(op->args[0])->state == TS_DEAD) {
2623 goto do_remove;
2625 /* Replace the opcode and adjust the args in place,
2626 leaving 3 unused args at the end. */
2627 op->opc = opc = opc_new;
2628 op->args[1] = op->args[2];
2629 op->args[2] = op->args[4];
2630 /* Fall through and mark the single-word operation live. */
2631 nb_iargs = 2;
2632 nb_oargs = 1;
2634 goto do_not_remove;
2636 case INDEX_op_mulu2_i32:
2637 opc_new = INDEX_op_mul_i32;
2638 opc_new2 = INDEX_op_muluh_i32;
2639 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2640 goto do_mul2;
2641 case INDEX_op_muls2_i32:
2642 opc_new = INDEX_op_mul_i32;
2643 opc_new2 = INDEX_op_mulsh_i32;
2644 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2645 goto do_mul2;
2646 case INDEX_op_mulu2_i64:
2647 opc_new = INDEX_op_mul_i64;
2648 opc_new2 = INDEX_op_muluh_i64;
2649 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2650 goto do_mul2;
2651 case INDEX_op_muls2_i64:
2652 opc_new = INDEX_op_mul_i64;
2653 opc_new2 = INDEX_op_mulsh_i64;
2654 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2655 goto do_mul2;
2656 do_mul2:
2657 nb_iargs = 2;
2658 nb_oargs = 2;
2659 if (arg_temp(op->args[1])->state == TS_DEAD) {
2660 if (arg_temp(op->args[0])->state == TS_DEAD) {
2661 /* Both parts of the operation are dead. */
2662 goto do_remove;
2664 /* The high part of the operation is dead; generate the low. */
2665 op->opc = opc = opc_new;
2666 op->args[1] = op->args[2];
2667 op->args[2] = op->args[3];
2668 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2669 /* The low part of the operation is dead; generate the high. */
2670 op->opc = opc = opc_new2;
2671 op->args[0] = op->args[1];
2672 op->args[1] = op->args[2];
2673 op->args[2] = op->args[3];
2674 } else {
2675 goto do_not_remove;
2677 /* Mark the single-word operation live. */
2678 nb_oargs = 1;
2679 goto do_not_remove;
2681 default:
2682 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2683 nb_iargs = def->nb_iargs;
2684 nb_oargs = def->nb_oargs;
2686 /* Test if the operation can be removed because all
2687 its outputs are dead. We assume that nb_oargs == 0
2688 implies side effects */
2689 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2690 for (i = 0; i < nb_oargs; i++) {
2691 if (arg_temp(op->args[i])->state != TS_DEAD) {
2692 goto do_not_remove;
2695 goto do_remove;
2697 goto do_not_remove;
2699 do_remove:
2700 tcg_op_remove(s, op);
2701 break;
2703 do_not_remove:
2704 for (i = 0; i < nb_oargs; i++) {
2705 ts = arg_temp(op->args[i]);
2707 /* Remember the preference of the uses that followed. */
2708 op->output_pref[i] = *la_temp_pref(ts);
2710 /* Output args are dead. */
2711 if (ts->state & TS_DEAD) {
2712 arg_life |= DEAD_ARG << i;
2714 if (ts->state & TS_MEM) {
2715 arg_life |= SYNC_ARG << i;
2717 ts->state = TS_DEAD;
2718 la_reset_pref(ts);
2721 /* If end of basic block, update. */
2722 if (def->flags & TCG_OPF_BB_EXIT) {
2723 la_func_end(s, nb_globals, nb_temps);
2724 } else if (def->flags & TCG_OPF_BB_END) {
2725 la_bb_end(s, nb_globals, nb_temps);
2726 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2727 la_global_sync(s, nb_globals);
2728 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2729 la_cross_call(s, nb_temps);
2733 /* Record arguments that die in this opcode. */
2734 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2735 ts = arg_temp(op->args[i]);
2736 if (ts->state & TS_DEAD) {
2737 arg_life |= DEAD_ARG << i;
2741 /* Input arguments are live for preceding opcodes. */
2742 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2743 ts = arg_temp(op->args[i]);
2744 if (ts->state & TS_DEAD) {
2745 /* For operands that were dead, initially allow
2746 all regs for the type. */
2747 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2748 ts->state &= ~TS_DEAD;
2752 /* Incorporate constraints for this operand. */
2753 switch (opc) {
2754 case INDEX_op_mov_i32:
2755 case INDEX_op_mov_i64:
2756 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2757 have proper constraints. That said, special case
2758 moves to propagate preferences backward. */
2759 if (IS_DEAD_ARG(1)) {
2760 *la_temp_pref(arg_temp(op->args[0]))
2761 = *la_temp_pref(arg_temp(op->args[1]));
2763 break;
2765 default:
2766 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2767 const TCGArgConstraint *ct = &def->args_ct[i];
2768 TCGRegSet set, *pset;
2770 ts = arg_temp(op->args[i]);
2771 pset = la_temp_pref(ts);
2772 set = *pset;
2774 set &= ct->u.regs;
2775 if (ct->ct & TCG_CT_IALIAS) {
2776 set &= op->output_pref[ct->alias_index];
2778 /* If the combination is not possible, restart. */
2779 if (set == 0) {
2780 set = ct->u.regs;
2782 *pset = set;
2784 break;
2786 break;
2788 op->life = arg_life;
2792 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2793 static bool liveness_pass_2(TCGContext *s)
2795 int nb_globals = s->nb_globals;
2796 int nb_temps, i;
2797 bool changes = false;
2798 TCGOp *op, *op_next;
2800 /* Create a temporary for each indirect global. */
2801 for (i = 0; i < nb_globals; ++i) {
2802 TCGTemp *its = &s->temps[i];
2803 if (its->indirect_reg) {
2804 TCGTemp *dts = tcg_temp_alloc(s);
2805 dts->type = its->type;
2806 dts->base_type = its->base_type;
2807 its->state_ptr = dts;
2808 } else {
2809 its->state_ptr = NULL;
2811 /* All globals begin dead. */
2812 its->state = TS_DEAD;
2814 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2815 TCGTemp *its = &s->temps[i];
2816 its->state_ptr = NULL;
2817 its->state = TS_DEAD;
2820 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2821 TCGOpcode opc = op->opc;
2822 const TCGOpDef *def = &tcg_op_defs[opc];
2823 TCGLifeData arg_life = op->life;
2824 int nb_iargs, nb_oargs, call_flags;
2825 TCGTemp *arg_ts, *dir_ts;
2827 if (opc == INDEX_op_call) {
2828 nb_oargs = TCGOP_CALLO(op);
2829 nb_iargs = TCGOP_CALLI(op);
2830 call_flags = op->args[nb_oargs + nb_iargs + 1];
2831 } else {
2832 nb_iargs = def->nb_iargs;
2833 nb_oargs = def->nb_oargs;
2835 /* Set flags similar to how calls require. */
2836 if (def->flags & TCG_OPF_BB_END) {
2837 /* Like writing globals: save_globals */
2838 call_flags = 0;
2839 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2840 /* Like reading globals: sync_globals */
2841 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2842 } else {
2843 /* No effect on globals. */
2844 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2845 TCG_CALL_NO_WRITE_GLOBALS);
2849 /* Make sure that input arguments are available. */
2850 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2851 arg_ts = arg_temp(op->args[i]);
2852 if (arg_ts) {
2853 dir_ts = arg_ts->state_ptr;
2854 if (dir_ts && arg_ts->state == TS_DEAD) {
2855 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2856 ? INDEX_op_ld_i32
2857 : INDEX_op_ld_i64);
2858 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2860 lop->args[0] = temp_arg(dir_ts);
2861 lop->args[1] = temp_arg(arg_ts->mem_base);
2862 lop->args[2] = arg_ts->mem_offset;
2864 /* Loaded, but synced with memory. */
2865 arg_ts->state = TS_MEM;
2870 /* Perform input replacement, and mark inputs that became dead.
2871 No action is required except keeping temp_state up to date
2872 so that we reload when needed. */
2873 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2874 arg_ts = arg_temp(op->args[i]);
2875 if (arg_ts) {
2876 dir_ts = arg_ts->state_ptr;
2877 if (dir_ts) {
2878 op->args[i] = temp_arg(dir_ts);
2879 changes = true;
2880 if (IS_DEAD_ARG(i)) {
2881 arg_ts->state = TS_DEAD;
2887 /* Liveness analysis should ensure that the following are
2888 all correct, for call sites and basic block end points. */
2889 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2890 /* Nothing to do */
2891 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2892 for (i = 0; i < nb_globals; ++i) {
2893 /* Liveness should see that globals are synced back,
2894 that is, either TS_DEAD or TS_MEM. */
2895 arg_ts = &s->temps[i];
2896 tcg_debug_assert(arg_ts->state_ptr == 0
2897 || arg_ts->state != 0);
2899 } else {
2900 for (i = 0; i < nb_globals; ++i) {
2901 /* Liveness should see that globals are saved back,
2902 that is, TS_DEAD, waiting to be reloaded. */
2903 arg_ts = &s->temps[i];
2904 tcg_debug_assert(arg_ts->state_ptr == 0
2905 || arg_ts->state == TS_DEAD);
2909 /* Outputs become available. */
2910 for (i = 0; i < nb_oargs; i++) {
2911 arg_ts = arg_temp(op->args[i]);
2912 dir_ts = arg_ts->state_ptr;
2913 if (!dir_ts) {
2914 continue;
2916 op->args[i] = temp_arg(dir_ts);
2917 changes = true;
2919 /* The output is now live and modified. */
2920 arg_ts->state = 0;
2922 /* Sync outputs upon their last write. */
2923 if (NEED_SYNC_ARG(i)) {
2924 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2925 ? INDEX_op_st_i32
2926 : INDEX_op_st_i64);
2927 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2929 sop->args[0] = temp_arg(dir_ts);
2930 sop->args[1] = temp_arg(arg_ts->mem_base);
2931 sop->args[2] = arg_ts->mem_offset;
2933 arg_ts->state = TS_MEM;
2935 /* Drop outputs that are dead. */
2936 if (IS_DEAD_ARG(i)) {
2937 arg_ts->state = TS_DEAD;
2942 return changes;
#ifdef CONFIG_DEBUG_TCG
/*
 * Debug helper: print, for every temp of @s, where its value currently
 * lives (register name, "offset(base)" memory slot, constant, or "D" for
 * dead), followed by the temp held in each occupied host register.
 */
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for (i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        switch (ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset,
                   tcg_target_reg_names[ts->mem_base->reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" TCG_PRIlx, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] != NULL) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}

/*
 * Debug helper: verify that s->reg_to_temp[] and the per-temp
 * val_type/reg fields agree with each other.  On the first mismatch,
 * print a message, dump the register state and abort.
 */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    /* Every occupied register must be claimed by the temp it points to.  */
    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    /* Every non-fixed temp in a register must be recorded for that reg.  */
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif
3017 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3019 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3020 /* Sparc64 stack is accessed with offset of 2047 */
3021 s->current_frame_offset = (s->current_frame_offset +
3022 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3023 ~(sizeof(tcg_target_long) - 1);
3024 #endif
3025 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3026 s->frame_end) {
3027 tcg_abort();
3029 ts->mem_offset = s->current_frame_offset;
3030 ts->mem_base = s->frame_temp;
3031 ts->mem_allocated = 1;
3032 s->current_frame_offset += sizeof(tcg_target_long);
3035 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3037 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3038 mark it free; otherwise mark it dead. */
3039 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3041 if (ts->fixed_reg) {
3042 return;
3044 if (ts->val_type == TEMP_VAL_REG) {
3045 s->reg_to_temp[ts->reg] = NULL;
3047 ts->val_type = (free_or_dead < 0
3048 || ts->temp_local
3049 || ts->temp_global
3050 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3053 /* Mark a temporary as dead. */
3054 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3056 temp_free_or_dead(s, ts, 1);
3059 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3060 registers needs to be allocated to store a constant. If 'free_or_dead'
3061 is non-zero, subsequently release the temporary; if it is positive, the
3062 temp is dead; if it is negative, the temp is free. */
3063 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3064 TCGRegSet preferred_regs, int free_or_dead)
3066 if (ts->fixed_reg) {
3067 return;
3069 if (!ts->mem_coherent) {
3070 if (!ts->mem_allocated) {
3071 temp_allocate_frame(s, ts);
3073 switch (ts->val_type) {
3074 case TEMP_VAL_CONST:
3075 /* If we're going to free the temp immediately, then we won't
3076 require it later in a register, so attempt to store the
3077 constant to memory directly. */
3078 if (free_or_dead
3079 && tcg_out_sti(s, ts->type, ts->val,
3080 ts->mem_base->reg, ts->mem_offset)) {
3081 break;
3083 temp_load(s, ts, tcg_target_available_regs[ts->type],
3084 allocated_regs, preferred_regs);
3085 /* fallthrough */
3087 case TEMP_VAL_REG:
3088 tcg_out_st(s, ts->type, ts->reg,
3089 ts->mem_base->reg, ts->mem_offset);
3090 break;
3092 case TEMP_VAL_MEM:
3093 break;
3095 case TEMP_VAL_DEAD:
3096 default:
3097 tcg_abort();
3099 ts->mem_coherent = 1;
3101 if (free_or_dead) {
3102 temp_free_or_dead(s, ts, free_or_dead);
3106 /* free register 'reg' by spilling the corresponding temporary if necessary */
3107 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3109 TCGTemp *ts = s->reg_to_temp[reg];
3110 if (ts != NULL) {
3111 temp_sync(s, ts, allocated_regs, 0, -1);
3116 * tcg_reg_alloc:
3117 * @required_regs: Set of registers in which we must allocate.
3118 * @allocated_regs: Set of registers which must be avoided.
3119 * @preferred_regs: Set of registers we should prefer.
3120 * @rev: True if we search the registers in "indirect" order.
3122 * The allocated register must be in @required_regs & ~@allocated_regs,
3123 * but if we can put it in @preferred_regs we may save a move later.
3125 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3126 TCGRegSet allocated_regs,
3127 TCGRegSet preferred_regs, bool rev)
3129 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3130 TCGRegSet reg_ct[2];
3131 const int *order;
3133 reg_ct[1] = required_regs & ~allocated_regs;
3134 tcg_debug_assert(reg_ct[1] != 0);
3135 reg_ct[0] = reg_ct[1] & preferred_regs;
3137 /* Skip the preferred_regs option if it cannot be satisfied,
3138 or if the preference made no difference. */
3139 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3141 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3143 /* Try free registers, preferences first. */
3144 for (j = f; j < 2; j++) {
3145 TCGRegSet set = reg_ct[j];
3147 if (tcg_regset_single(set)) {
3148 /* One register in the set. */
3149 TCGReg reg = tcg_regset_first(set);
3150 if (s->reg_to_temp[reg] == NULL) {
3151 return reg;
3153 } else {
3154 for (i = 0; i < n; i++) {
3155 TCGReg reg = order[i];
3156 if (s->reg_to_temp[reg] == NULL &&
3157 tcg_regset_test_reg(set, reg)) {
3158 return reg;
3164 /* We must spill something. */
3165 for (j = f; j < 2; j++) {
3166 TCGRegSet set = reg_ct[j];
3168 if (tcg_regset_single(set)) {
3169 /* One register in the set. */
3170 TCGReg reg = tcg_regset_first(set);
3171 tcg_reg_free(s, reg, allocated_regs);
3172 return reg;
3173 } else {
3174 for (i = 0; i < n; i++) {
3175 TCGReg reg = order[i];
3176 if (tcg_regset_test_reg(set, reg)) {
3177 tcg_reg_free(s, reg, allocated_regs);
3178 return reg;
3184 tcg_abort();
3187 /* Make sure the temporary is in a register. If needed, allocate the register
3188 from DESIRED while avoiding ALLOCATED. */
3189 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3190 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3192 TCGReg reg;
3194 switch (ts->val_type) {
3195 case TEMP_VAL_REG:
3196 return;
3197 case TEMP_VAL_CONST:
3198 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3199 preferred_regs, ts->indirect_base);
3200 tcg_out_movi(s, ts->type, reg, ts->val);
3201 ts->mem_coherent = 0;
3202 break;
3203 case TEMP_VAL_MEM:
3204 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3205 preferred_regs, ts->indirect_base);
3206 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3207 ts->mem_coherent = 1;
3208 break;
3209 case TEMP_VAL_DEAD:
3210 default:
3211 tcg_abort();
3213 ts->reg = reg;
3214 ts->val_type = TEMP_VAL_REG;
3215 s->reg_to_temp[reg] = ts;
3218 /* Save a temporary to memory. 'allocated_regs' is used in case a
3219 temporary registers needs to be allocated to store a constant. */
3220 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3222 /* The liveness analysis already ensures that globals are back
3223 in memory. Keep an tcg_debug_assert for safety. */
3224 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3227 /* save globals to their canonical location and assume they can be
3228 modified be the following code. 'allocated_regs' is used in case a
3229 temporary registers needs to be allocated to store a constant. */
3230 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3232 int i, n;
3234 for (i = 0, n = s->nb_globals; i < n; i++) {
3235 temp_save(s, &s->temps[i], allocated_regs);
3239 /* sync globals to their canonical location and assume they can be
3240 read by the following code. 'allocated_regs' is used in case a
3241 temporary registers needs to be allocated to store a constant. */
3242 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3244 int i, n;
3246 for (i = 0, n = s->nb_globals; i < n; i++) {
3247 TCGTemp *ts = &s->temps[i];
3248 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3249 || ts->fixed_reg
3250 || ts->mem_coherent);
3254 /* at the end of a basic block, we assume all temporaries are dead and
3255 all globals are stored at their canonical location. */
3256 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3258 int i;
3260 for (i = s->nb_globals; i < s->nb_temps; i++) {
3261 TCGTemp *ts = &s->temps[i];
3262 if (ts->temp_local) {
3263 temp_save(s, ts, allocated_regs);
3264 } else {
3265 /* The liveness analysis already ensures that temps are dead.
3266 Keep an tcg_debug_assert for safety. */
3267 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3271 save_globals(s, allocated_regs);
3274 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3275 tcg_target_ulong val, TCGLifeData arg_life,
3276 TCGRegSet preferred_regs)
3278 if (ots->fixed_reg) {
3279 /* For fixed registers, we do not do any constant propagation. */
3280 tcg_out_movi(s, ots->type, ots->reg, val);
3281 return;
3284 /* The movi is not explicitly generated here. */
3285 if (ots->val_type == TEMP_VAL_REG) {
3286 s->reg_to_temp[ots->reg] = NULL;
3288 ots->val_type = TEMP_VAL_CONST;
3289 ots->val = val;
3290 ots->mem_coherent = 0;
3291 if (NEED_SYNC_ARG(0)) {
3292 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3293 } else if (IS_DEAD_ARG(0)) {
3294 temp_dead(s, ots);
3298 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3300 TCGTemp *ots = arg_temp(op->args[0]);
3301 tcg_target_ulong val = op->args[1];
3303 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3306 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3308 const TCGLifeData arg_life = op->life;
3309 TCGRegSet allocated_regs, preferred_regs;
3310 TCGTemp *ts, *ots;
3311 TCGType otype, itype;
3313 allocated_regs = s->reserved_regs;
3314 preferred_regs = op->output_pref[0];
3315 ots = arg_temp(op->args[0]);
3316 ts = arg_temp(op->args[1]);
3318 /* Note that otype != itype for no-op truncation. */
3319 otype = ots->type;
3320 itype = ts->type;
3322 if (ts->val_type == TEMP_VAL_CONST) {
3323 /* propagate constant or generate sti */
3324 tcg_target_ulong val = ts->val;
3325 if (IS_DEAD_ARG(1)) {
3326 temp_dead(s, ts);
3328 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3329 return;
3332 /* If the source value is in memory we're going to be forced
3333 to have it in a register in order to perform the copy. Copy
3334 the SOURCE value into its own register first, that way we
3335 don't have to reload SOURCE the next time it is used. */
3336 if (ts->val_type == TEMP_VAL_MEM) {
3337 temp_load(s, ts, tcg_target_available_regs[itype],
3338 allocated_regs, preferred_regs);
3341 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3342 if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
3343 /* mov to a non-saved dead register makes no sense (even with
3344 liveness analysis disabled). */
3345 tcg_debug_assert(NEED_SYNC_ARG(0));
3346 if (!ots->mem_allocated) {
3347 temp_allocate_frame(s, ots);
3349 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3350 if (IS_DEAD_ARG(1)) {
3351 temp_dead(s, ts);
3353 temp_dead(s, ots);
3354 } else {
3355 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3356 /* the mov can be suppressed */
3357 if (ots->val_type == TEMP_VAL_REG) {
3358 s->reg_to_temp[ots->reg] = NULL;
3360 ots->reg = ts->reg;
3361 temp_dead(s, ts);
3362 } else {
3363 if (ots->val_type != TEMP_VAL_REG) {
3364 /* When allocating a new register, make sure to not spill the
3365 input one. */
3366 tcg_regset_set_reg(allocated_regs, ts->reg);
3367 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3368 allocated_regs, preferred_regs,
3369 ots->indirect_base);
3371 tcg_out_mov(s, otype, ots->reg, ts->reg);
3373 ots->val_type = TEMP_VAL_REG;
3374 ots->mem_coherent = 0;
3375 s->reg_to_temp[ots->reg] = ots;
3376 if (NEED_SYNC_ARG(0)) {
3377 temp_sync(s, ots, allocated_regs, 0, 0);
3382 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3384 const TCGLifeData arg_life = op->life;
3385 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3386 TCGRegSet i_allocated_regs;
3387 TCGRegSet o_allocated_regs;
3388 int i, k, nb_iargs, nb_oargs;
3389 TCGReg reg;
3390 TCGArg arg;
3391 const TCGArgConstraint *arg_ct;
3392 TCGTemp *ts;
3393 TCGArg new_args[TCG_MAX_OP_ARGS];
3394 int const_args[TCG_MAX_OP_ARGS];
3396 nb_oargs = def->nb_oargs;
3397 nb_iargs = def->nb_iargs;
3399 /* copy constants */
3400 memcpy(new_args + nb_oargs + nb_iargs,
3401 op->args + nb_oargs + nb_iargs,
3402 sizeof(TCGArg) * def->nb_cargs);
3404 i_allocated_regs = s->reserved_regs;
3405 o_allocated_regs = s->reserved_regs;
3407 /* satisfy input constraints */
3408 for (k = 0; k < nb_iargs; k++) {
3409 TCGRegSet i_preferred_regs, o_preferred_regs;
3411 i = def->sorted_args[nb_oargs + k];
3412 arg = op->args[i];
3413 arg_ct = &def->args_ct[i];
3414 ts = arg_temp(arg);
3416 if (ts->val_type == TEMP_VAL_CONST
3417 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3418 /* constant is OK for instruction */
3419 const_args[i] = 1;
3420 new_args[i] = ts->val;
3421 continue;
3424 i_preferred_regs = o_preferred_regs = 0;
3425 if (arg_ct->ct & TCG_CT_IALIAS) {
3426 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3427 if (ts->fixed_reg) {
3428 /* if fixed register, we must allocate a new register
3429 if the alias is not the same register */
3430 if (arg != op->args[arg_ct->alias_index]) {
3431 goto allocate_in_reg;
3433 } else {
3434 /* if the input is aliased to an output and if it is
3435 not dead after the instruction, we must allocate
3436 a new register and move it */
3437 if (!IS_DEAD_ARG(i)) {
3438 goto allocate_in_reg;
3441 /* check if the current register has already been allocated
3442 for another input aliased to an output */
3443 if (ts->val_type == TEMP_VAL_REG) {
3444 int k2, i2;
3445 reg = ts->reg;
3446 for (k2 = 0 ; k2 < k ; k2++) {
3447 i2 = def->sorted_args[nb_oargs + k2];
3448 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3449 reg == new_args[i2]) {
3450 goto allocate_in_reg;
3454 i_preferred_regs = o_preferred_regs;
3458 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3459 reg = ts->reg;
3461 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3462 /* nothing to do : the constraint is satisfied */
3463 } else {
3464 allocate_in_reg:
3465 /* allocate a new register matching the constraint
3466 and move the temporary register into it */
3467 temp_load(s, ts, tcg_target_available_regs[ts->type],
3468 i_allocated_regs, 0);
3469 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3470 o_preferred_regs, ts->indirect_base);
3471 tcg_out_mov(s, ts->type, reg, ts->reg);
3473 new_args[i] = reg;
3474 const_args[i] = 0;
3475 tcg_regset_set_reg(i_allocated_regs, reg);
3478 /* mark dead temporaries and free the associated registers */
3479 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3480 if (IS_DEAD_ARG(i)) {
3481 temp_dead(s, arg_temp(op->args[i]));
3485 if (def->flags & TCG_OPF_BB_END) {
3486 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3487 } else {
3488 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3489 /* XXX: permit generic clobber register list ? */
3490 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3491 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3492 tcg_reg_free(s, i, i_allocated_regs);
3496 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3497 /* sync globals if the op has side effects and might trigger
3498 an exception. */
3499 sync_globals(s, i_allocated_regs);
3502 /* satisfy the output constraints */
3503 for(k = 0; k < nb_oargs; k++) {
3504 i = def->sorted_args[k];
3505 arg = op->args[i];
3506 arg_ct = &def->args_ct[i];
3507 ts = arg_temp(arg);
3508 if ((arg_ct->ct & TCG_CT_ALIAS)
3509 && !const_args[arg_ct->alias_index]) {
3510 reg = new_args[arg_ct->alias_index];
3511 } else if (arg_ct->ct & TCG_CT_NEWREG) {
3512 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3513 i_allocated_regs | o_allocated_regs,
3514 op->output_pref[k], ts->indirect_base);
3515 } else {
3516 /* if fixed register, we try to use it */
3517 reg = ts->reg;
3518 if (ts->fixed_reg &&
3519 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3520 goto oarg_end;
3522 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3523 op->output_pref[k], ts->indirect_base);
3525 tcg_regset_set_reg(o_allocated_regs, reg);
3526 /* if a fixed register is used, then a move will be done afterwards */
3527 if (!ts->fixed_reg) {
3528 if (ts->val_type == TEMP_VAL_REG) {
3529 s->reg_to_temp[ts->reg] = NULL;
3531 ts->val_type = TEMP_VAL_REG;
3532 ts->reg = reg;
3533 /* temp value is modified, so the value kept in memory is
3534 potentially not the same */
3535 ts->mem_coherent = 0;
3536 s->reg_to_temp[reg] = ts;
3538 oarg_end:
3539 new_args[i] = reg;
3543 /* emit instruction */
3544 if (def->flags & TCG_OPF_VECTOR) {
3545 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3546 new_args, const_args);
3547 } else {
3548 tcg_out_op(s, op->opc, new_args, const_args);
3551 /* move the outputs in the correct register if needed */
3552 for(i = 0; i < nb_oargs; i++) {
3553 ts = arg_temp(op->args[i]);
3554 reg = new_args[i];
3555 if (ts->fixed_reg && ts->reg != reg) {
3556 tcg_out_mov(s, ts->type, ts->reg, reg);
3558 if (NEED_SYNC_ARG(i)) {
3559 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3560 } else if (IS_DEAD_ARG(i)) {
3561 temp_dead(s, ts);
/*
 * STACK_DIR(x): sign adjustment for a stack displacement.  The value is
 * negated when TCG_TARGET_STACK_GROWSUP is defined and passed through
 * unchanged otherwise.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3572 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3574 const int nb_oargs = TCGOP_CALLO(op);
3575 const int nb_iargs = TCGOP_CALLI(op);
3576 const TCGLifeData arg_life = op->life;
3577 int flags, nb_regs, i;
3578 TCGReg reg;
3579 TCGArg arg;
3580 TCGTemp *ts;
3581 intptr_t stack_offset;
3582 size_t call_stack_size;
3583 tcg_insn_unit *func_addr;
3584 int allocate_args;
3585 TCGRegSet allocated_regs;
3587 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3588 flags = op->args[nb_oargs + nb_iargs + 1];
3590 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3591 if (nb_regs > nb_iargs) {
3592 nb_regs = nb_iargs;
3595 /* assign stack slots first */
3596 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3597 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3598 ~(TCG_TARGET_STACK_ALIGN - 1);
3599 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3600 if (allocate_args) {
3601 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3602 preallocate call stack */
3603 tcg_abort();
3606 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3607 for (i = nb_regs; i < nb_iargs; i++) {
3608 arg = op->args[nb_oargs + i];
3609 #ifdef TCG_TARGET_STACK_GROWSUP
3610 stack_offset -= sizeof(tcg_target_long);
3611 #endif
3612 if (arg != TCG_CALL_DUMMY_ARG) {
3613 ts = arg_temp(arg);
3614 temp_load(s, ts, tcg_target_available_regs[ts->type],
3615 s->reserved_regs, 0);
3616 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3618 #ifndef TCG_TARGET_STACK_GROWSUP
3619 stack_offset += sizeof(tcg_target_long);
3620 #endif
3623 /* assign input registers */
3624 allocated_regs = s->reserved_regs;
3625 for (i = 0; i < nb_regs; i++) {
3626 arg = op->args[nb_oargs + i];
3627 if (arg != TCG_CALL_DUMMY_ARG) {
3628 ts = arg_temp(arg);
3629 reg = tcg_target_call_iarg_regs[i];
3631 if (ts->val_type == TEMP_VAL_REG) {
3632 if (ts->reg != reg) {
3633 tcg_reg_free(s, reg, allocated_regs);
3634 tcg_out_mov(s, ts->type, reg, ts->reg);
3636 } else {
3637 TCGRegSet arg_set = 0;
3639 tcg_reg_free(s, reg, allocated_regs);
3640 tcg_regset_set_reg(arg_set, reg);
3641 temp_load(s, ts, arg_set, allocated_regs, 0);
3644 tcg_regset_set_reg(allocated_regs, reg);
3648 /* mark dead temporaries and free the associated registers */
3649 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3650 if (IS_DEAD_ARG(i)) {
3651 temp_dead(s, arg_temp(op->args[i]));
3655 /* clobber call registers */
3656 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3657 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3658 tcg_reg_free(s, i, allocated_regs);
3662 /* Save globals if they might be written by the helper, sync them if
3663 they might be read. */
3664 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3665 /* Nothing to do */
3666 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3667 sync_globals(s, allocated_regs);
3668 } else {
3669 save_globals(s, allocated_regs);
3672 tcg_out_call(s, func_addr);
3674 /* assign output registers and emit moves if needed */
3675 for(i = 0; i < nb_oargs; i++) {
3676 arg = op->args[i];
3677 ts = arg_temp(arg);
3678 reg = tcg_target_call_oarg_regs[i];
3679 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3681 if (ts->fixed_reg) {
3682 if (ts->reg != reg) {
3683 tcg_out_mov(s, ts->type, ts->reg, reg);
3685 } else {
3686 if (ts->val_type == TEMP_VAL_REG) {
3687 s->reg_to_temp[ts->reg] = NULL;
3689 ts->val_type = TEMP_VAL_REG;
3690 ts->reg = reg;
3691 ts->mem_coherent = 0;
3692 s->reg_to_temp[reg] = ts;
3693 if (NEED_SYNC_ARG(i)) {
3694 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3695 } else if (IS_DEAD_ARG(i)) {
3696 temp_dead(s, ts);
3702 #ifdef CONFIG_PROFILER
3704 /* avoid copy/paste errors */
3705 #define PROF_ADD(to, from, field) \
3706 do { \
3707 (to)->field += atomic_read(&((from)->field)); \
3708 } while (0)
3710 #define PROF_MAX(to, from, field) \
3711 do { \
3712 typeof((from)->field) val__ = atomic_read(&((from)->field)); \
3713 if (val__ > (to)->field) { \
3714 (to)->field = val__; \
3716 } while (0)
3718 /* Pass in a zero'ed @prof */
3719 static inline
3720 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3722 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3723 unsigned int i;
3725 for (i = 0; i < n_ctxs; i++) {
3726 TCGContext *s = atomic_read(&tcg_ctxs[i]);
3727 const TCGProfile *orig = &s->prof;
3729 if (counters) {
3730 PROF_ADD(prof, orig, cpu_exec_time);
3731 PROF_ADD(prof, orig, tb_count1);
3732 PROF_ADD(prof, orig, tb_count);
3733 PROF_ADD(prof, orig, op_count);
3734 PROF_MAX(prof, orig, op_count_max);
3735 PROF_ADD(prof, orig, temp_count);
3736 PROF_MAX(prof, orig, temp_count_max);
3737 PROF_ADD(prof, orig, del_op_count);
3738 PROF_ADD(prof, orig, code_in_len);
3739 PROF_ADD(prof, orig, code_out_len);
3740 PROF_ADD(prof, orig, search_out_len);
3741 PROF_ADD(prof, orig, interm_time);
3742 PROF_ADD(prof, orig, code_time);
3743 PROF_ADD(prof, orig, la_time);
3744 PROF_ADD(prof, orig, opt_time);
3745 PROF_ADD(prof, orig, restore_count);
3746 PROF_ADD(prof, orig, restore_time);
3748 if (table) {
3749 int i;
3751 for (i = 0; i < NB_OPS; i++) {
3752 PROF_ADD(prof, orig, table_op_count[i]);
3758 #undef PROF_ADD
3759 #undef PROF_MAX
3761 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3763 tcg_profile_snapshot(prof, true, false);
3766 static void tcg_profile_snapshot_table(TCGProfile *prof)
3768 tcg_profile_snapshot(prof, false, true);
3771 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3773 TCGProfile prof = {};
3774 int i;
3776 tcg_profile_snapshot_table(&prof);
3777 for (i = 0; i < NB_OPS; i++) {
3778 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3779 prof.table_op_count[i]);
3783 int64_t tcg_cpu_exec_time(void)
3785 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3786 unsigned int i;
3787 int64_t ret = 0;
3789 for (i = 0; i < n_ctxs; i++) {
3790 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3791 const TCGProfile *prof = &s->prof;
3793 ret += atomic_read(&prof->cpu_exec_time);
3795 return ret;
3797 #else
3798 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3800 cpu_fprintf(f, "[TCG profiler not compiled]\n");
/*
 * Variant built when CONFIG_PROFILER is not defined: no execution time is
 * recorded, so calling this reports an error and terminates the process
 * with EXIT_FAILURE.  It never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
3808 #endif
3811 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3813 #ifdef CONFIG_PROFILER
3814 TCGProfile *prof = &s->prof;
3815 #endif
3816 int i, num_insns;
3817 TCGOp *op;
3819 #ifdef CONFIG_PROFILER
3821 int n = 0;
3823 QTAILQ_FOREACH(op, &s->ops, link) {
3824 n++;
3826 atomic_set(&prof->op_count, prof->op_count + n);
3827 if (n > prof->op_count_max) {
3828 atomic_set(&prof->op_count_max, n);
3831 n = s->nb_temps;
3832 atomic_set(&prof->temp_count, prof->temp_count + n);
3833 if (n > prof->temp_count_max) {
3834 atomic_set(&prof->temp_count_max, n);
3837 #endif
3839 #ifdef DEBUG_DISAS
3840 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3841 && qemu_log_in_addr_range(tb->pc))) {
3842 qemu_log_lock();
3843 qemu_log("OP:\n");
3844 tcg_dump_ops(s, false);
3845 qemu_log("\n");
3846 qemu_log_unlock();
3848 #endif
3850 #ifdef CONFIG_DEBUG_TCG
3851 /* Ensure all labels referenced have been emitted. */
3853 TCGLabel *l;
3854 bool error = false;
3856 QSIMPLEQ_FOREACH(l, &s->labels, next) {
3857 if (unlikely(!l->present) && l->refs) {
3858 qemu_log_mask(CPU_LOG_TB_OP,
3859 "$L%d referenced but not present.\n", l->id);
3860 error = true;
3863 assert(!error);
3865 #endif
3867 #ifdef CONFIG_PROFILER
3868 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3869 #endif
3871 #ifdef USE_TCG_OPTIMIZATIONS
3872 tcg_optimize(s);
3873 #endif
3875 #ifdef CONFIG_PROFILER
3876 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3877 atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3878 #endif
3880 reachable_code_pass(s);
3881 liveness_pass_1(s);
3883 if (s->nb_indirects > 0) {
3884 #ifdef DEBUG_DISAS
3885 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3886 && qemu_log_in_addr_range(tb->pc))) {
3887 qemu_log_lock();
3888 qemu_log("OP before indirect lowering:\n");
3889 tcg_dump_ops(s, false);
3890 qemu_log("\n");
3891 qemu_log_unlock();
3893 #endif
3894 /* Replace indirect temps with direct temps. */
3895 if (liveness_pass_2(s)) {
3896 /* If changes were made, re-run liveness. */
3897 liveness_pass_1(s);
3901 #ifdef CONFIG_PROFILER
3902 atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3903 #endif
3905 #ifdef DEBUG_DISAS
3906 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3907 && qemu_log_in_addr_range(tb->pc))) {
3908 qemu_log_lock();
3909 qemu_log("OP after optimization and liveness analysis:\n");
3910 tcg_dump_ops(s, true);
3911 qemu_log("\n");
3912 qemu_log_unlock();
3914 #endif
3916 tcg_reg_alloc_start(s);
3918 s->code_buf = tb->tc.ptr;
3919 s->code_ptr = tb->tc.ptr;
3921 #ifdef TCG_TARGET_NEED_LDST_LABELS
3922 QSIMPLEQ_INIT(&s->ldst_labels);
3923 #endif
3924 #ifdef TCG_TARGET_NEED_POOL_LABELS
3925 s->pool_labels = NULL;
3926 #endif
3928 num_insns = -1;
3929 QTAILQ_FOREACH(op, &s->ops, link) {
3930 TCGOpcode opc = op->opc;
3932 #ifdef CONFIG_PROFILER
3933 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3934 #endif
3936 switch (opc) {
3937 case INDEX_op_mov_i32:
3938 case INDEX_op_mov_i64:
3939 case INDEX_op_mov_vec:
3940 tcg_reg_alloc_mov(s, op);
3941 break;
3942 case INDEX_op_movi_i32:
3943 case INDEX_op_movi_i64:
3944 case INDEX_op_dupi_vec:
3945 tcg_reg_alloc_movi(s, op);
3946 break;
3947 case INDEX_op_insn_start:
3948 if (num_insns >= 0) {
3949 size_t off = tcg_current_code_size(s);
3950 s->gen_insn_end_off[num_insns] = off;
3951 /* Assert that we do not overflow our stored offset. */
3952 assert(s->gen_insn_end_off[num_insns] == off);
3954 num_insns++;
3955 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3956 target_ulong a;
3957 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3958 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3959 #else
3960 a = op->args[i];
3961 #endif
3962 s->gen_insn_data[num_insns][i] = a;
3964 break;
3965 case INDEX_op_discard:
3966 temp_dead(s, arg_temp(op->args[0]));
3967 break;
3968 case INDEX_op_set_label:
3969 tcg_reg_alloc_bb_end(s, s->reserved_regs);
3970 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3971 break;
3972 case INDEX_op_call:
3973 tcg_reg_alloc_call(s, op);
3974 break;
3975 default:
3976 /* Sanity check that we've not introduced any unhandled opcodes. */
3977 tcg_debug_assert(tcg_op_supported(opc));
3978 /* Note: in order to speed up the code, it would be much
3979 faster to have specialized register allocator functions for
3980 some common argument patterns */
3981 tcg_reg_alloc_op(s, op);
3982 break;
3984 #ifdef CONFIG_DEBUG_TCG
3985 check_regs(s);
3986 #endif
3987 /* Test for (pending) buffer overflow. The assumption is that any
3988 one operation beginning below the high water mark cannot overrun
3989 the buffer completely. Thus we can test for overflow after
3990 generating code without having to check during generation. */
3991 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3992 return -1;
3995 tcg_debug_assert(num_insns >= 0);
3996 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3998 /* Generate TB finalization at the end of block */
3999 #ifdef TCG_TARGET_NEED_LDST_LABELS
4000 if (!tcg_out_ldst_finalize(s)) {
4001 return -1;
4003 #endif
4004 #ifdef TCG_TARGET_NEED_POOL_LABELS
4005 if (!tcg_out_pool_finalize(s)) {
4006 return -1;
4008 #endif
4010 /* flush instruction cache */
4011 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4013 return tcg_current_code_size(s);
4016 #ifdef CONFIG_PROFILER
4017 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
4019 TCGProfile prof = {};
4020 const TCGProfile *s;
4021 int64_t tb_count;
4022 int64_t tb_div_count;
4023 int64_t tot;
4025 tcg_profile_snapshot_counters(&prof);
4026 s = &prof;
4027 tb_count = s->tb_count;
4028 tb_div_count = tb_count ? tb_count : 1;
4029 tot = s->interm_time + s->code_time;
4031 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4032 tot, tot / 2.4e9);
4033 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
4034 tb_count, s->tb_count1 - tb_count,
4035 (double)(s->tb_count1 - s->tb_count)
4036 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4037 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
4038 (double)s->op_count / tb_div_count, s->op_count_max);
4039 cpu_fprintf(f, "deleted ops/TB %0.2f\n",
4040 (double)s->del_op_count / tb_div_count);
4041 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
4042 (double)s->temp_count / tb_div_count, s->temp_count_max);
4043 cpu_fprintf(f, "avg host code/TB %0.1f\n",
4044 (double)s->code_out_len / tb_div_count);
4045 cpu_fprintf(f, "avg search data/TB %0.1f\n",
4046 (double)s->search_out_len / tb_div_count);
4048 cpu_fprintf(f, "cycles/op %0.1f\n",
4049 s->op_count ? (double)tot / s->op_count : 0);
4050 cpu_fprintf(f, "cycles/in byte %0.1f\n",
4051 s->code_in_len ? (double)tot / s->code_in_len : 0);
4052 cpu_fprintf(f, "cycles/out byte %0.1f\n",
4053 s->code_out_len ? (double)tot / s->code_out_len : 0);
4054 cpu_fprintf(f, "cycles/search byte %0.1f\n",
4055 s->search_out_len ? (double)tot / s->search_out_len : 0);
4056 if (tot == 0) {
4057 tot = 1;
4059 cpu_fprintf(f, " gen_interm time %0.1f%%\n",
4060 (double)s->interm_time / tot * 100.0);
4061 cpu_fprintf(f, " gen_code time %0.1f%%\n",
4062 (double)s->code_time / tot * 100.0);
4063 cpu_fprintf(f, "optim./code time %0.1f%%\n",
4064 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4065 * 100.0);
4066 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
4067 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4068 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
4069 s->restore_count);
4070 cpu_fprintf(f, " avg cycles %0.1f\n",
4071 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4073 #else
4074 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
4076 cpu_fprintf(f, "[TCG profiler not compiled]\n");
4078 #endif
4080 #ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;

/* One registered symbol file, linked into a doubly-linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* Descriptor naming the pending action and the list of entries. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Deliberately empty, non-inlined function; the empty asm statement
 * keeps the body from being optimized away entirely.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  All other members start as zero. */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
};

/* End GDB interface.  */
/*
 * Return the byte offset of @str inside @strtab, a sequence of
 * NUL-terminated strings laid out back to back.  The search starts at
 * offset 1, skipping the first byte of the table.
 *
 * There is no end-of-table check: @str must be present in @strtab.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cur;

    for (cur = strtab + 1; strcmp(cur, str) != 0; cur += strlen(cur) + 1) {
        /* advance to the entry following the current one */
    }
    return cur - strtab;
}
4138 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4139 const void *debug_frame,
4140 size_t debug_frame_size)
4142 struct __attribute__((packed)) DebugInfo {
4143 uint32_t len;
4144 uint16_t version;
4145 uint32_t abbrev;
4146 uint8_t ptr_size;
4147 uint8_t cu_die;
4148 uint16_t cu_lang;
4149 uintptr_t cu_low_pc;
4150 uintptr_t cu_high_pc;
4151 uint8_t fn_die;
4152 char fn_name[16];
4153 uintptr_t fn_low_pc;
4154 uintptr_t fn_high_pc;
4155 uint8_t cu_eoc;
4158 struct ElfImage {
4159 ElfW(Ehdr) ehdr;
4160 ElfW(Phdr) phdr;
4161 ElfW(Shdr) shdr[7];
4162 ElfW(Sym) sym[2];
4163 struct DebugInfo di;
4164 uint8_t da[24];
4165 char str[80];
4168 struct ElfImage *img;
4170 static const struct ElfImage img_template = {
4171 .ehdr = {
4172 .e_ident[EI_MAG0] = ELFMAG0,
4173 .e_ident[EI_MAG1] = ELFMAG1,
4174 .e_ident[EI_MAG2] = ELFMAG2,
4175 .e_ident[EI_MAG3] = ELFMAG3,
4176 .e_ident[EI_CLASS] = ELF_CLASS,
4177 .e_ident[EI_DATA] = ELF_DATA,
4178 .e_ident[EI_VERSION] = EV_CURRENT,
4179 .e_type = ET_EXEC,
4180 .e_machine = ELF_HOST_MACHINE,
4181 .e_version = EV_CURRENT,
4182 .e_phoff = offsetof(struct ElfImage, phdr),
4183 .e_shoff = offsetof(struct ElfImage, shdr),
4184 .e_ehsize = sizeof(ElfW(Shdr)),
4185 .e_phentsize = sizeof(ElfW(Phdr)),
4186 .e_phnum = 1,
4187 .e_shentsize = sizeof(ElfW(Shdr)),
4188 .e_shnum = ARRAY_SIZE(img->shdr),
4189 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4190 #ifdef ELF_HOST_FLAGS
4191 .e_flags = ELF_HOST_FLAGS,
4192 #endif
4193 #ifdef ELF_OSABI
4194 .e_ident[EI_OSABI] = ELF_OSABI,
4195 #endif
4197 .phdr = {
4198 .p_type = PT_LOAD,
4199 .p_flags = PF_X,
4201 .shdr = {
4202 [0] = { .sh_type = SHT_NULL },
4203 /* Trick: The contents of code_gen_buffer are not present in
4204 this fake ELF file; that got allocated elsewhere. Therefore
4205 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4206 will not look for contents. We can record any address. */
4207 [1] = { /* .text */
4208 .sh_type = SHT_NOBITS,
4209 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4211 [2] = { /* .debug_info */
4212 .sh_type = SHT_PROGBITS,
4213 .sh_offset = offsetof(struct ElfImage, di),
4214 .sh_size = sizeof(struct DebugInfo),
4216 [3] = { /* .debug_abbrev */
4217 .sh_type = SHT_PROGBITS,
4218 .sh_offset = offsetof(struct ElfImage, da),
4219 .sh_size = sizeof(img->da),
4221 [4] = { /* .debug_frame */
4222 .sh_type = SHT_PROGBITS,
4223 .sh_offset = sizeof(struct ElfImage),
4225 [5] = { /* .symtab */
4226 .sh_type = SHT_SYMTAB,
4227 .sh_offset = offsetof(struct ElfImage, sym),
4228 .sh_size = sizeof(img->sym),
4229 .sh_info = 1,
4230 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4231 .sh_entsize = sizeof(ElfW(Sym)),
4233 [6] = { /* .strtab */
4234 .sh_type = SHT_STRTAB,
4235 .sh_offset = offsetof(struct ElfImage, str),
4236 .sh_size = sizeof(img->str),
4239 .sym = {
4240 [1] = { /* code_gen_buffer */
4241 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4242 .st_shndx = 1,
4245 .di = {
4246 .len = sizeof(struct DebugInfo) - 4,
4247 .version = 2,
4248 .ptr_size = sizeof(void *),
4249 .cu_die = 1,
4250 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4251 .fn_die = 2,
4252 .fn_name = "code_gen_buffer"
4254 .da = {
4255 1, /* abbrev number (the cu) */
4256 0x11, 1, /* DW_TAG_compile_unit, has children */
4257 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4258 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4259 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4260 0, 0, /* end of abbrev */
4261 2, /* abbrev number (the fn) */
4262 0x2e, 0, /* DW_TAG_subprogram, no children */
4263 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4264 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4265 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4266 0, 0, /* end of abbrev */
4267 0 /* no more abbrev */
4269 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4270 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4273 /* We only need a single jit entry; statically allocate it. */
4274 static struct jit_code_entry one_entry;
4276 uintptr_t buf = (uintptr_t)buf_ptr;
4277 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4278 DebugFrameHeader *dfh;
4280 img = g_malloc(img_size);
4281 *img = img_template;
4283 img->phdr.p_vaddr = buf;
4284 img->phdr.p_paddr = buf;
4285 img->phdr.p_memsz = buf_size;
4287 img->shdr[1].sh_name = find_string(img->str, ".text");
4288 img->shdr[1].sh_addr = buf;
4289 img->shdr[1].sh_size = buf_size;
4291 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4292 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4294 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4295 img->shdr[4].sh_size = debug_frame_size;
4297 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4298 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4300 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4301 img->sym[1].st_value = buf;
4302 img->sym[1].st_size = buf_size;
4304 img->di.cu_low_pc = buf;
4305 img->di.cu_high_pc = buf + buf_size;
4306 img->di.fn_low_pc = buf;
4307 img->di.fn_high_pc = buf + buf_size;
4309 dfh = (DebugFrameHeader *)(img + 1);
4310 memcpy(dfh, debug_frame, debug_frame_size);
4311 dfh->fde.func_start = buf;
4312 dfh->fde.func_len = buf_size;
4314 #ifdef DEBUG_JIT
4315 /* Enable this block to be able to debug the ELF image file creation.
4316 One can use readelf, objdump, or other inspection utilities. */
4318 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4319 if (f) {
4320 if (fwrite(img, img_size, 1, f) != img_size) {
4321 /* Avoid stupid unused return value warning for fwrite. */
4323 fclose(f);
4326 #endif
4328 one_entry.symfile_addr = img;
4329 one_entry.symfile_size = img_size;
4331 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4332 __jit_debug_descriptor.relevant_entry = &one_entry;
4333 __jit_debug_descriptor.first_entry = &one_entry;
4334 __jit_debug_register_code();
4336 #else
4337 /* No support for the feature. Provide the entry point expected by exec.c,
4338 and implement the internal function we declared earlier. */
/*
 * Variant built when ELF_HOST_MACHINE is not defined: the feature is
 * unsupported, so this does nothing and ignores every argument.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
/*
 * Variant built when ELF_HOST_MACHINE is not defined: entry point kept so
 * callers still link; it does nothing and ignores its arguments.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    (void)buf;
    (void)buf_size;
}
4349 #endif /* ELF_HOST_MACHINE */
4351 #if !TCG_TARGET_MAYBE_vec
4352 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4354 g_assert_not_reached();
4356 #endif