Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
[qemu/ar7.git] / tcg / tcg.c
blob be2c33c400cf0f9a479d2a6664035948281872e3
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40 CPU definitions. Currently they are used for qemu_ld/st
41 instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
45 #include "exec/cpu-common.h"
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
52 #include "tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
56 #else
57 # define ELF_CLASS ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
61 #else
62 # define ELF_DATA ELFDATA2LSB
63 #endif
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.inc.c and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75 intptr_t value, intptr_t addend);
77 /* The CIE and FDE header definitions will be common to all hosts. */
78 typedef struct {
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
80 uint32_t id;
81 uint8_t version;
82 char augmentation[1];
83 uint8_t code_align;
84 uint8_t data_align;
85 uint8_t return_column;
86 } DebugFrameCIE;
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 uint32_t cie_offset;
91 uintptr_t func_start;
92 uintptr_t func_len;
93 } DebugFrameFDEHeader;
95 typedef struct QEMU_PACKED {
96 DebugFrameCIE cie;
97 DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
100 static void tcg_register_jit_int(void *buf, size_t size,
101 const void *debug_frame,
102 size_t debug_frame_size)
103 __attribute__((unused));
105 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
106 static const char *target_parse_constraint(TCGArgConstraint *ct,
107 const char *ct_str, TCGType type);
108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
109 intptr_t arg2);
110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
111 static void tcg_out_movi(TCGContext *s, TCGType type,
112 TCGReg ret, tcg_target_long arg);
113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
114 const int *const_args);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg src);
118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, TCGReg base, intptr_t offset);
120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
121 TCGReg dst, tcg_target_long arg);
122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
123 unsigned vece, const TCGArg *args,
124 const int *const_args);
125 #else
126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
127 TCGReg dst, TCGReg src)
129 g_assert_not_reached();
131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
132 TCGReg dst, TCGReg base, intptr_t offset)
134 g_assert_not_reached();
136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
137 TCGReg dst, tcg_target_long arg)
139 g_assert_not_reached();
141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
142 unsigned vece, const TCGArg *args,
143 const int *const_args)
145 g_assert_not_reached();
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149 intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154 const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
159 #define TCG_HIGHWATER 1024
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
165 struct tcg_region_tree {
166 QemuMutex lock;
167 GTree *tree;
168 /* padding to avoid false sharing is computed at run-time */
172 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
173 * dynamically allocate from as demand dictates. Given appropriate region
174 * sizing, this minimizes flushes even when some TCG threads generate a lot
175 * more code than others.
177 struct tcg_region_state {
178 QemuMutex lock;
180 /* fields set at init time */
181 void *start;
182 void *start_aligned;
183 void *end;
184 size_t n;
185 size_t size; /* size of one region */
186 size_t stride; /* .size + guard size */
188 /* fields protected by the lock */
189 size_t current; /* current region index */
190 size_t agg_size_full; /* aggregate size of full regions */
193 static struct tcg_region_state region;
195 * This is an array of struct tcg_region_tree's, with padding.
196 * We use void * to simplify the computation of region_trees[i]; each
197 * struct is found every tree_size bytes.
199 static void *region_trees;
200 static size_t tree_size;
201 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
202 static TCGRegSet tcg_target_call_clobber_regs;
204 #if TCG_TARGET_INSN_UNIT_SIZE == 1
205 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
207 *s->code_ptr++ = v;
210 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
211 uint8_t v)
213 *p = v;
215 #endif
217 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
218 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
220 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
221 *s->code_ptr++ = v;
222 } else {
223 tcg_insn_unit *p = s->code_ptr;
224 memcpy(p, &v, sizeof(v));
225 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
229 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
230 uint16_t v)
232 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
233 *p = v;
234 } else {
235 memcpy(p, &v, sizeof(v));
238 #endif
240 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
241 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
243 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
244 *s->code_ptr++ = v;
245 } else {
246 tcg_insn_unit *p = s->code_ptr;
247 memcpy(p, &v, sizeof(v));
248 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
252 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
253 uint32_t v)
255 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
256 *p = v;
257 } else {
258 memcpy(p, &v, sizeof(v));
261 #endif
263 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
264 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
266 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
267 *s->code_ptr++ = v;
268 } else {
269 tcg_insn_unit *p = s->code_ptr;
270 memcpy(p, &v, sizeof(v));
271 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
275 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
276 uint64_t v)
278 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
279 *p = v;
280 } else {
281 memcpy(p, &v, sizeof(v));
284 #endif
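/*
 * Editorial sketch (not part of the original file): the tcg_outN/tcg_patchN
 * helpers above emit and patch N-bit quantities in units of
 * TCG_TARGET_INSN_UNIT_SIZE, so the same call works for byte-stream hosts
 * and fixed-width ISAs alike:
 *
 *     tcg_insn_unit *before = s->code_ptr;
 *     tcg_out32(s, 0x12345678);
 *     // s->code_ptr == before + (4 / TCG_TARGET_INSN_UNIT_SIZE),
 *     // i.e. +4 units on a 1-byte-unit host, +1 unit on a 4-byte-unit host
 */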
286 /* label relocation processing */
288 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
289 TCGLabel *l, intptr_t addend)
291 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
293 r->type = type;
294 r->ptr = code_ptr;
295 r->addend = addend;
296 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
299 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
301 tcg_debug_assert(!l->has_value);
302 l->has_value = 1;
303 l->u.value_ptr = ptr;
306 TCGLabel *gen_new_label(void)
308 TCGContext *s = tcg_ctx;
309 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
311 memset(l, 0, sizeof(TCGLabel));
312 l->id = s->nb_labels++;
313 QSIMPLEQ_INIT(&l->relocs);
315 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
317 return l;
320 static bool tcg_resolve_relocs(TCGContext *s)
322 TCGLabel *l;
324 QSIMPLEQ_FOREACH(l, &s->labels, next) {
325 TCGRelocation *r;
326 uintptr_t value = l->u.value;
328 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
329 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
330 return false;
334 return true;
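/*
 * Editorial sketch (not part of the original file): the typical label life
 * cycle as seen from a backend.  R_EXAMPLE_BRANCH and tcg_out_branch_insn
 * are placeholders for whatever the target actually provides.
 *
 *     TCGLabel *l = gen_new_label();
 *     // forward branch: destination not known yet, so record a relocation
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE_BRANCH, l, 0);
 *     tcg_out_branch_insn(s, 0);            // placeholder displacement
 *     ...
 *     tcg_out_label(s, l, s->code_ptr);     // the label's definition point
 *     ...
 *     // at the end of code generation, every recorded site is patched
 *     bool ok = tcg_resolve_relocs(s);
 */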
337 static void set_jmp_reset_offset(TCGContext *s, int which)
339 size_t off = tcg_current_code_size(s);
340 s->tb_jmp_reset_offset[which] = off;
341 /* Make sure that we didn't overflow the stored offset. */
342 assert(s->tb_jmp_reset_offset[which] == off);
345 #include "tcg-target.inc.c"
347 /* compare a pointer @ptr and a tb_tc @s */
348 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
350 if (ptr >= s->ptr + s->size) {
351 return 1;
352 } else if (ptr < s->ptr) {
353 return -1;
355 return 0;
358 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
360 const struct tb_tc *a = ap;
361 const struct tb_tc *b = bp;
364 * When both sizes are set, we know this isn't a lookup.
365 * This is the most likely case: every TB must be inserted; lookups
366 * are a lot less frequent.
368 if (likely(a->size && b->size)) {
369 if (a->ptr > b->ptr) {
370 return 1;
371 } else if (a->ptr < b->ptr) {
372 return -1;
374 /* a->ptr == b->ptr should happen only on deletions */
375 g_assert(a->size == b->size);
376 return 0;
379 * All lookups have at least one .size field set to 0.
380 * From the glib sources we see that @ap is always the lookup key. However
381 * the docs provide no guarantee, so we just mark this case as likely.
383 if (likely(a->size == 0)) {
384 return ptr_cmp_tb_tc(a->ptr, b);
386 return ptr_cmp_tb_tc(b->ptr, a);
389 static void tcg_region_trees_init(void)
391 size_t i;
393 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
394 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
395 for (i = 0; i < region.n; i++) {
396 struct tcg_region_tree *rt = region_trees + i * tree_size;
398 qemu_mutex_init(&rt->lock);
399 rt->tree = g_tree_new(tb_tc_cmp);
403 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
405 size_t region_idx;
407 if (p < region.start_aligned) {
408 region_idx = 0;
409 } else {
410 ptrdiff_t offset = p - region.start_aligned;
412 if (offset > region.stride * (region.n - 1)) {
413 region_idx = region.n - 1;
414 } else {
415 region_idx = offset / region.stride;
418 return region_trees + region_idx * tree_size;
421 void tcg_tb_insert(TranslationBlock *tb)
423 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
425 qemu_mutex_lock(&rt->lock);
426 g_tree_insert(rt->tree, &tb->tc, tb);
427 qemu_mutex_unlock(&rt->lock);
430 void tcg_tb_remove(TranslationBlock *tb)
432 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
434 qemu_mutex_lock(&rt->lock);
435 g_tree_remove(rt->tree, &tb->tc);
436 qemu_mutex_unlock(&rt->lock);
440 * Find the TB 'tb' such that
441 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
442 * Return NULL if not found.
444 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
446 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
447 TranslationBlock *tb;
448 struct tb_tc s = { .ptr = (void *)tc_ptr };
450 qemu_mutex_lock(&rt->lock);
451 tb = g_tree_lookup(rt->tree, &s);
452 qemu_mutex_unlock(&rt->lock);
453 return tb;
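/*
 * Editorial sketch (not part of the original file): a typical caller maps a
 * host code address (e.g. a return address captured while unwinding after a
 * fault) back to the TB that contains it; host_pc is a hypothetical value.
 *
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 *     if (tb == NULL) {
 *         // host_pc does not point into any live TB
 *     }
 */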
456 static void tcg_region_tree_lock_all(void)
458 size_t i;
460 for (i = 0; i < region.n; i++) {
461 struct tcg_region_tree *rt = region_trees + i * tree_size;
463 qemu_mutex_lock(&rt->lock);
467 static void tcg_region_tree_unlock_all(void)
469 size_t i;
471 for (i = 0; i < region.n; i++) {
472 struct tcg_region_tree *rt = region_trees + i * tree_size;
474 qemu_mutex_unlock(&rt->lock);
478 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
480 size_t i;
482 tcg_region_tree_lock_all();
483 for (i = 0; i < region.n; i++) {
484 struct tcg_region_tree *rt = region_trees + i * tree_size;
486 g_tree_foreach(rt->tree, func, user_data);
488 tcg_region_tree_unlock_all();
491 size_t tcg_nb_tbs(void)
493 size_t nb_tbs = 0;
494 size_t i;
496 tcg_region_tree_lock_all();
497 for (i = 0; i < region.n; i++) {
498 struct tcg_region_tree *rt = region_trees + i * tree_size;
500 nb_tbs += g_tree_nnodes(rt->tree);
502 tcg_region_tree_unlock_all();
503 return nb_tbs;
506 static void tcg_region_tree_reset_all(void)
508 size_t i;
510 tcg_region_tree_lock_all();
511 for (i = 0; i < region.n; i++) {
512 struct tcg_region_tree *rt = region_trees + i * tree_size;
514 /* Increment the refcount first so that destroy acts as a reset */
515 g_tree_ref(rt->tree);
516 g_tree_destroy(rt->tree);
518 tcg_region_tree_unlock_all();
521 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
523 void *start, *end;
525 start = region.start_aligned + curr_region * region.stride;
526 end = start + region.size;
528 if (curr_region == 0) {
529 start = region.start;
531 if (curr_region == region.n - 1) {
532 end = region.end;
535 *pstart = start;
536 *pend = end;
539 static void tcg_region_assign(TCGContext *s, size_t curr_region)
541 void *start, *end;
543 tcg_region_bounds(curr_region, &start, &end);
545 s->code_gen_buffer = start;
546 s->code_gen_ptr = start;
547 s->code_gen_buffer_size = end - start;
548 s->code_gen_highwater = end - TCG_HIGHWATER;
551 static bool tcg_region_alloc__locked(TCGContext *s)
553 if (region.current == region.n) {
554 return true;
556 tcg_region_assign(s, region.current);
557 region.current++;
558 return false;
562 * Request a new region once the one in use has filled up.
563 * Returns true on error.
565 static bool tcg_region_alloc(TCGContext *s)
567 bool err;
568 /* read the region size now; alloc__locked will overwrite it on success */
569 size_t size_full = s->code_gen_buffer_size;
571 qemu_mutex_lock(&region.lock);
572 err = tcg_region_alloc__locked(s);
573 if (!err) {
574 region.agg_size_full += size_full - TCG_HIGHWATER;
576 qemu_mutex_unlock(&region.lock);
577 return err;
581 * Perform a context's first region allocation.
582 * This function does _not_ increment region.agg_size_full.
584 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
586 return tcg_region_alloc__locked(s);
589 /* Call from a safe-work context */
590 void tcg_region_reset_all(void)
592 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
593 unsigned int i;
595 qemu_mutex_lock(&region.lock);
596 region.current = 0;
597 region.agg_size_full = 0;
599 for (i = 0; i < n_ctxs; i++) {
600 TCGContext *s = atomic_read(&tcg_ctxs[i]);
601 bool err = tcg_region_initial_alloc__locked(s);
603 g_assert(!err);
605 qemu_mutex_unlock(&region.lock);
607 tcg_region_tree_reset_all();
610 #ifdef CONFIG_USER_ONLY
611 static size_t tcg_n_regions(void)
613 return 1;
615 #else
617 * It is likely that some vCPUs will translate more code than others, so we
618 * first try to use more regions than max_cpus, with those regions being of
619 * reasonable size. If that's not possible we make do by evenly dividing
620 * the code_gen_buffer among the vCPUs.
622 static size_t tcg_n_regions(void)
624 size_t i;
626 /* Use a single region if all we have is one vCPU thread */
627 #if !defined(CONFIG_USER_ONLY)
628 MachineState *ms = MACHINE(qdev_get_machine());
629 unsigned int max_cpus = ms->smp.max_cpus;
630 #endif
631 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
632 return 1;
635 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
636 for (i = 8; i > 0; i--) {
637 size_t regions_per_thread = i;
638 size_t region_size;
640 region_size = tcg_init_ctx.code_gen_buffer_size;
641 region_size /= max_cpus * regions_per_thread;
643 if (region_size >= 2 * 1024u * 1024) {
644 return max_cpus * regions_per_thread;
647 /* If we can't, then just allocate one region per vCPU thread */
648 return max_cpus;
650 #endif
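/*
 * Editorial worked example (not part of the original file), assuming a
 * hypothetical 1 GiB code_gen_buffer and max_cpus == 8: the loop above first
 * tries i == 8, giving region_size = 1 GiB / (8 * 8) = 16 MiB >= 2 MiB, so
 * tcg_n_regions() returns 64.  Only with a much smaller buffer (or many more
 * vCPUs) does the loop fall through to fewer regions per thread, and finally
 * to one region per vCPU thread.
 */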
653 * Initializes region partitioning.
655 * Called at init time from the parent thread (i.e. the one calling
656 * tcg_context_init), after the target's TCG globals have been set.
658 * Region partitioning works by splitting code_gen_buffer into separate regions,
659 * and then assigning regions to TCG threads so that the threads can translate
660 * code in parallel without synchronization.
662 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
663 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
664 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
665 * must have been parsed before calling this function, since it calls
666 * qemu_tcg_mttcg_enabled().
668 * In user-mode we use a single region. Having multiple regions in user-mode
669 * is not supported, because the number of vCPU threads (recall that each thread
670 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
671 * OS, and usually this number is huge (tens of thousands is not uncommon).
672 * Thus, given this large bound on the number of vCPU threads and the fact
673 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
674 * the availability of at least one region per vCPU thread.
676 * However, this user-mode limitation is unlikely to be a significant problem
677 * in practice. Multi-threaded guests share most if not all of their translated
678 * code, which makes parallel code generation less appealing than in softmmu.
680 void tcg_region_init(void)
682 void *buf = tcg_init_ctx.code_gen_buffer;
683 void *aligned;
684 size_t size = tcg_init_ctx.code_gen_buffer_size;
685 size_t page_size = qemu_real_host_page_size;
686 size_t region_size;
687 size_t n_regions;
688 size_t i;
690 n_regions = tcg_n_regions();
692 /* The first region will be 'aligned - buf' bytes larger than the others */
693 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
694 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
696 * Make region_size a multiple of page_size, using aligned as the start.
697 * As a result of this we might end up with a few extra pages at the end of
698 * the buffer; we will assign those to the last region.
700 region_size = (size - (aligned - buf)) / n_regions;
701 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
703 /* A region must have at least 2 pages; one code, one guard */
704 g_assert(region_size >= 2 * page_size);
706 /* init the region struct */
707 qemu_mutex_init(&region.lock);
708 region.n = n_regions;
709 region.size = region_size - page_size;
710 region.stride = region_size;
711 region.start = buf;
712 region.start_aligned = aligned;
713 /* page-align the end, since its last page will be a guard page */
714 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
715 /* account for that last guard page */
716 region.end -= page_size;
718 /* set guard pages */
719 for (i = 0; i < region.n; i++) {
720 void *start, *end;
721 int rc;
723 tcg_region_bounds(i, &start, &end);
724 rc = qemu_mprotect_none(end, page_size);
725 g_assert(!rc);
728 tcg_region_trees_init();
730 /* In user-mode we support only one ctx, so do the initial allocation now */
731 #ifdef CONFIG_USER_ONLY
733 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
735 g_assert(!err);
737 #endif
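/*
 * Editorial worked example (not part of the original file), assuming a
 * hypothetical page-aligned 32 MiB buffer, 4 KiB pages and n_regions == 4:
 * region_size = 32 MiB / 4 = 8 MiB, so region.stride = 8 MiB and
 * region.size = 8 MiB - 4 KiB of usable code per region.  The page right
 * after each region's usable code is made inaccessible via
 * qemu_mprotect_none() above and serves as that region's guard page.
 */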
741 * All TCG threads except the parent (i.e. the one that called tcg_context_init
742 * and registered the target's TCG globals) must register with this function
743 * before initiating translation.
745 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
746 * of tcg_region_init() for the reasoning behind this.
748 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
749 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
750 * is not used anymore for translation once this function is called.
752 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
753 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
755 #ifdef CONFIG_USER_ONLY
756 void tcg_register_thread(void)
758 tcg_ctx = &tcg_init_ctx;
760 #else
761 void tcg_register_thread(void)
763 MachineState *ms = MACHINE(qdev_get_machine());
764 TCGContext *s = g_malloc(sizeof(*s));
765 unsigned int i, n;
766 bool err;
768 *s = tcg_init_ctx;
770 /* Relink mem_base. */
771 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
772 if (tcg_init_ctx.temps[i].mem_base) {
773 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
774 tcg_debug_assert(b >= 0 && b < n);
775 s->temps[i].mem_base = &s->temps[b];
779 /* Claim an entry in tcg_ctxs */
780 n = atomic_fetch_inc(&n_tcg_ctxs);
781 g_assert(n < ms->smp.max_cpus);
782 atomic_set(&tcg_ctxs[n], s);
784 tcg_ctx = s;
785 qemu_mutex_lock(&region.lock);
786 err = tcg_region_initial_alloc__locked(tcg_ctx);
787 g_assert(!err);
788 qemu_mutex_unlock(&region.lock);
790 #endif /* !CONFIG_USER_ONLY */
793 * Returns the size (in bytes) of all translated code (i.e. from all regions)
794 * currently in the cache.
795 * See also: tcg_code_capacity()
796 * Do not confuse with tcg_current_code_size(); that one applies to a single
797 * TCG context.
799 size_t tcg_code_size(void)
801 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
802 unsigned int i;
803 size_t total;
805 qemu_mutex_lock(&region.lock);
806 total = region.agg_size_full;
807 for (i = 0; i < n_ctxs; i++) {
808 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
809 size_t size;
811 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
812 g_assert(size <= s->code_gen_buffer_size);
813 total += size;
815 qemu_mutex_unlock(&region.lock);
816 return total;
820 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
821 * regions.
822 * See also: tcg_code_size()
824 size_t tcg_code_capacity(void)
826 size_t guard_size, capacity;
828 /* no need for synchronization; these variables are set at init time */
829 guard_size = region.stride - region.size;
830 capacity = region.end + guard_size - region.start;
831 capacity -= region.n * (guard_size + TCG_HIGHWATER);
832 return capacity;
835 size_t tcg_tb_phys_invalidate_count(void)
837 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
838 unsigned int i;
839 size_t total = 0;
841 for (i = 0; i < n_ctxs; i++) {
842 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
844 total += atomic_read(&s->tb_phys_invalidate_count);
846 return total;
849 /* pool based memory allocation */
850 void *tcg_malloc_internal(TCGContext *s, int size)
852 TCGPool *p;
853 int pool_size;
855 if (size > TCG_POOL_CHUNK_SIZE) {
856 /* big malloc: insert a new pool (XXX: could optimize) */
857 p = g_malloc(sizeof(TCGPool) + size);
858 p->size = size;
859 p->next = s->pool_first_large;
860 s->pool_first_large = p;
861 return p->data;
862 } else {
863 p = s->pool_current;
864 if (!p) {
865 p = s->pool_first;
866 if (!p)
867 goto new_pool;
868 } else {
869 if (!p->next) {
870 new_pool:
871 pool_size = TCG_POOL_CHUNK_SIZE;
872 p = g_malloc(sizeof(TCGPool) + pool_size);
873 p->size = pool_size;
874 p->next = NULL;
875 if (s->pool_current)
876 s->pool_current->next = p;
877 else
878 s->pool_first = p;
879 } else {
880 p = p->next;
884 s->pool_current = p;
885 s->pool_cur = p->data + size;
886 s->pool_end = p->data + p->size;
887 return p->data;
890 void tcg_pool_reset(TCGContext *s)
892 TCGPool *p, *t;
893 for (p = s->pool_first_large; p; p = t) {
894 t = p->next;
895 g_free(p);
897 s->pool_first_large = NULL;
898 s->pool_cur = s->pool_end = NULL;
899 s->pool_current = NULL;
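/*
 * Editorial sketch (not part of the original file): per-translation data
 * (relocations, labels, ...) is carved out of this pool and never freed
 * individually; the whole pool is recycled when the next translation starts.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));  // pool-backed
 *     ...
 *     tcg_pool_reset(s);   // called from tcg_func_start(); r and every
 *                          // other pool allocation become invalid at once
 */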
902 typedef struct TCGHelperInfo {
903 void *func;
904 const char *name;
905 unsigned flags;
906 unsigned sizemask;
907 } TCGHelperInfo;
909 #include "exec/helper-proto.h"
911 static const TCGHelperInfo all_helpers[] = {
912 #include "exec/helper-tcg.h"
914 static GHashTable *helper_table;
916 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
917 static void process_op_defs(TCGContext *s);
918 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
919 TCGReg reg, const char *name);
921 void tcg_context_init(TCGContext *s)
923 int op, total_args, n, i;
924 TCGOpDef *def;
925 TCGArgConstraint *args_ct;
926 int *sorted_args;
927 TCGTemp *ts;
929 memset(s, 0, sizeof(*s));
930 s->nb_globals = 0;
932 /* Count total number of arguments and allocate the corresponding
933 space */
934 total_args = 0;
935 for(op = 0; op < NB_OPS; op++) {
936 def = &tcg_op_defs[op];
937 n = def->nb_iargs + def->nb_oargs;
938 total_args += n;
941 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
942 sorted_args = g_malloc(sizeof(int) * total_args);
944 for(op = 0; op < NB_OPS; op++) {
945 def = &tcg_op_defs[op];
946 def->args_ct = args_ct;
947 def->sorted_args = sorted_args;
948 n = def->nb_iargs + def->nb_oargs;
949 sorted_args += n;
950 args_ct += n;
953 /* Register helpers. */
954 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
955 helper_table = g_hash_table_new(NULL, NULL);
957 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
958 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
959 (gpointer)&all_helpers[i]);
962 tcg_target_init(s);
963 process_op_defs(s);
965 /* Reverse the order of the saved registers, assuming they're all at
966 the start of tcg_target_reg_alloc_order. */
967 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
968 int r = tcg_target_reg_alloc_order[n];
969 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
970 break;
973 for (i = 0; i < n; ++i) {
974 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
976 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
977 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
980 tcg_ctx = s;
982 * In user-mode we simply share the init context among threads, since we
983 * use a single region. See the documentation of tcg_region_init() for the
984 * reasoning behind this.
985 * In softmmu we will have at most max_cpus TCG threads.
987 #ifdef CONFIG_USER_ONLY
988 tcg_ctxs = &tcg_ctx;
989 n_tcg_ctxs = 1;
990 #else
991 MachineState *ms = MACHINE(qdev_get_machine());
992 unsigned int max_cpus = ms->smp.max_cpus;
993 tcg_ctxs = g_new(TCGContext *, max_cpus);
994 #endif
996 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
997 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
998 cpu_env = temp_tcgv_ptr(ts);
1002 * Allocate TBs right before their corresponding translated code, making
1003 * sure that TBs and code are on different cache lines.
1005 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1007 uintptr_t align = qemu_icache_linesize;
1008 TranslationBlock *tb;
1009 void *next;
1011 retry:
1012 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1013 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1015 if (unlikely(next > s->code_gen_highwater)) {
1016 if (tcg_region_alloc(s)) {
1017 return NULL;
1019 goto retry;
1021 atomic_set(&s->code_gen_ptr, next);
1022 s->data_gen_ptr = NULL;
1023 return tb;
1026 void tcg_prologue_init(TCGContext *s)
1028 size_t prologue_size, total_size;
1029 void *buf0, *buf1;
1031 /* Put the prologue at the beginning of code_gen_buffer. */
1032 buf0 = s->code_gen_buffer;
1033 total_size = s->code_gen_buffer_size;
1034 s->code_ptr = buf0;
1035 s->code_buf = buf0;
1036 s->data_gen_ptr = NULL;
1037 s->code_gen_prologue = buf0;
1039 /* Compute a high-water mark, at which we voluntarily flush the buffer
1040 and start over. The size here is arbitrary, significantly larger
1041 than we expect the code generation for any one opcode to require. */
1042 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1044 #ifdef TCG_TARGET_NEED_POOL_LABELS
1045 s->pool_labels = NULL;
1046 #endif
1048 /* Generate the prologue. */
1049 tcg_target_qemu_prologue(s);
1051 #ifdef TCG_TARGET_NEED_POOL_LABELS
1052 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1054 int result = tcg_out_pool_finalize(s);
1055 tcg_debug_assert(result == 0);
1057 #endif
1059 buf1 = s->code_ptr;
1060 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1062 /* Deduct the prologue from the buffer. */
1063 prologue_size = tcg_current_code_size(s);
1064 s->code_gen_ptr = buf1;
1065 s->code_gen_buffer = buf1;
1066 s->code_buf = buf1;
1067 total_size -= prologue_size;
1068 s->code_gen_buffer_size = total_size;
1070 tcg_register_jit(s->code_gen_buffer, total_size);
1072 #ifdef DEBUG_DISAS
1073 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1074 qemu_log_lock();
1075 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1076 if (s->data_gen_ptr) {
1077 size_t code_size = s->data_gen_ptr - buf0;
1078 size_t data_size = prologue_size - code_size;
1079 size_t i;
1081 log_disas(buf0, code_size);
1083 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1084 if (sizeof(tcg_target_ulong) == 8) {
1085 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1086 (uintptr_t)s->data_gen_ptr + i,
1087 *(uint64_t *)(s->data_gen_ptr + i));
1088 } else {
1089 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1090 (uintptr_t)s->data_gen_ptr + i,
1091 *(uint32_t *)(s->data_gen_ptr + i));
1094 } else {
1095 log_disas(buf0, prologue_size);
1097 qemu_log("\n");
1098 qemu_log_flush();
1099 qemu_log_unlock();
1101 #endif
1103 /* Assert that goto_ptr is implemented completely. */
1104 if (TCG_TARGET_HAS_goto_ptr) {
1105 tcg_debug_assert(s->code_gen_epilogue != NULL);
1109 void tcg_func_start(TCGContext *s)
1111 tcg_pool_reset(s);
1112 s->nb_temps = s->nb_globals;
1114 /* No temps have been previously allocated for size or locality. */
1115 memset(s->free_temps, 0, sizeof(s->free_temps));
1117 s->nb_ops = 0;
1118 s->nb_labels = 0;
1119 s->current_frame_offset = s->frame_start;
1121 #ifdef CONFIG_DEBUG_TCG
1122 s->goto_tb_issue_mask = 0;
1123 #endif
1125 QTAILQ_INIT(&s->ops);
1126 QTAILQ_INIT(&s->free_ops);
1127 QSIMPLEQ_INIT(&s->labels);
1130 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1132 int n = s->nb_temps++;
1133 tcg_debug_assert(n < TCG_MAX_TEMPS);
1134 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1137 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1139 TCGTemp *ts;
1141 tcg_debug_assert(s->nb_globals == s->nb_temps);
1142 s->nb_globals++;
1143 ts = tcg_temp_alloc(s);
1144 ts->temp_global = 1;
1146 return ts;
1149 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1150 TCGReg reg, const char *name)
1152 TCGTemp *ts;
1154 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1155 tcg_abort();
1158 ts = tcg_global_alloc(s);
1159 ts->base_type = type;
1160 ts->type = type;
1161 ts->fixed_reg = 1;
1162 ts->reg = reg;
1163 ts->name = name;
1164 tcg_regset_set_reg(s->reserved_regs, reg);
1166 return ts;
1169 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1171 s->frame_start = start;
1172 s->frame_end = start + size;
1173 s->frame_temp
1174 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1177 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1178 intptr_t offset, const char *name)
1180 TCGContext *s = tcg_ctx;
1181 TCGTemp *base_ts = tcgv_ptr_temp(base);
1182 TCGTemp *ts = tcg_global_alloc(s);
1183 int indirect_reg = 0, bigendian = 0;
1184 #ifdef HOST_WORDS_BIGENDIAN
1185 bigendian = 1;
1186 #endif
1188 if (!base_ts->fixed_reg) {
1189 /* We do not support double-indirect registers. */
1190 tcg_debug_assert(!base_ts->indirect_reg);
1191 base_ts->indirect_base = 1;
1192 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1193 ? 2 : 1);
1194 indirect_reg = 1;
1197 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1198 TCGTemp *ts2 = tcg_global_alloc(s);
1199 char buf[64];
1201 ts->base_type = TCG_TYPE_I64;
1202 ts->type = TCG_TYPE_I32;
1203 ts->indirect_reg = indirect_reg;
1204 ts->mem_allocated = 1;
1205 ts->mem_base = base_ts;
1206 ts->mem_offset = offset + bigendian * 4;
1207 pstrcpy(buf, sizeof(buf), name);
1208 pstrcat(buf, sizeof(buf), "_0");
1209 ts->name = strdup(buf);
1211 tcg_debug_assert(ts2 == ts + 1);
1212 ts2->base_type = TCG_TYPE_I64;
1213 ts2->type = TCG_TYPE_I32;
1214 ts2->indirect_reg = indirect_reg;
1215 ts2->mem_allocated = 1;
1216 ts2->mem_base = base_ts;
1217 ts2->mem_offset = offset + (1 - bigendian) * 4;
1218 pstrcpy(buf, sizeof(buf), name);
1219 pstrcat(buf, sizeof(buf), "_1");
1220 ts2->name = strdup(buf);
1221 } else {
1222 ts->base_type = type;
1223 ts->type = type;
1224 ts->indirect_reg = indirect_reg;
1225 ts->mem_allocated = 1;
1226 ts->mem_base = base_ts;
1227 ts->mem_offset = offset;
1228 ts->name = name;
1230 return ts;
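/*
 * Editorial sketch (not part of the original file): targets reach this
 * function through the typed wrappers in tcg.h to expose CPU state fields
 * as TCG globals; CPUExampleState and its "foo" field are hypothetical.
 *
 *     TCGv_i32 cpu_foo = tcg_global_mem_new_i32(cpu_env,
 *                                                offsetof(CPUExampleState, foo),
 *                                                "foo");
 */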
1233 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1235 TCGContext *s = tcg_ctx;
1236 TCGTemp *ts;
1237 int idx, k;
1239 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1240 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1241 if (idx < TCG_MAX_TEMPS) {
1242 /* There is already an available temp with the right type. */
1243 clear_bit(idx, s->free_temps[k].l);
1245 ts = &s->temps[idx];
1246 ts->temp_allocated = 1;
1247 tcg_debug_assert(ts->base_type == type);
1248 tcg_debug_assert(ts->temp_local == temp_local);
1249 } else {
1250 ts = tcg_temp_alloc(s);
1251 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1252 TCGTemp *ts2 = tcg_temp_alloc(s);
1254 ts->base_type = type;
1255 ts->type = TCG_TYPE_I32;
1256 ts->temp_allocated = 1;
1257 ts->temp_local = temp_local;
1259 tcg_debug_assert(ts2 == ts + 1);
1260 ts2->base_type = TCG_TYPE_I64;
1261 ts2->type = TCG_TYPE_I32;
1262 ts2->temp_allocated = 1;
1263 ts2->temp_local = temp_local;
1264 } else {
1265 ts->base_type = type;
1266 ts->type = type;
1267 ts->temp_allocated = 1;
1268 ts->temp_local = temp_local;
1272 #if defined(CONFIG_DEBUG_TCG)
1273 s->temps_in_use++;
1274 #endif
1275 return ts;
1278 TCGv_vec tcg_temp_new_vec(TCGType type)
1280 TCGTemp *t;
1282 #ifdef CONFIG_DEBUG_TCG
1283 switch (type) {
1284 case TCG_TYPE_V64:
1285 assert(TCG_TARGET_HAS_v64);
1286 break;
1287 case TCG_TYPE_V128:
1288 assert(TCG_TARGET_HAS_v128);
1289 break;
1290 case TCG_TYPE_V256:
1291 assert(TCG_TARGET_HAS_v256);
1292 break;
1293 default:
1294 g_assert_not_reached();
1296 #endif
1298 t = tcg_temp_new_internal(type, 0);
1299 return temp_tcgv_vec(t);
1302 /* Create a new temp of the same type as an existing temp. */
1303 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1305 TCGTemp *t = tcgv_vec_temp(match);
1307 tcg_debug_assert(t->temp_allocated != 0);
1309 t = tcg_temp_new_internal(t->base_type, 0);
1310 return temp_tcgv_vec(t);
1313 void tcg_temp_free_internal(TCGTemp *ts)
1315 TCGContext *s = tcg_ctx;
1316 int k, idx;
1318 #if defined(CONFIG_DEBUG_TCG)
1319 s->temps_in_use--;
1320 if (s->temps_in_use < 0) {
1321 fprintf(stderr, "More temporaries freed than allocated!\n");
1323 #endif
1325 tcg_debug_assert(ts->temp_global == 0);
1326 tcg_debug_assert(ts->temp_allocated != 0);
1327 ts->temp_allocated = 0;
1329 idx = temp_idx(ts);
1330 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1331 set_bit(idx, s->free_temps[k].l);
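/*
 * Editorial sketch (not part of the original file): freeing a temporary only
 * marks its slot in the per-type free_temps bitmap above, so a subsequent
 * allocation of the same kind can reuse the slot instead of growing s->temps.
 *
 *     TCGv_i32 t = tcg_temp_new_i32();
 *     ...
 *     tcg_temp_free_i32(t);              // sets the bit for t's index
 *     TCGv_i32 u = tcg_temp_new_i32();   // typically hands back the same slot
 */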
1334 TCGv_i32 tcg_const_i32(int32_t val)
1336 TCGv_i32 t0;
1337 t0 = tcg_temp_new_i32();
1338 tcg_gen_movi_i32(t0, val);
1339 return t0;
1342 TCGv_i64 tcg_const_i64(int64_t val)
1344 TCGv_i64 t0;
1345 t0 = tcg_temp_new_i64();
1346 tcg_gen_movi_i64(t0, val);
1347 return t0;
1350 TCGv_i32 tcg_const_local_i32(int32_t val)
1352 TCGv_i32 t0;
1353 t0 = tcg_temp_local_new_i32();
1354 tcg_gen_movi_i32(t0, val);
1355 return t0;
1358 TCGv_i64 tcg_const_local_i64(int64_t val)
1360 TCGv_i64 t0;
1361 t0 = tcg_temp_local_new_i64();
1362 tcg_gen_movi_i64(t0, val);
1363 return t0;
1366 #if defined(CONFIG_DEBUG_TCG)
1367 void tcg_clear_temp_count(void)
1369 TCGContext *s = tcg_ctx;
1370 s->temps_in_use = 0;
1373 int tcg_check_temp_count(void)
1375 TCGContext *s = tcg_ctx;
1376 if (s->temps_in_use) {
1377 /* Clear the count so that we don't give another
1378 * warning immediately next time around.
1380 s->temps_in_use = 0;
1381 return 1;
1383 return 0;
1385 #endif
1387 /* Return true if OP may appear in the opcode stream.
1388 Test the runtime variable that controls each opcode. */
1389 bool tcg_op_supported(TCGOpcode op)
1391 const bool have_vec
1392 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1394 switch (op) {
1395 case INDEX_op_discard:
1396 case INDEX_op_set_label:
1397 case INDEX_op_call:
1398 case INDEX_op_br:
1399 case INDEX_op_mb:
1400 case INDEX_op_insn_start:
1401 case INDEX_op_exit_tb:
1402 case INDEX_op_goto_tb:
1403 case INDEX_op_qemu_ld_i32:
1404 case INDEX_op_qemu_st_i32:
1405 case INDEX_op_qemu_ld_i64:
1406 case INDEX_op_qemu_st_i64:
1407 return true;
1409 case INDEX_op_goto_ptr:
1410 return TCG_TARGET_HAS_goto_ptr;
1412 case INDEX_op_mov_i32:
1413 case INDEX_op_movi_i32:
1414 case INDEX_op_setcond_i32:
1415 case INDEX_op_brcond_i32:
1416 case INDEX_op_ld8u_i32:
1417 case INDEX_op_ld8s_i32:
1418 case INDEX_op_ld16u_i32:
1419 case INDEX_op_ld16s_i32:
1420 case INDEX_op_ld_i32:
1421 case INDEX_op_st8_i32:
1422 case INDEX_op_st16_i32:
1423 case INDEX_op_st_i32:
1424 case INDEX_op_add_i32:
1425 case INDEX_op_sub_i32:
1426 case INDEX_op_mul_i32:
1427 case INDEX_op_and_i32:
1428 case INDEX_op_or_i32:
1429 case INDEX_op_xor_i32:
1430 case INDEX_op_shl_i32:
1431 case INDEX_op_shr_i32:
1432 case INDEX_op_sar_i32:
1433 return true;
1435 case INDEX_op_movcond_i32:
1436 return TCG_TARGET_HAS_movcond_i32;
1437 case INDEX_op_div_i32:
1438 case INDEX_op_divu_i32:
1439 return TCG_TARGET_HAS_div_i32;
1440 case INDEX_op_rem_i32:
1441 case INDEX_op_remu_i32:
1442 return TCG_TARGET_HAS_rem_i32;
1443 case INDEX_op_div2_i32:
1444 case INDEX_op_divu2_i32:
1445 return TCG_TARGET_HAS_div2_i32;
1446 case INDEX_op_rotl_i32:
1447 case INDEX_op_rotr_i32:
1448 return TCG_TARGET_HAS_rot_i32;
1449 case INDEX_op_deposit_i32:
1450 return TCG_TARGET_HAS_deposit_i32;
1451 case INDEX_op_extract_i32:
1452 return TCG_TARGET_HAS_extract_i32;
1453 case INDEX_op_sextract_i32:
1454 return TCG_TARGET_HAS_sextract_i32;
1455 case INDEX_op_extract2_i32:
1456 return TCG_TARGET_HAS_extract2_i32;
1457 case INDEX_op_add2_i32:
1458 return TCG_TARGET_HAS_add2_i32;
1459 case INDEX_op_sub2_i32:
1460 return TCG_TARGET_HAS_sub2_i32;
1461 case INDEX_op_mulu2_i32:
1462 return TCG_TARGET_HAS_mulu2_i32;
1463 case INDEX_op_muls2_i32:
1464 return TCG_TARGET_HAS_muls2_i32;
1465 case INDEX_op_muluh_i32:
1466 return TCG_TARGET_HAS_muluh_i32;
1467 case INDEX_op_mulsh_i32:
1468 return TCG_TARGET_HAS_mulsh_i32;
1469 case INDEX_op_ext8s_i32:
1470 return TCG_TARGET_HAS_ext8s_i32;
1471 case INDEX_op_ext16s_i32:
1472 return TCG_TARGET_HAS_ext16s_i32;
1473 case INDEX_op_ext8u_i32:
1474 return TCG_TARGET_HAS_ext8u_i32;
1475 case INDEX_op_ext16u_i32:
1476 return TCG_TARGET_HAS_ext16u_i32;
1477 case INDEX_op_bswap16_i32:
1478 return TCG_TARGET_HAS_bswap16_i32;
1479 case INDEX_op_bswap32_i32:
1480 return TCG_TARGET_HAS_bswap32_i32;
1481 case INDEX_op_not_i32:
1482 return TCG_TARGET_HAS_not_i32;
1483 case INDEX_op_neg_i32:
1484 return TCG_TARGET_HAS_neg_i32;
1485 case INDEX_op_andc_i32:
1486 return TCG_TARGET_HAS_andc_i32;
1487 case INDEX_op_orc_i32:
1488 return TCG_TARGET_HAS_orc_i32;
1489 case INDEX_op_eqv_i32:
1490 return TCG_TARGET_HAS_eqv_i32;
1491 case INDEX_op_nand_i32:
1492 return TCG_TARGET_HAS_nand_i32;
1493 case INDEX_op_nor_i32:
1494 return TCG_TARGET_HAS_nor_i32;
1495 case INDEX_op_clz_i32:
1496 return TCG_TARGET_HAS_clz_i32;
1497 case INDEX_op_ctz_i32:
1498 return TCG_TARGET_HAS_ctz_i32;
1499 case INDEX_op_ctpop_i32:
1500 return TCG_TARGET_HAS_ctpop_i32;
1502 case INDEX_op_brcond2_i32:
1503 case INDEX_op_setcond2_i32:
1504 return TCG_TARGET_REG_BITS == 32;
1506 case INDEX_op_mov_i64:
1507 case INDEX_op_movi_i64:
1508 case INDEX_op_setcond_i64:
1509 case INDEX_op_brcond_i64:
1510 case INDEX_op_ld8u_i64:
1511 case INDEX_op_ld8s_i64:
1512 case INDEX_op_ld16u_i64:
1513 case INDEX_op_ld16s_i64:
1514 case INDEX_op_ld32u_i64:
1515 case INDEX_op_ld32s_i64:
1516 case INDEX_op_ld_i64:
1517 case INDEX_op_st8_i64:
1518 case INDEX_op_st16_i64:
1519 case INDEX_op_st32_i64:
1520 case INDEX_op_st_i64:
1521 case INDEX_op_add_i64:
1522 case INDEX_op_sub_i64:
1523 case INDEX_op_mul_i64:
1524 case INDEX_op_and_i64:
1525 case INDEX_op_or_i64:
1526 case INDEX_op_xor_i64:
1527 case INDEX_op_shl_i64:
1528 case INDEX_op_shr_i64:
1529 case INDEX_op_sar_i64:
1530 case INDEX_op_ext_i32_i64:
1531 case INDEX_op_extu_i32_i64:
1532 return TCG_TARGET_REG_BITS == 64;
1534 case INDEX_op_movcond_i64:
1535 return TCG_TARGET_HAS_movcond_i64;
1536 case INDEX_op_div_i64:
1537 case INDEX_op_divu_i64:
1538 return TCG_TARGET_HAS_div_i64;
1539 case INDEX_op_rem_i64:
1540 case INDEX_op_remu_i64:
1541 return TCG_TARGET_HAS_rem_i64;
1542 case INDEX_op_div2_i64:
1543 case INDEX_op_divu2_i64:
1544 return TCG_TARGET_HAS_div2_i64;
1545 case INDEX_op_rotl_i64:
1546 case INDEX_op_rotr_i64:
1547 return TCG_TARGET_HAS_rot_i64;
1548 case INDEX_op_deposit_i64:
1549 return TCG_TARGET_HAS_deposit_i64;
1550 case INDEX_op_extract_i64:
1551 return TCG_TARGET_HAS_extract_i64;
1552 case INDEX_op_sextract_i64:
1553 return TCG_TARGET_HAS_sextract_i64;
1554 case INDEX_op_extract2_i64:
1555 return TCG_TARGET_HAS_extract2_i64;
1556 case INDEX_op_extrl_i64_i32:
1557 return TCG_TARGET_HAS_extrl_i64_i32;
1558 case INDEX_op_extrh_i64_i32:
1559 return TCG_TARGET_HAS_extrh_i64_i32;
1560 case INDEX_op_ext8s_i64:
1561 return TCG_TARGET_HAS_ext8s_i64;
1562 case INDEX_op_ext16s_i64:
1563 return TCG_TARGET_HAS_ext16s_i64;
1564 case INDEX_op_ext32s_i64:
1565 return TCG_TARGET_HAS_ext32s_i64;
1566 case INDEX_op_ext8u_i64:
1567 return TCG_TARGET_HAS_ext8u_i64;
1568 case INDEX_op_ext16u_i64:
1569 return TCG_TARGET_HAS_ext16u_i64;
1570 case INDEX_op_ext32u_i64:
1571 return TCG_TARGET_HAS_ext32u_i64;
1572 case INDEX_op_bswap16_i64:
1573 return TCG_TARGET_HAS_bswap16_i64;
1574 case INDEX_op_bswap32_i64:
1575 return TCG_TARGET_HAS_bswap32_i64;
1576 case INDEX_op_bswap64_i64:
1577 return TCG_TARGET_HAS_bswap64_i64;
1578 case INDEX_op_not_i64:
1579 return TCG_TARGET_HAS_not_i64;
1580 case INDEX_op_neg_i64:
1581 return TCG_TARGET_HAS_neg_i64;
1582 case INDEX_op_andc_i64:
1583 return TCG_TARGET_HAS_andc_i64;
1584 case INDEX_op_orc_i64:
1585 return TCG_TARGET_HAS_orc_i64;
1586 case INDEX_op_eqv_i64:
1587 return TCG_TARGET_HAS_eqv_i64;
1588 case INDEX_op_nand_i64:
1589 return TCG_TARGET_HAS_nand_i64;
1590 case INDEX_op_nor_i64:
1591 return TCG_TARGET_HAS_nor_i64;
1592 case INDEX_op_clz_i64:
1593 return TCG_TARGET_HAS_clz_i64;
1594 case INDEX_op_ctz_i64:
1595 return TCG_TARGET_HAS_ctz_i64;
1596 case INDEX_op_ctpop_i64:
1597 return TCG_TARGET_HAS_ctpop_i64;
1598 case INDEX_op_add2_i64:
1599 return TCG_TARGET_HAS_add2_i64;
1600 case INDEX_op_sub2_i64:
1601 return TCG_TARGET_HAS_sub2_i64;
1602 case INDEX_op_mulu2_i64:
1603 return TCG_TARGET_HAS_mulu2_i64;
1604 case INDEX_op_muls2_i64:
1605 return TCG_TARGET_HAS_muls2_i64;
1606 case INDEX_op_muluh_i64:
1607 return TCG_TARGET_HAS_muluh_i64;
1608 case INDEX_op_mulsh_i64:
1609 return TCG_TARGET_HAS_mulsh_i64;
1611 case INDEX_op_mov_vec:
1612 case INDEX_op_dup_vec:
1613 case INDEX_op_dupi_vec:
1614 case INDEX_op_dupm_vec:
1615 case INDEX_op_ld_vec:
1616 case INDEX_op_st_vec:
1617 case INDEX_op_add_vec:
1618 case INDEX_op_sub_vec:
1619 case INDEX_op_and_vec:
1620 case INDEX_op_or_vec:
1621 case INDEX_op_xor_vec:
1622 case INDEX_op_cmp_vec:
1623 return have_vec;
1624 case INDEX_op_dup2_vec:
1625 return have_vec && TCG_TARGET_REG_BITS == 32;
1626 case INDEX_op_not_vec:
1627 return have_vec && TCG_TARGET_HAS_not_vec;
1628 case INDEX_op_neg_vec:
1629 return have_vec && TCG_TARGET_HAS_neg_vec;
1630 case INDEX_op_abs_vec:
1631 return have_vec && TCG_TARGET_HAS_abs_vec;
1632 case INDEX_op_andc_vec:
1633 return have_vec && TCG_TARGET_HAS_andc_vec;
1634 case INDEX_op_orc_vec:
1635 return have_vec && TCG_TARGET_HAS_orc_vec;
1636 case INDEX_op_mul_vec:
1637 return have_vec && TCG_TARGET_HAS_mul_vec;
1638 case INDEX_op_shli_vec:
1639 case INDEX_op_shri_vec:
1640 case INDEX_op_sari_vec:
1641 return have_vec && TCG_TARGET_HAS_shi_vec;
1642 case INDEX_op_shls_vec:
1643 case INDEX_op_shrs_vec:
1644 case INDEX_op_sars_vec:
1645 return have_vec && TCG_TARGET_HAS_shs_vec;
1646 case INDEX_op_shlv_vec:
1647 case INDEX_op_shrv_vec:
1648 case INDEX_op_sarv_vec:
1649 return have_vec && TCG_TARGET_HAS_shv_vec;
1650 case INDEX_op_ssadd_vec:
1651 case INDEX_op_usadd_vec:
1652 case INDEX_op_sssub_vec:
1653 case INDEX_op_ussub_vec:
1654 return have_vec && TCG_TARGET_HAS_sat_vec;
1655 case INDEX_op_smin_vec:
1656 case INDEX_op_umin_vec:
1657 case INDEX_op_smax_vec:
1658 case INDEX_op_umax_vec:
1659 return have_vec && TCG_TARGET_HAS_minmax_vec;
1660 case INDEX_op_bitsel_vec:
1661 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1662 case INDEX_op_cmpsel_vec:
1663 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1665 default:
1666 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1667 return true;
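/*
 * Editorial sketch (not part of the original file): generic expansion code
 * can consult this predicate to choose between emitting an opcode directly
 * and falling back to an equivalent sequence, e.g.:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop_i32 directly
 *     } else {
 *         // expand population count from simpler ops
 *     }
 */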
1671 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1672 and endian swap. Maybe it would be better to do the alignment
1673 and endian swap in tcg_reg_alloc_call(). */
1674 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1676 int i, real_args, nb_rets, pi;
1677 unsigned sizemask, flags;
1678 TCGHelperInfo *info;
1679 TCGOp *op;
1681 info = g_hash_table_lookup(helper_table, (gpointer)func);
1682 flags = info->flags;
1683 sizemask = info->sizemask;
1685 #if defined(__sparc__) && !defined(__arch64__) \
1686 && !defined(CONFIG_TCG_INTERPRETER)
1687 /* We have 64-bit values in one register, but need to pass as two
1688 separate parameters. Split them. */
1689 int orig_sizemask = sizemask;
1690 int orig_nargs = nargs;
1691 TCGv_i64 retl, reth;
1692 TCGTemp *split_args[MAX_OPC_PARAM];
1694 retl = NULL;
1695 reth = NULL;
1696 if (sizemask != 0) {
1697 for (i = real_args = 0; i < nargs; ++i) {
1698 int is_64bit = sizemask & (1 << (i+1)*2);
1699 if (is_64bit) {
1700 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1701 TCGv_i32 h = tcg_temp_new_i32();
1702 TCGv_i32 l = tcg_temp_new_i32();
1703 tcg_gen_extr_i64_i32(l, h, orig);
1704 split_args[real_args++] = tcgv_i32_temp(h);
1705 split_args[real_args++] = tcgv_i32_temp(l);
1706 } else {
1707 split_args[real_args++] = args[i];
1710 nargs = real_args;
1711 args = split_args;
1712 sizemask = 0;
1714 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1715 for (i = 0; i < nargs; ++i) {
1716 int is_64bit = sizemask & (1 << (i+1)*2);
1717 int is_signed = sizemask & (2 << (i+1)*2);
1718 if (!is_64bit) {
1719 TCGv_i64 temp = tcg_temp_new_i64();
1720 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1721 if (is_signed) {
1722 tcg_gen_ext32s_i64(temp, orig);
1723 } else {
1724 tcg_gen_ext32u_i64(temp, orig);
1726 args[i] = tcgv_i64_temp(temp);
1729 #endif /* TCG_TARGET_EXTEND_ARGS */
1731 op = tcg_emit_op(INDEX_op_call);
1733 pi = 0;
1734 if (ret != NULL) {
1735 #if defined(__sparc__) && !defined(__arch64__) \
1736 && !defined(CONFIG_TCG_INTERPRETER)
1737 if (orig_sizemask & 1) {
1738 /* The 32-bit ABI is going to return the 64-bit value in
1739 the %o0/%o1 register pair. Prepare for this by using
1740 two return temporaries, and reassemble below. */
1741 retl = tcg_temp_new_i64();
1742 reth = tcg_temp_new_i64();
1743 op->args[pi++] = tcgv_i64_arg(reth);
1744 op->args[pi++] = tcgv_i64_arg(retl);
1745 nb_rets = 2;
1746 } else {
1747 op->args[pi++] = temp_arg(ret);
1748 nb_rets = 1;
1750 #else
1751 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1752 #ifdef HOST_WORDS_BIGENDIAN
1753 op->args[pi++] = temp_arg(ret + 1);
1754 op->args[pi++] = temp_arg(ret);
1755 #else
1756 op->args[pi++] = temp_arg(ret);
1757 op->args[pi++] = temp_arg(ret + 1);
1758 #endif
1759 nb_rets = 2;
1760 } else {
1761 op->args[pi++] = temp_arg(ret);
1762 nb_rets = 1;
1764 #endif
1765 } else {
1766 nb_rets = 0;
1768 TCGOP_CALLO(op) = nb_rets;
1770 real_args = 0;
1771 for (i = 0; i < nargs; i++) {
1772 int is_64bit = sizemask & (1 << (i+1)*2);
1773 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1774 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1775 /* some targets want aligned 64 bit args */
1776 if (real_args & 1) {
1777 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1778 real_args++;
1780 #endif
1781 /* If stack grows up, then we will be placing successive
1782 arguments at lower addresses, which means we need to
1783 reverse the order compared to how we would normally
1784 treat either big or little-endian. For those arguments
1785 that will wind up in registers, this still works for
1786 HPPA (the only current STACK_GROWSUP target) since the
1787 argument registers are *also* allocated in decreasing
1788 order. If another such target is added, this logic may
1789 have to get more complicated to differentiate between
1790 stack arguments and register arguments. */
1791 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1792 op->args[pi++] = temp_arg(args[i] + 1);
1793 op->args[pi++] = temp_arg(args[i]);
1794 #else
1795 op->args[pi++] = temp_arg(args[i]);
1796 op->args[pi++] = temp_arg(args[i] + 1);
1797 #endif
1798 real_args += 2;
1799 continue;
1802 op->args[pi++] = temp_arg(args[i]);
1803 real_args++;
1805 op->args[pi++] = (uintptr_t)func;
1806 op->args[pi++] = flags;
1807 TCGOP_CALLI(op) = real_args;
1809 /* Make sure the fields didn't overflow. */
1810 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1811 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1813 #if defined(__sparc__) && !defined(__arch64__) \
1814 && !defined(CONFIG_TCG_INTERPRETER)
1815 /* Free all of the parts we allocated above. */
1816 for (i = real_args = 0; i < orig_nargs; ++i) {
1817 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1818 if (is_64bit) {
1819 tcg_temp_free_internal(args[real_args++]);
1820 tcg_temp_free_internal(args[real_args++]);
1821 } else {
1822 real_args++;
1825 if (orig_sizemask & 1) {
1826 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1827 Note that describing these as TCGv_i64 eliminates an unnecessary
1828 zero-extension that tcg_gen_concat_i32_i64 would create. */
1829 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1830 tcg_temp_free_i64(retl);
1831 tcg_temp_free_i64(reth);
1833 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1834 for (i = 0; i < nargs; ++i) {
1835 int is_64bit = sizemask & (1 << (i+1)*2);
1836 if (!is_64bit) {
1837 tcg_temp_free_internal(args[i]);
1840 #endif /* TCG_TARGET_EXTEND_ARGS */
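/*
 * Editorial worked example (not part of the original file): the sizemask
 * tests above use two bits per slot -- bits 0/1 describe the return value
 * and bits (i+1)*2 and (i+1)*2+1 describe argument i (64-bit and signed,
 * respectively).  A hypothetical sizemask of 0x11 therefore decodes as:
 * bit 0 set -> 64-bit return; bits 2-3 clear -> argument 0 is a 32-bit
 * unsigned value; bit 4 set -> argument 1 is 64-bit.
 */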
1843 static void tcg_reg_alloc_start(TCGContext *s)
1845 int i, n;
1846 TCGTemp *ts;
1848 for (i = 0, n = s->nb_globals; i < n; i++) {
1849 ts = &s->temps[i];
1850 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1852 for (n = s->nb_temps; i < n; i++) {
1853 ts = &s->temps[i];
1854 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1855 ts->mem_allocated = 0;
1856 ts->fixed_reg = 0;
1859 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1862 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1863 TCGTemp *ts)
1865 int idx = temp_idx(ts);
1867 if (ts->temp_global) {
1868 pstrcpy(buf, buf_size, ts->name);
1869 } else if (ts->temp_local) {
1870 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1871 } else {
1872 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1874 return buf;
1877 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1878 int buf_size, TCGArg arg)
1880 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1883 /* Find helper name. */
1884 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1886 const char *ret = NULL;
1887 if (helper_table) {
1888 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1889 if (info) {
1890 ret = info->name;
1893 return ret;
1896 static const char * const cond_name[] =
1898 [TCG_COND_NEVER] = "never",
1899 [TCG_COND_ALWAYS] = "always",
1900 [TCG_COND_EQ] = "eq",
1901 [TCG_COND_NE] = "ne",
1902 [TCG_COND_LT] = "lt",
1903 [TCG_COND_GE] = "ge",
1904 [TCG_COND_LE] = "le",
1905 [TCG_COND_GT] = "gt",
1906 [TCG_COND_LTU] = "ltu",
1907 [TCG_COND_GEU] = "geu",
1908 [TCG_COND_LEU] = "leu",
1909 [TCG_COND_GTU] = "gtu"
1912 static const char * const ldst_name[] =
1914 [MO_UB] = "ub",
1915 [MO_SB] = "sb",
1916 [MO_LEUW] = "leuw",
1917 [MO_LESW] = "lesw",
1918 [MO_LEUL] = "leul",
1919 [MO_LESL] = "lesl",
1920 [MO_LEQ] = "leq",
1921 [MO_BEUW] = "beuw",
1922 [MO_BESW] = "besw",
1923 [MO_BEUL] = "beul",
1924 [MO_BESL] = "besl",
1925 [MO_BEQ] = "beq",
1928 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1929 #ifdef ALIGNED_ONLY
1930 [MO_UNALN >> MO_ASHIFT] = "un+",
1931 [MO_ALIGN >> MO_ASHIFT] = "",
1932 #else
1933 [MO_UNALN >> MO_ASHIFT] = "",
1934 [MO_ALIGN >> MO_ASHIFT] = "al+",
1935 #endif
1936 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1937 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1938 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1939 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1940 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1941 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1944 static inline bool tcg_regset_single(TCGRegSet d)
1946 return (d & (d - 1)) == 0;
1949 static inline TCGReg tcg_regset_first(TCGRegSet d)
1951 if (TCG_TARGET_NB_REGS <= 32) {
1952 return ctz32(d);
1953 } else {
1954 return ctz64(d);
1958 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1960 char buf[128];
1961 TCGOp *op;
1963 QTAILQ_FOREACH(op, &s->ops, link) {
1964 int i, k, nb_oargs, nb_iargs, nb_cargs;
1965 const TCGOpDef *def;
1966 TCGOpcode c;
1967 int col = 0;
1969 c = op->opc;
1970 def = &tcg_op_defs[c];
1972 if (c == INDEX_op_insn_start) {
1973 nb_oargs = 0;
1974 col += qemu_log("\n ----");
1976 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1977 target_ulong a;
1978 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1979 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1980 #else
1981 a = op->args[i];
1982 #endif
1983 col += qemu_log(" " TARGET_FMT_lx, a);
1985 } else if (c == INDEX_op_call) {
1986 /* variable number of arguments */
1987 nb_oargs = TCGOP_CALLO(op);
1988 nb_iargs = TCGOP_CALLI(op);
1989 nb_cargs = def->nb_cargs;
1991 /* function name, flags, out args */
1992 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1993 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1994 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1995 for (i = 0; i < nb_oargs; i++) {
1996 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1997 op->args[i]));
1999 for (i = 0; i < nb_iargs; i++) {
2000 TCGArg arg = op->args[nb_oargs + i];
2001 const char *t = "<dummy>";
2002 if (arg != TCG_CALL_DUMMY_ARG) {
2003 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2005 col += qemu_log(",%s", t);
2007 } else {
2008 col += qemu_log(" %s ", def->name);
2010 nb_oargs = def->nb_oargs;
2011 nb_iargs = def->nb_iargs;
2012 nb_cargs = def->nb_cargs;
2014 if (def->flags & TCG_OPF_VECTOR) {
2015 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2016 8 << TCGOP_VECE(op));
2019 k = 0;
2020 for (i = 0; i < nb_oargs; i++) {
2021 if (k != 0) {
2022 col += qemu_log(",");
2024 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2025 op->args[k++]));
2027 for (i = 0; i < nb_iargs; i++) {
2028 if (k != 0) {
2029 col += qemu_log(",");
2031 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2032 op->args[k++]));
2034 switch (c) {
2035 case INDEX_op_brcond_i32:
2036 case INDEX_op_setcond_i32:
2037 case INDEX_op_movcond_i32:
2038 case INDEX_op_brcond2_i32:
2039 case INDEX_op_setcond2_i32:
2040 case INDEX_op_brcond_i64:
2041 case INDEX_op_setcond_i64:
2042 case INDEX_op_movcond_i64:
2043 case INDEX_op_cmp_vec:
2044 case INDEX_op_cmpsel_vec:
2045 if (op->args[k] < ARRAY_SIZE(cond_name)
2046 && cond_name[op->args[k]]) {
2047 col += qemu_log(",%s", cond_name[op->args[k++]]);
2048 } else {
2049 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2051 i = 1;
2052 break;
2053 case INDEX_op_qemu_ld_i32:
2054 case INDEX_op_qemu_st_i32:
2055 case INDEX_op_qemu_ld_i64:
2056 case INDEX_op_qemu_st_i64:
2058 TCGMemOpIdx oi = op->args[k++];
2059 TCGMemOp mop = get_memop(oi);
2060 unsigned ix = get_mmuidx(oi);
2062 if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2063 col += qemu_log(",$0x%x,%u", mop, ix);
2064 } else {
2065 const char *s_al, *s_op;
2066 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2067 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2068 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2070 i = 1;
2072 break;
2073 default:
2074 i = 0;
2075 break;
2077 switch (c) {
2078 case INDEX_op_set_label:
2079 case INDEX_op_br:
2080 case INDEX_op_brcond_i32:
2081 case INDEX_op_brcond_i64:
2082 case INDEX_op_brcond2_i32:
2083 col += qemu_log("%s$L%d", k ? "," : "",
2084 arg_label(op->args[k])->id);
2085 i++, k++;
2086 break;
2087 default:
2088 break;
2090 for (; i < nb_cargs; i++, k++) {
2091 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2095 if (have_prefs || op->life) {
2096 for (; col < 40; ++col) {
2097 putc(' ', qemu_logfile);
2101 if (op->life) {
2102 unsigned life = op->life;
2104 if (life & (SYNC_ARG * 3)) {
2105 qemu_log(" sync:");
2106 for (i = 0; i < 2; ++i) {
2107 if (life & (SYNC_ARG << i)) {
2108 qemu_log(" %d", i);
2112 life /= DEAD_ARG;
2113 if (life) {
2114 qemu_log(" dead:");
2115 for (i = 0; life; ++i, life >>= 1) {
2116 if (life & 1) {
2117 qemu_log(" %d", i);
2123 if (have_prefs) {
2124 for (i = 0; i < nb_oargs; ++i) {
2125 TCGRegSet set = op->output_pref[i];
2127 if (i == 0) {
2128 qemu_log(" pref=");
2129 } else {
2130 qemu_log(",");
2132 if (set == 0) {
2133 qemu_log("none");
2134 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2135 qemu_log("all");
2136 #ifdef CONFIG_DEBUG_TCG
2137 } else if (tcg_regset_single(set)) {
2138 TCGReg reg = tcg_regset_first(set);
2139 qemu_log("%s", tcg_target_reg_names[reg]);
2140 #endif
2141 } else if (TCG_TARGET_NB_REGS <= 32) {
2142 qemu_log("%#x", (uint32_t)set);
2143 } else {
2144 qemu_log("%#" PRIx64, (uint64_t)set);
2149 qemu_log("\n");
2153 /* we give more priority to constraints with fewer registers */
2154 static int get_constraint_priority(const TCGOpDef *def, int k)
2156 const TCGArgConstraint *arg_ct;
2158 int i, n;
2159 arg_ct = &def->args_ct[k];
2160 if (arg_ct->ct & TCG_CT_ALIAS) {
2161 /* an alias is equivalent to a single register */
2162 n = 1;
2163 } else {
2164 if (!(arg_ct->ct & TCG_CT_REG))
2165 return 0;
2166 n = 0;
2167 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2168 if (tcg_regset_test_reg(arg_ct->u.regs, i))
2169 n++;
2172 return TCG_TARGET_NB_REGS - n + 1;
2175 /* sort from highest priority to lowest */
2176 static void sort_constraints(TCGOpDef *def, int start, int n)
2178 int i, j, p1, p2, tmp;
2180 for(i = 0; i < n; i++)
2181 def->sorted_args[start + i] = start + i;
2182 if (n <= 1)
2183 return;
2184 for(i = 0; i < n - 1; i++) {
2185 for(j = i + 1; j < n; j++) {
2186 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2187 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2188 if (p1 < p2) {
2189 tmp = def->sorted_args[start + i];
2190 def->sorted_args[start + i] = def->sorted_args[start + j];
2191 def->sorted_args[start + j] = tmp;
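/* Build args_ct for every opcode from the backend's constraint strings and
   sort the constraints by priority.  For example, a constraint set like
   { "r", "r", "ri" } describes a register output, a register input and a
   register-or-immediate input; a leading digit aliases the argument to the
   numbered output, and '&' requests a fresh output register. */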
2197 static void process_op_defs(TCGContext *s)
2199 TCGOpcode op;
2201 for (op = 0; op < NB_OPS; op++) {
2202 TCGOpDef *def = &tcg_op_defs[op];
2203 const TCGTargetOpDef *tdefs;
2204 TCGType type;
2205 int i, nb_args;
2207 if (def->flags & TCG_OPF_NOT_PRESENT) {
2208 continue;
2211 nb_args = def->nb_iargs + def->nb_oargs;
2212 if (nb_args == 0) {
2213 continue;
2216 tdefs = tcg_target_op_def(op);
2217 /* Missing TCGTargetOpDef entry. */
2218 tcg_debug_assert(tdefs != NULL);
2220 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2221 for (i = 0; i < nb_args; i++) {
2222 const char *ct_str = tdefs->args_ct_str[i];
2223 /* Incomplete TCGTargetOpDef entry. */
2224 tcg_debug_assert(ct_str != NULL);
2226 def->args_ct[i].u.regs = 0;
2227 def->args_ct[i].ct = 0;
2228 while (*ct_str != '\0') {
2229 switch(*ct_str) {
2230 case '0' ... '9':
2232 int oarg = *ct_str - '0';
2233 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2234 tcg_debug_assert(oarg < def->nb_oargs);
2235 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2236 /* TCG_CT_ALIAS is for the output arguments.
2237 The input is tagged with TCG_CT_IALIAS. */
2238 def->args_ct[i] = def->args_ct[oarg];
2239 def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2240 def->args_ct[oarg].alias_index = i;
2241 def->args_ct[i].ct |= TCG_CT_IALIAS;
2242 def->args_ct[i].alias_index = oarg;
2244 ct_str++;
2245 break;
2246 case '&':
2247 def->args_ct[i].ct |= TCG_CT_NEWREG;
2248 ct_str++;
2249 break;
2250 case 'i':
2251 def->args_ct[i].ct |= TCG_CT_CONST;
2252 ct_str++;
2253 break;
2254 default:
2255 ct_str = target_parse_constraint(&def->args_ct[i],
2256 ct_str, type);
2257 /* Typo in TCGTargetOpDef constraint. */
2258 tcg_debug_assert(ct_str != NULL);
2263 /* TCGTargetOpDef entry with too much information? */
2264 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2266 /* sort the constraints (XXX: this is just a heuristic) */
2267 sort_constraints(def, 0, def->nb_oargs);
2268 sort_constraints(def, def->nb_oargs, def->nb_iargs);
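/* Unlink an op from the instruction stream, dropping any label reference
   it holds, and put it on the free list for later reuse. */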
2272 void tcg_op_remove(TCGContext *s, TCGOp *op)
2274 TCGLabel *label;
2276 switch (op->opc) {
2277 case INDEX_op_br:
2278 label = arg_label(op->args[0]);
2279 label->refs--;
2280 break;
2281 case INDEX_op_brcond_i32:
2282 case INDEX_op_brcond_i64:
2283 label = arg_label(op->args[3]);
2284 label->refs--;
2285 break;
2286 case INDEX_op_brcond2_i32:
2287 label = arg_label(op->args[5]);
2288 label->refs--;
2289 break;
2290 default:
2291 break;
2294 QTAILQ_REMOVE(&s->ops, op, link);
2295 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2296 s->nb_ops--;
2298 #ifdef CONFIG_PROFILER
2299 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2300 #endif
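/* Allocate an op, reusing an entry from the free list when one is
   available; all fields except the list link are cleared. */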
2303 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2305 TCGContext *s = tcg_ctx;
2306 TCGOp *op;
2308 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2309 op = tcg_malloc(sizeof(TCGOp));
2310 } else {
2311 op = QTAILQ_FIRST(&s->free_ops);
2312 QTAILQ_REMOVE(&s->free_ops, op, link);
2314 memset(op, 0, offsetof(TCGOp, link));
2315 op->opc = opc;
2316 s->nb_ops++;
2318 return op;
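/* Allocate an op and append it to the end of the current op stream. */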
2321 TCGOp *tcg_emit_op(TCGOpcode opc)
2323 TCGOp *op = tcg_op_alloc(opc);
2324 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2325 return op;
2328 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2330 TCGOp *new_op = tcg_op_alloc(opc);
2331 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2332 return new_op;
2335 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2337 TCGOp *new_op = tcg_op_alloc(opc);
2338 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2339 return new_op;
2342 /* Reachability analysis: remove unreachable code. */
2343 static void reachable_code_pass(TCGContext *s)
2345 TCGOp *op, *op_next;
2346 bool dead = false;
2348 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2349 bool remove = dead;
2350 TCGLabel *label;
2351 int call_flags;
2353 switch (op->opc) {
2354 case INDEX_op_set_label:
2355 label = arg_label(op->args[0]);
2356 if (label->refs == 0) {
2358 * While there is an occasional backward branch, virtually
2359 * all branches generated by the translators are forward.
2360 * Which means that generally we will have already removed
2361 * all references to this label, and there is
2362 * little to be gained by iterating.
2364 remove = true;
2365 } else {
2366 /* Once we see a label, insns become live again. */
2367 dead = false;
2368 remove = false;
2371 * Optimization can fold conditional branches to unconditional.
2372 * If we find a label with one reference which is preceded by
2373 * an unconditional branch to it, remove both. This needed to
2374 * wait until the dead code in between them was removed.
2376 if (label->refs == 1) {
2377 TCGOp *op_prev = QTAILQ_PREV(op, link);
2378 if (op_prev->opc == INDEX_op_br &&
2379 label == arg_label(op_prev->args[0])) {
2380 tcg_op_remove(s, op_prev);
2381 remove = true;
2385 break;
2387 case INDEX_op_br:
2388 case INDEX_op_exit_tb:
2389 case INDEX_op_goto_ptr:
2390 /* Unconditional branches; everything following is dead. */
2391 dead = true;
2392 break;
2394 case INDEX_op_call:
2395 /* Notice noreturn helper calls, raising exceptions. */
2396 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2397 if (call_flags & TCG_CALL_NO_RETURN) {
2398 dead = true;
2400 break;
2402 case INDEX_op_insn_start:
2403 /* Never remove -- we need to keep these for unwind. */
2404 remove = false;
2405 break;
2407 default:
2408 break;
2411 if (remove) {
2412 tcg_op_remove(s, op);
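/* Temp state bits used by the liveness passes: TS_DEAD marks a value that
   is not needed by any later op; TS_MEM marks a value that lives in (or
   must be synced back to) memory. */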
2417 #define TS_DEAD 1
2418 #define TS_MEM 2
2420 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2421 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2423 /* For liveness_pass_1, the register preferences for a given temp. */
2424 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2426 return ts->state_ptr;
2429 /* For liveness_pass_1, reset the preferences for a given temp to the
2430 * maximal regset for its type.
2432 static inline void la_reset_pref(TCGTemp *ts)
2434 *la_temp_pref(ts)
2435 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2438 /* liveness analysis: end of function: all temps are dead, and globals
2439 should be in memory. */
2440 static void la_func_end(TCGContext *s, int ng, int nt)
2442 int i;
2444 for (i = 0; i < ng; ++i) {
2445 s->temps[i].state = TS_DEAD | TS_MEM;
2446 la_reset_pref(&s->temps[i]);
2448 for (i = ng; i < nt; ++i) {
2449 s->temps[i].state = TS_DEAD;
2450 la_reset_pref(&s->temps[i]);
2454 /* liveness analysis: end of basic block: all temps are dead, globals
2455 and local temps should be in memory. */
2456 static void la_bb_end(TCGContext *s, int ng, int nt)
2458 int i;
2460 for (i = 0; i < ng; ++i) {
2461 s->temps[i].state = TS_DEAD | TS_MEM;
2462 la_reset_pref(&s->temps[i]);
2464 for (i = ng; i < nt; ++i) {
2465 s->temps[i].state = (s->temps[i].temp_local
2466 ? TS_DEAD | TS_MEM
2467 : TS_DEAD);
2468 la_reset_pref(&s->temps[i]);
2472 /* liveness analysis: sync globals back to memory. */
2473 static void la_global_sync(TCGContext *s, int ng)
2475 int i;
2477 for (i = 0; i < ng; ++i) {
2478 int state = s->temps[i].state;
2479 s->temps[i].state = state | TS_MEM;
2480 if (state == TS_DEAD) {
2481 /* If the global was previously dead, reset prefs. */
2482 la_reset_pref(&s->temps[i]);
2487 /* liveness analysis: sync globals back to memory and kill. */
2488 static void la_global_kill(TCGContext *s, int ng)
2490 int i;
2492 for (i = 0; i < ng; i++) {
2493 s->temps[i].state = TS_DEAD | TS_MEM;
2494 la_reset_pref(&s->temps[i]);
2498 /* liveness analysis: note live globals crossing calls. */
2499 static void la_cross_call(TCGContext *s, int nt)
2501 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2502 int i;
2504 for (i = 0; i < nt; i++) {
2505 TCGTemp *ts = &s->temps[i];
2506 if (!(ts->state & TS_DEAD)) {
2507 TCGRegSet *pset = la_temp_pref(ts);
2508 TCGRegSet set = *pset;
2510 set &= mask;
2511 /* If the combination is not possible, restart. */
2512 if (set == 0) {
2513 set = tcg_target_available_regs[ts->type] & mask;
2515 *pset = set;
2520 /* Liveness analysis: update the opc_arg_life array to tell if a
2521 given input argument is dead. Instructions updating dead
2522 temporaries are removed. */
2523 static void liveness_pass_1(TCGContext *s)
2525 int nb_globals = s->nb_globals;
2526 int nb_temps = s->nb_temps;
2527 TCGOp *op, *op_prev;
2528 TCGRegSet *prefs;
2529 int i;
2531 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2532 for (i = 0; i < nb_temps; ++i) {
2533 s->temps[i].state_ptr = prefs + i;
2536 /* ??? Should be redundant with the exit_tb that ends the TB. */
2537 la_func_end(s, nb_globals, nb_temps);
2539 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2540 int nb_iargs, nb_oargs;
2541 TCGOpcode opc_new, opc_new2;
2542 bool have_opc_new2;
2543 TCGLifeData arg_life = 0;
2544 TCGTemp *ts;
2545 TCGOpcode opc = op->opc;
2546 const TCGOpDef *def = &tcg_op_defs[opc];
2548 switch (opc) {
2549 case INDEX_op_call:
2551 int call_flags;
2552 int nb_call_regs;
2554 nb_oargs = TCGOP_CALLO(op);
2555 nb_iargs = TCGOP_CALLI(op);
2556 call_flags = op->args[nb_oargs + nb_iargs + 1];
2558 /* pure functions can be removed if their result is unused */
2559 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2560 for (i = 0; i < nb_oargs; i++) {
2561 ts = arg_temp(op->args[i]);
2562 if (ts->state != TS_DEAD) {
2563 goto do_not_remove_call;
2566 goto do_remove;
2568 do_not_remove_call:
2570 /* Output args are dead. */
2571 for (i = 0; i < nb_oargs; i++) {
2572 ts = arg_temp(op->args[i]);
2573 if (ts->state & TS_DEAD) {
2574 arg_life |= DEAD_ARG << i;
2576 if (ts->state & TS_MEM) {
2577 arg_life |= SYNC_ARG << i;
2579 ts->state = TS_DEAD;
2580 la_reset_pref(ts);
2582 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2583 op->output_pref[i] = 0;
2586 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2587 TCG_CALL_NO_READ_GLOBALS))) {
2588 la_global_kill(s, nb_globals);
2589 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2590 la_global_sync(s, nb_globals);
2593 /* Record arguments that die in this helper. */
2594 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2595 ts = arg_temp(op->args[i]);
2596 if (ts && ts->state & TS_DEAD) {
2597 arg_life |= DEAD_ARG << i;
2601 /* For all live registers, remove call-clobbered prefs. */
2602 la_cross_call(s, nb_temps);
2604 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2606 /* Input arguments are live for preceding opcodes. */
2607 for (i = 0; i < nb_iargs; i++) {
2608 ts = arg_temp(op->args[i + nb_oargs]);
2609 if (ts && ts->state & TS_DEAD) {
2610 /* For those arguments that die, and will be allocated
2611 * in registers, clear the register set for that arg,
2612 * to be filled in below. For args that will be on
2613 * the stack, reset to any available reg.
2615 *la_temp_pref(ts)
2616 = (i < nb_call_regs ? 0 :
2617 tcg_target_available_regs[ts->type]);
2618 ts->state &= ~TS_DEAD;
2622 /* For each input argument, add its input register to prefs.
2623 If a temp is used once, this produces a single set bit. */
2624 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2625 ts = arg_temp(op->args[i + nb_oargs]);
2626 if (ts) {
2627 tcg_regset_set_reg(*la_temp_pref(ts),
2628 tcg_target_call_iarg_regs[i]);
2632 break;
2633 case INDEX_op_insn_start:
2634 break;
2635 case INDEX_op_discard:
2636 /* mark the temporary as dead */
2637 ts = arg_temp(op->args[0]);
2638 ts->state = TS_DEAD;
2639 la_reset_pref(ts);
2640 break;
2642 case INDEX_op_add2_i32:
2643 opc_new = INDEX_op_add_i32;
2644 goto do_addsub2;
2645 case INDEX_op_sub2_i32:
2646 opc_new = INDEX_op_sub_i32;
2647 goto do_addsub2;
2648 case INDEX_op_add2_i64:
2649 opc_new = INDEX_op_add_i64;
2650 goto do_addsub2;
2651 case INDEX_op_sub2_i64:
2652 opc_new = INDEX_op_sub_i64;
2653 do_addsub2:
2654 nb_iargs = 4;
2655 nb_oargs = 2;
2656 /* Test if the high part of the operation is dead, but not
2657 the low part. The result can be optimized to a simple
2658 add or sub. This happens often for an x86_64 guest when the
2659 cpu mode is set to 32 bit. */
2660 if (arg_temp(op->args[1])->state == TS_DEAD) {
2661 if (arg_temp(op->args[0])->state == TS_DEAD) {
2662 goto do_remove;
2664 /* Replace the opcode and adjust the args in place,
2665 leaving 3 unused args at the end. */
2666 op->opc = opc = opc_new;
2667 op->args[1] = op->args[2];
2668 op->args[2] = op->args[4];
2669 /* Fall through and mark the single-word operation live. */
2670 nb_iargs = 2;
2671 nb_oargs = 1;
2673 goto do_not_remove;
2675 case INDEX_op_mulu2_i32:
2676 opc_new = INDEX_op_mul_i32;
2677 opc_new2 = INDEX_op_muluh_i32;
2678 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2679 goto do_mul2;
2680 case INDEX_op_muls2_i32:
2681 opc_new = INDEX_op_mul_i32;
2682 opc_new2 = INDEX_op_mulsh_i32;
2683 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2684 goto do_mul2;
2685 case INDEX_op_mulu2_i64:
2686 opc_new = INDEX_op_mul_i64;
2687 opc_new2 = INDEX_op_muluh_i64;
2688 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2689 goto do_mul2;
2690 case INDEX_op_muls2_i64:
2691 opc_new = INDEX_op_mul_i64;
2692 opc_new2 = INDEX_op_mulsh_i64;
2693 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2694 goto do_mul2;
2695 do_mul2:
2696 nb_iargs = 2;
2697 nb_oargs = 2;
2698 if (arg_temp(op->args[1])->state == TS_DEAD) {
2699 if (arg_temp(op->args[0])->state == TS_DEAD) {
2700 /* Both parts of the operation are dead. */
2701 goto do_remove;
2703 /* The high part of the operation is dead; generate the low. */
2704 op->opc = opc = opc_new;
2705 op->args[1] = op->args[2];
2706 op->args[2] = op->args[3];
2707 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2708 /* The low part of the operation is dead; generate the high. */
2709 op->opc = opc = opc_new2;
2710 op->args[0] = op->args[1];
2711 op->args[1] = op->args[2];
2712 op->args[2] = op->args[3];
2713 } else {
2714 goto do_not_remove;
2716 /* Mark the single-word operation live. */
2717 nb_oargs = 1;
2718 goto do_not_remove;
2720 default:
2721 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2722 nb_iargs = def->nb_iargs;
2723 nb_oargs = def->nb_oargs;
2725 /* Test if the operation can be removed because all
2726 its outputs are dead. We assume that nb_oargs == 0
2727 implies side effects */
2728 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2729 for (i = 0; i < nb_oargs; i++) {
2730 if (arg_temp(op->args[i])->state != TS_DEAD) {
2731 goto do_not_remove;
2734 goto do_remove;
2736 goto do_not_remove;
2738 do_remove:
2739 tcg_op_remove(s, op);
2740 break;
2742 do_not_remove:
2743 for (i = 0; i < nb_oargs; i++) {
2744 ts = arg_temp(op->args[i]);
2746 /* Remember the preference of the uses that followed. */
2747 op->output_pref[i] = *la_temp_pref(ts);
2749 /* Output args are dead. */
2750 if (ts->state & TS_DEAD) {
2751 arg_life |= DEAD_ARG << i;
2753 if (ts->state & TS_MEM) {
2754 arg_life |= SYNC_ARG << i;
2756 ts->state = TS_DEAD;
2757 la_reset_pref(ts);
2760 /* If end of basic block, update. */
2761 if (def->flags & TCG_OPF_BB_EXIT) {
2762 la_func_end(s, nb_globals, nb_temps);
2763 } else if (def->flags & TCG_OPF_BB_END) {
2764 la_bb_end(s, nb_globals, nb_temps);
2765 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2766 la_global_sync(s, nb_globals);
2767 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2768 la_cross_call(s, nb_temps);
2772 /* Record arguments that die in this opcode. */
2773 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2774 ts = arg_temp(op->args[i]);
2775 if (ts->state & TS_DEAD) {
2776 arg_life |= DEAD_ARG << i;
2780 /* Input arguments are live for preceding opcodes. */
2781 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2782 ts = arg_temp(op->args[i]);
2783 if (ts->state & TS_DEAD) {
2784 /* For operands that were dead, initially allow
2785 all regs for the type. */
2786 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2787 ts->state &= ~TS_DEAD;
2791 /* Incorporate constraints for this operand. */
2792 switch (opc) {
2793 case INDEX_op_mov_i32:
2794 case INDEX_op_mov_i64:
2795 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2796 have proper constraints. That said, special case
2797 moves to propagate preferences backward. */
2798 if (IS_DEAD_ARG(1)) {
2799 *la_temp_pref(arg_temp(op->args[0]))
2800 = *la_temp_pref(arg_temp(op->args[1]));
2802 break;
2804 default:
2805 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2806 const TCGArgConstraint *ct = &def->args_ct[i];
2807 TCGRegSet set, *pset;
2809 ts = arg_temp(op->args[i]);
2810 pset = la_temp_pref(ts);
2811 set = *pset;
2813 set &= ct->u.regs;
2814 if (ct->ct & TCG_CT_IALIAS) {
2815 set &= op->output_pref[ct->alias_index];
2817 /* If the combination is not possible, restart. */
2818 if (set == 0) {
2819 set = ct->u.regs;
2821 *pset = set;
2823 break;
2825 break;
2827 op->life = arg_life;
2831 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2832 static bool liveness_pass_2(TCGContext *s)
2834 int nb_globals = s->nb_globals;
2835 int nb_temps, i;
2836 bool changes = false;
2837 TCGOp *op, *op_next;
2839 /* Create a temporary for each indirect global. */
2840 for (i = 0; i < nb_globals; ++i) {
2841 TCGTemp *its = &s->temps[i];
2842 if (its->indirect_reg) {
2843 TCGTemp *dts = tcg_temp_alloc(s);
2844 dts->type = its->type;
2845 dts->base_type = its->base_type;
2846 its->state_ptr = dts;
2847 } else {
2848 its->state_ptr = NULL;
2850 /* All globals begin dead. */
2851 its->state = TS_DEAD;
2853 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2854 TCGTemp *its = &s->temps[i];
2855 its->state_ptr = NULL;
2856 its->state = TS_DEAD;
2859 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2860 TCGOpcode opc = op->opc;
2861 const TCGOpDef *def = &tcg_op_defs[opc];
2862 TCGLifeData arg_life = op->life;
2863 int nb_iargs, nb_oargs, call_flags;
2864 TCGTemp *arg_ts, *dir_ts;
2866 if (opc == INDEX_op_call) {
2867 nb_oargs = TCGOP_CALLO(op);
2868 nb_iargs = TCGOP_CALLI(op);
2869 call_flags = op->args[nb_oargs + nb_iargs + 1];
2870 } else {
2871 nb_iargs = def->nb_iargs;
2872 nb_oargs = def->nb_oargs;
2874 /* Set flags similar to those required for calls. */
2875 if (def->flags & TCG_OPF_BB_END) {
2876 /* Like writing globals: save_globals */
2877 call_flags = 0;
2878 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2879 /* Like reading globals: sync_globals */
2880 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2881 } else {
2882 /* No effect on globals. */
2883 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2884 TCG_CALL_NO_WRITE_GLOBALS);
2888 /* Make sure that input arguments are available. */
2889 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2890 arg_ts = arg_temp(op->args[i]);
2891 if (arg_ts) {
2892 dir_ts = arg_ts->state_ptr;
2893 if (dir_ts && arg_ts->state == TS_DEAD) {
2894 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2895 ? INDEX_op_ld_i32
2896 : INDEX_op_ld_i64);
2897 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2899 lop->args[0] = temp_arg(dir_ts);
2900 lop->args[1] = temp_arg(arg_ts->mem_base);
2901 lop->args[2] = arg_ts->mem_offset;
2903 /* Loaded, but synced with memory. */
2904 arg_ts->state = TS_MEM;
2909 /* Perform input replacement, and mark inputs that became dead.
2910 No action is required except keeping temp_state up to date
2911 so that we reload when needed. */
2912 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2913 arg_ts = arg_temp(op->args[i]);
2914 if (arg_ts) {
2915 dir_ts = arg_ts->state_ptr;
2916 if (dir_ts) {
2917 op->args[i] = temp_arg(dir_ts);
2918 changes = true;
2919 if (IS_DEAD_ARG(i)) {
2920 arg_ts->state = TS_DEAD;
2926 /* Liveness analysis should ensure that the following are
2927 all correct, for call sites and basic block end points. */
2928 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2929 /* Nothing to do */
2930 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2931 for (i = 0; i < nb_globals; ++i) {
2932 /* Liveness should see that globals are synced back,
2933 that is, either TS_DEAD or TS_MEM. */
2934 arg_ts = &s->temps[i];
2935 tcg_debug_assert(arg_ts->state_ptr == 0
2936 || arg_ts->state != 0);
2938 } else {
2939 for (i = 0; i < nb_globals; ++i) {
2940 /* Liveness should see that globals are saved back,
2941 that is, TS_DEAD, waiting to be reloaded. */
2942 arg_ts = &s->temps[i];
2943 tcg_debug_assert(arg_ts->state_ptr == 0
2944 || arg_ts->state == TS_DEAD);
2948 /* Outputs become available. */
2949 for (i = 0; i < nb_oargs; i++) {
2950 arg_ts = arg_temp(op->args[i]);
2951 dir_ts = arg_ts->state_ptr;
2952 if (!dir_ts) {
2953 continue;
2955 op->args[i] = temp_arg(dir_ts);
2956 changes = true;
2958 /* The output is now live and modified. */
2959 arg_ts->state = 0;
2961 /* Sync outputs upon their last write. */
2962 if (NEED_SYNC_ARG(i)) {
2963 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2964 ? INDEX_op_st_i32
2965 : INDEX_op_st_i64);
2966 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2968 sop->args[0] = temp_arg(dir_ts);
2969 sop->args[1] = temp_arg(arg_ts->mem_base);
2970 sop->args[2] = arg_ts->mem_offset;
2972 arg_ts->state = TS_MEM;
2974 /* Drop outputs that are dead. */
2975 if (IS_DEAD_ARG(i)) {
2976 arg_ts->state = TS_DEAD;
2981 return changes;
2984 #ifdef CONFIG_DEBUG_TCG
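/* Debug helper: print the current location (register, memory, constant or
   dead) of every temp, plus the temp bound to each host register. */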
2985 static void dump_regs(TCGContext *s)
2987 TCGTemp *ts;
2988 int i;
2989 char buf[64];
2991 for(i = 0; i < s->nb_temps; i++) {
2992 ts = &s->temps[i];
2993 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2994 switch(ts->val_type) {
2995 case TEMP_VAL_REG:
2996 printf("%s", tcg_target_reg_names[ts->reg]);
2997 break;
2998 case TEMP_VAL_MEM:
2999 printf("%d(%s)", (int)ts->mem_offset,
3000 tcg_target_reg_names[ts->mem_base->reg]);
3001 break;
3002 case TEMP_VAL_CONST:
3003 printf("$0x%" TCG_PRIlx, ts->val);
3004 break;
3005 case TEMP_VAL_DEAD:
3006 printf("D");
3007 break;
3008 default:
3009 printf("???");
3010 break;
3012 printf("\n");
3015 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3016 if (s->reg_to_temp[i] != NULL) {
3017 printf("%s: %s\n",
3018 tcg_target_reg_names[i],
3019 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
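/* Debug helper: verify that reg_to_temp[] and each temp's recorded
   register agree, aborting on any inconsistency. */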
3024 static void check_regs(TCGContext *s)
3026 int reg;
3027 int k;
3028 TCGTemp *ts;
3029 char buf[64];
3031 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3032 ts = s->reg_to_temp[reg];
3033 if (ts != NULL) {
3034 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3035 printf("Inconsistency for register %s:\n",
3036 tcg_target_reg_names[reg]);
3037 goto fail;
3041 for (k = 0; k < s->nb_temps; k++) {
3042 ts = &s->temps[k];
3043 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3044 && s->reg_to_temp[ts->reg] != ts) {
3045 printf("Inconsistency for temp %s:\n",
3046 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3047 fail:
3048 printf("reg state:\n");
3049 dump_regs(s);
3050 tcg_abort();
3054 #endif
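/* Allocate a stack slot in the TCG frame for the temp; abort if the
   frame is exhausted. */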
3056 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3058 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3059 /* The Sparc64 stack is accessed with an offset of 2047. */
3060 s->current_frame_offset = (s->current_frame_offset +
3061 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3062 ~(sizeof(tcg_target_long) - 1);
3063 #endif
3064 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3065 s->frame_end) {
3066 tcg_abort();
3068 ts->mem_offset = s->current_frame_offset;
3069 ts->mem_base = s->frame_temp;
3070 ts->mem_allocated = 1;
3071 s->current_frame_offset += sizeof(tcg_target_long);
3074 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3076 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3077 mark it free; otherwise mark it dead. */
3078 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3080 if (ts->fixed_reg) {
3081 return;
3083 if (ts->val_type == TEMP_VAL_REG) {
3084 s->reg_to_temp[ts->reg] = NULL;
3086 ts->val_type = (free_or_dead < 0
3087 || ts->temp_local
3088 || ts->temp_global
3089 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3092 /* Mark a temporary as dead. */
3093 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3095 temp_free_or_dead(s, ts, 1);
3098 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3099 register needs to be allocated to store a constant. If 'free_or_dead'
3100 is non-zero, subsequently release the temporary; if it is positive, the
3101 temp is dead; if it is negative, the temp is free. */
3102 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3103 TCGRegSet preferred_regs, int free_or_dead)
3105 if (ts->fixed_reg) {
3106 return;
3108 if (!ts->mem_coherent) {
3109 if (!ts->mem_allocated) {
3110 temp_allocate_frame(s, ts);
3112 switch (ts->val_type) {
3113 case TEMP_VAL_CONST:
3114 /* If we're going to free the temp immediately, then we won't
3115 require it later in a register, so attempt to store the
3116 constant to memory directly. */
3117 if (free_or_dead
3118 && tcg_out_sti(s, ts->type, ts->val,
3119 ts->mem_base->reg, ts->mem_offset)) {
3120 break;
3122 temp_load(s, ts, tcg_target_available_regs[ts->type],
3123 allocated_regs, preferred_regs);
3124 /* fallthrough */
3126 case TEMP_VAL_REG:
3127 tcg_out_st(s, ts->type, ts->reg,
3128 ts->mem_base->reg, ts->mem_offset);
3129 break;
3131 case TEMP_VAL_MEM:
3132 break;
3134 case TEMP_VAL_DEAD:
3135 default:
3136 tcg_abort();
3138 ts->mem_coherent = 1;
3140 if (free_or_dead) {
3141 temp_free_or_dead(s, ts, free_or_dead);
3145 /* free register 'reg' by spilling the corresponding temporary if necessary */
3146 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3148 TCGTemp *ts = s->reg_to_temp[reg];
3149 if (ts != NULL) {
3150 temp_sync(s, ts, allocated_regs, 0, -1);
3155 * tcg_reg_alloc:
3156 * @required_regs: Set of registers in which we must allocate.
3157 * @allocated_regs: Set of registers which must be avoided.
3158 * @preferred_regs: Set of registers we should prefer.
3159 * @rev: True if we search the registers in "indirect" order.
3161 * The allocated register must be in @required_regs & ~@allocated_regs,
3162 * but if we can put it in @preferred_regs we may save a move later.
3164 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3165 TCGRegSet allocated_regs,
3166 TCGRegSet preferred_regs, bool rev)
3168 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3169 TCGRegSet reg_ct[2];
3170 const int *order;
3172 reg_ct[1] = required_regs & ~allocated_regs;
3173 tcg_debug_assert(reg_ct[1] != 0);
3174 reg_ct[0] = reg_ct[1] & preferred_regs;
3176 /* Skip the preferred_regs option if it cannot be satisfied,
3177 or if the preference made no difference. */
3178 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3180 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3182 /* Try free registers, preferences first. */
3183 for (j = f; j < 2; j++) {
3184 TCGRegSet set = reg_ct[j];
3186 if (tcg_regset_single(set)) {
3187 /* One register in the set. */
3188 TCGReg reg = tcg_regset_first(set);
3189 if (s->reg_to_temp[reg] == NULL) {
3190 return reg;
3192 } else {
3193 for (i = 0; i < n; i++) {
3194 TCGReg reg = order[i];
3195 if (s->reg_to_temp[reg] == NULL &&
3196 tcg_regset_test_reg(set, reg)) {
3197 return reg;
3203 /* We must spill something. */
3204 for (j = f; j < 2; j++) {
3205 TCGRegSet set = reg_ct[j];
3207 if (tcg_regset_single(set)) {
3208 /* One register in the set. */
3209 TCGReg reg = tcg_regset_first(set);
3210 tcg_reg_free(s, reg, allocated_regs);
3211 return reg;
3212 } else {
3213 for (i = 0; i < n; i++) {
3214 TCGReg reg = order[i];
3215 if (tcg_regset_test_reg(set, reg)) {
3216 tcg_reg_free(s, reg, allocated_regs);
3217 return reg;
3223 tcg_abort();
3226 /* Make sure the temporary is in a register. If needed, allocate the register
3227 from DESIRED while avoiding ALLOCATED. */
3228 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3229 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3231 TCGReg reg;
3233 switch (ts->val_type) {
3234 case TEMP_VAL_REG:
3235 return;
3236 case TEMP_VAL_CONST:
3237 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3238 preferred_regs, ts->indirect_base);
3239 tcg_out_movi(s, ts->type, reg, ts->val);
3240 ts->mem_coherent = 0;
3241 break;
3242 case TEMP_VAL_MEM:
3243 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3244 preferred_regs, ts->indirect_base);
3245 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3246 ts->mem_coherent = 1;
3247 break;
3248 case TEMP_VAL_DEAD:
3249 default:
3250 tcg_abort();
3252 ts->reg = reg;
3253 ts->val_type = TEMP_VAL_REG;
3254 s->reg_to_temp[reg] = ts;
3257 /* Save a temporary to memory. 'allocated_regs' is used in case a
3258 temporary register needs to be allocated to store a constant. */
3259 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3261 /* The liveness analysis already ensures that globals are back
3262 in memory. Keep a tcg_debug_assert for safety. */
3263 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3266 /* save globals to their canonical location and assume they can be
3267 modified by the following code. 'allocated_regs' is used in case a
3268 temporary register needs to be allocated to store a constant. */
3269 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3271 int i, n;
3273 for (i = 0, n = s->nb_globals; i < n; i++) {
3274 temp_save(s, &s->temps[i], allocated_regs);
3278 /* sync globals to their canonical location and assume they can be
3279 read by the following code. 'allocated_regs' is used in case a
3280 temporary register needs to be allocated to store a constant. */
3281 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3283 int i, n;
3285 for (i = 0, n = s->nb_globals; i < n; i++) {
3286 TCGTemp *ts = &s->temps[i];
3287 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3288 || ts->fixed_reg
3289 || ts->mem_coherent);
3293 /* at the end of a basic block, we assume all temporaries are dead and
3294 all globals are stored at their canonical location. */
3295 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3297 int i;
3299 for (i = s->nb_globals; i < s->nb_temps; i++) {
3300 TCGTemp *ts = &s->temps[i];
3301 if (ts->temp_local) {
3302 temp_save(s, ts, allocated_regs);
3303 } else {
3304 /* The liveness analysis already ensures that temps are dead.
3305 Keep a tcg_debug_assert for safety. */
3306 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3310 save_globals(s, allocated_regs);
3314 * Specialized code generation for INDEX_op_movi_*.
3316 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3317 tcg_target_ulong val, TCGLifeData arg_life,
3318 TCGRegSet preferred_regs)
3320 /* ENV should not be modified. */
3321 tcg_debug_assert(!ots->fixed_reg);
3323 /* The movi is not explicitly generated here. */
3324 if (ots->val_type == TEMP_VAL_REG) {
3325 s->reg_to_temp[ots->reg] = NULL;
3327 ots->val_type = TEMP_VAL_CONST;
3328 ots->val = val;
3329 ots->mem_coherent = 0;
3330 if (NEED_SYNC_ARG(0)) {
3331 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3332 } else if (IS_DEAD_ARG(0)) {
3333 temp_dead(s, ots);
3337 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3339 TCGTemp *ots = arg_temp(op->args[0]);
3340 tcg_target_ulong val = op->args[1];
3342 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3346 * Specialized code generation for INDEX_op_mov_*.
3348 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3350 const TCGLifeData arg_life = op->life;
3351 TCGRegSet allocated_regs, preferred_regs;
3352 TCGTemp *ts, *ots;
3353 TCGType otype, itype;
3355 allocated_regs = s->reserved_regs;
3356 preferred_regs = op->output_pref[0];
3357 ots = arg_temp(op->args[0]);
3358 ts = arg_temp(op->args[1]);
3360 /* ENV should not be modified. */
3361 tcg_debug_assert(!ots->fixed_reg);
3363 /* Note that otype != itype for no-op truncation. */
3364 otype = ots->type;
3365 itype = ts->type;
3367 if (ts->val_type == TEMP_VAL_CONST) {
3368 /* propagate constant or generate sti */
3369 tcg_target_ulong val = ts->val;
3370 if (IS_DEAD_ARG(1)) {
3371 temp_dead(s, ts);
3373 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3374 return;
3377 /* If the source value is in memory we're going to be forced
3378 to have it in a register in order to perform the copy. Copy
3379 the SOURCE value into its own register first, that way we
3380 don't have to reload SOURCE the next time it is used. */
3381 if (ts->val_type == TEMP_VAL_MEM) {
3382 temp_load(s, ts, tcg_target_available_regs[itype],
3383 allocated_regs, preferred_regs);
3386 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3387 if (IS_DEAD_ARG(0)) {
3388 /* mov to a non-saved dead register makes no sense (even with
3389 liveness analysis disabled). */
3390 tcg_debug_assert(NEED_SYNC_ARG(0));
3391 if (!ots->mem_allocated) {
3392 temp_allocate_frame(s, ots);
3394 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3395 if (IS_DEAD_ARG(1)) {
3396 temp_dead(s, ts);
3398 temp_dead(s, ots);
3399 } else {
3400 if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3401 /* the mov can be suppressed */
3402 if (ots->val_type == TEMP_VAL_REG) {
3403 s->reg_to_temp[ots->reg] = NULL;
3405 ots->reg = ts->reg;
3406 temp_dead(s, ts);
3407 } else {
3408 if (ots->val_type != TEMP_VAL_REG) {
3409 /* When allocating a new register, make sure to not spill the
3410 input one. */
3411 tcg_regset_set_reg(allocated_regs, ts->reg);
3412 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3413 allocated_regs, preferred_regs,
3414 ots->indirect_base);
3416 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3418 * Cross register class move not supported.
3419 * Store the source register into the destination slot
3420 * and leave the destination temp as TEMP_VAL_MEM.
3422 assert(!ots->fixed_reg);
3423 if (!ots->mem_allocated) {
3424 temp_allocate_frame(s, ots);
3426 tcg_out_st(s, ts->type, ts->reg,
3427 ots->mem_base->reg, ots->mem_offset);
3428 ots->mem_coherent = 1;
3429 temp_free_or_dead(s, ots, -1);
3430 return;
3433 ots->val_type = TEMP_VAL_REG;
3434 ots->mem_coherent = 0;
3435 s->reg_to_temp[ots->reg] = ots;
3436 if (NEED_SYNC_ARG(0)) {
3437 temp_sync(s, ots, allocated_regs, 0, 0);
3443 * Specialized code generation for INDEX_op_dup_vec.
3445 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3447 const TCGLifeData arg_life = op->life;
3448 TCGRegSet dup_out_regs, dup_in_regs;
3449 TCGTemp *its, *ots;
3450 TCGType itype, vtype;
3451 intptr_t endian_fixup;
3452 unsigned vece;
3453 bool ok;
3455 ots = arg_temp(op->args[0]);
3456 its = arg_temp(op->args[1]);
3458 /* ENV should not be modified. */
3459 tcg_debug_assert(!ots->fixed_reg);
3461 itype = its->type;
3462 vece = TCGOP_VECE(op);
3463 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3465 if (its->val_type == TEMP_VAL_CONST) {
3466 /* Propagate constant via movi -> dupi. */
3467 tcg_target_ulong val = its->val;
3468 if (IS_DEAD_ARG(1)) {
3469 temp_dead(s, its);
3471 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3472 return;
3475 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3476 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3478 /* Allocate the output register now. */
3479 if (ots->val_type != TEMP_VAL_REG) {
3480 TCGRegSet allocated_regs = s->reserved_regs;
3482 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3483 /* Make sure to not spill the input register. */
3484 tcg_regset_set_reg(allocated_regs, its->reg);
3486 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3487 op->output_pref[0], ots->indirect_base);
3488 ots->val_type = TEMP_VAL_REG;
3489 ots->mem_coherent = 0;
3490 s->reg_to_temp[ots->reg] = ots;
3493 switch (its->val_type) {
3494 case TEMP_VAL_REG:
3496 * The dup constraints must be broad, covering all possible VECE.
3497 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3498 * to fail, indicating that extra moves are required for that case.
3500 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3501 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3502 goto done;
3504 /* Try again from memory or a vector input register. */
3506 if (!its->mem_coherent) {
3508 * The input register is not synced, and so an extra store
3509 * would be required to use memory. Attempt an integer-vector
3510 * register move first. We do not have a TCGRegSet for this.
3512 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3513 break;
3515 /* Sync the temp back to its slot and load from there. */
3516 temp_sync(s, its, s->reserved_regs, 0, 0);
3518 /* fall through */
3520 case TEMP_VAL_MEM:
3521 #ifdef HOST_WORDS_BIGENDIAN
3522 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3523 endian_fixup -= 1 << vece;
3524 #else
3525 endian_fixup = 0;
3526 #endif
3527 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3528 its->mem_offset + endian_fixup)) {
3529 goto done;
3531 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3532 break;
3534 default:
3535 g_assert_not_reached();
3538 /* We now have a vector input register, so dup must succeed. */
3539 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3540 tcg_debug_assert(ok);
3542 done:
3543 if (IS_DEAD_ARG(1)) {
3544 temp_dead(s, its);
3546 if (NEED_SYNC_ARG(0)) {
3547 temp_sync(s, ots, s->reserved_regs, 0, 0);
3549 if (IS_DEAD_ARG(0)) {
3550 temp_dead(s, ots);
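/* Register allocation for a generic op: load inputs to satisfy their
   constraints, handle clobbers and side effects, allocate the outputs,
   emit the instruction, then sync or kill the outputs as the liveness
   data requires. */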
3554 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3556 const TCGLifeData arg_life = op->life;
3557 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3558 TCGRegSet i_allocated_regs;
3559 TCGRegSet o_allocated_regs;
3560 int i, k, nb_iargs, nb_oargs;
3561 TCGReg reg;
3562 TCGArg arg;
3563 const TCGArgConstraint *arg_ct;
3564 TCGTemp *ts;
3565 TCGArg new_args[TCG_MAX_OP_ARGS];
3566 int const_args[TCG_MAX_OP_ARGS];
3568 nb_oargs = def->nb_oargs;
3569 nb_iargs = def->nb_iargs;
3571 /* copy constants */
3572 memcpy(new_args + nb_oargs + nb_iargs,
3573 op->args + nb_oargs + nb_iargs,
3574 sizeof(TCGArg) * def->nb_cargs);
3576 i_allocated_regs = s->reserved_regs;
3577 o_allocated_regs = s->reserved_regs;
3579 /* satisfy input constraints */
3580 for (k = 0; k < nb_iargs; k++) {
3581 TCGRegSet i_preferred_regs, o_preferred_regs;
3583 i = def->sorted_args[nb_oargs + k];
3584 arg = op->args[i];
3585 arg_ct = &def->args_ct[i];
3586 ts = arg_temp(arg);
3588 if (ts->val_type == TEMP_VAL_CONST
3589 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3590 /* constant is OK for instruction */
3591 const_args[i] = 1;
3592 new_args[i] = ts->val;
3593 continue;
3596 i_preferred_regs = o_preferred_regs = 0;
3597 if (arg_ct->ct & TCG_CT_IALIAS) {
3598 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3599 if (ts->fixed_reg) {
3600 /* if fixed register, we must allocate a new register
3601 if the alias is not the same register */
3602 if (arg != op->args[arg_ct->alias_index]) {
3603 goto allocate_in_reg;
3605 } else {
3606 /* if the input is aliased to an output and if it is
3607 not dead after the instruction, we must allocate
3608 a new register and move it */
3609 if (!IS_DEAD_ARG(i)) {
3610 goto allocate_in_reg;
3613 /* check if the current register has already been allocated
3614 for another input aliased to an output */
3615 if (ts->val_type == TEMP_VAL_REG) {
3616 int k2, i2;
3617 reg = ts->reg;
3618 for (k2 = 0 ; k2 < k ; k2++) {
3619 i2 = def->sorted_args[nb_oargs + k2];
3620 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3621 reg == new_args[i2]) {
3622 goto allocate_in_reg;
3626 i_preferred_regs = o_preferred_regs;
3630 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3631 reg = ts->reg;
3633 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3634 /* nothing to do: the constraint is satisfied */
3635 } else {
3636 allocate_in_reg:
3637 /* allocate a new register matching the constraint
3638 and move the temporary register into it */
3639 temp_load(s, ts, tcg_target_available_regs[ts->type],
3640 i_allocated_regs, 0);
3641 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3642 o_preferred_regs, ts->indirect_base);
3643 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3645 * Cross register class move not supported. Sync the
3646 * temp back to its slot and load from there.
3648 temp_sync(s, ts, i_allocated_regs, 0, 0);
3649 tcg_out_ld(s, ts->type, reg,
3650 ts->mem_base->reg, ts->mem_offset);
3653 new_args[i] = reg;
3654 const_args[i] = 0;
3655 tcg_regset_set_reg(i_allocated_regs, reg);
3658 /* mark dead temporaries and free the associated registers */
3659 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3660 if (IS_DEAD_ARG(i)) {
3661 temp_dead(s, arg_temp(op->args[i]));
3665 if (def->flags & TCG_OPF_BB_END) {
3666 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3667 } else {
3668 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3669 /* XXX: permit a generic clobber register list? */
3670 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3671 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3672 tcg_reg_free(s, i, i_allocated_regs);
3676 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3677 /* sync globals if the op has side effects and might trigger
3678 an exception. */
3679 sync_globals(s, i_allocated_regs);
3682 /* satisfy the output constraints */
3683 for(k = 0; k < nb_oargs; k++) {
3684 i = def->sorted_args[k];
3685 arg = op->args[i];
3686 arg_ct = &def->args_ct[i];
3687 ts = arg_temp(arg);
3689 /* ENV should not be modified. */
3690 tcg_debug_assert(!ts->fixed_reg);
3692 if ((arg_ct->ct & TCG_CT_ALIAS)
3693 && !const_args[arg_ct->alias_index]) {
3694 reg = new_args[arg_ct->alias_index];
3695 } else if (arg_ct->ct & TCG_CT_NEWREG) {
3696 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3697 i_allocated_regs | o_allocated_regs,
3698 op->output_pref[k], ts->indirect_base);
3699 } else {
3700 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3701 op->output_pref[k], ts->indirect_base);
3703 tcg_regset_set_reg(o_allocated_regs, reg);
3704 if (ts->val_type == TEMP_VAL_REG) {
3705 s->reg_to_temp[ts->reg] = NULL;
3707 ts->val_type = TEMP_VAL_REG;
3708 ts->reg = reg;
3710 * Temp value is modified, so the value kept in memory is
3711 * potentially not the same.
3713 ts->mem_coherent = 0;
3714 s->reg_to_temp[reg] = ts;
3715 new_args[i] = reg;
3719 /* emit instruction */
3720 if (def->flags & TCG_OPF_VECTOR) {
3721 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3722 new_args, const_args);
3723 } else {
3724 tcg_out_op(s, op->opc, new_args, const_args);
3727 /* move the outputs into the correct registers if needed */
3728 for(i = 0; i < nb_oargs; i++) {
3729 ts = arg_temp(op->args[i]);
3731 /* ENV should not be modified. */
3732 tcg_debug_assert(!ts->fixed_reg);
3734 if (NEED_SYNC_ARG(i)) {
3735 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3736 } else if (IS_DEAD_ARG(i)) {
3737 temp_dead(s, ts);
3742 #ifdef TCG_TARGET_STACK_GROWSUP
3743 #define STACK_DIR(x) (-(x))
3744 #else
3745 #define STACK_DIR(x) (x)
3746 #endif
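/* Register allocation for a helper call: place arguments into stack slots
   and argument registers per the target calling convention, free the
   call-clobbered registers, save or sync globals as the call flags
   require, emit the call, and bind the outputs to the return registers. */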
3748 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3750 const int nb_oargs = TCGOP_CALLO(op);
3751 const int nb_iargs = TCGOP_CALLI(op);
3752 const TCGLifeData arg_life = op->life;
3753 int flags, nb_regs, i;
3754 TCGReg reg;
3755 TCGArg arg;
3756 TCGTemp *ts;
3757 intptr_t stack_offset;
3758 size_t call_stack_size;
3759 tcg_insn_unit *func_addr;
3760 int allocate_args;
3761 TCGRegSet allocated_regs;
3763 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3764 flags = op->args[nb_oargs + nb_iargs + 1];
3766 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3767 if (nb_regs > nb_iargs) {
3768 nb_regs = nb_iargs;
3771 /* assign stack slots first */
3772 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3773 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3774 ~(TCG_TARGET_STACK_ALIGN - 1);
3775 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3776 if (allocate_args) {
3777 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3778 preallocate call stack */
3779 tcg_abort();
3782 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3783 for (i = nb_regs; i < nb_iargs; i++) {
3784 arg = op->args[nb_oargs + i];
3785 #ifdef TCG_TARGET_STACK_GROWSUP
3786 stack_offset -= sizeof(tcg_target_long);
3787 #endif
3788 if (arg != TCG_CALL_DUMMY_ARG) {
3789 ts = arg_temp(arg);
3790 temp_load(s, ts, tcg_target_available_regs[ts->type],
3791 s->reserved_regs, 0);
3792 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3794 #ifndef TCG_TARGET_STACK_GROWSUP
3795 stack_offset += sizeof(tcg_target_long);
3796 #endif
3799 /* assign input registers */
3800 allocated_regs = s->reserved_regs;
3801 for (i = 0; i < nb_regs; i++) {
3802 arg = op->args[nb_oargs + i];
3803 if (arg != TCG_CALL_DUMMY_ARG) {
3804 ts = arg_temp(arg);
3805 reg = tcg_target_call_iarg_regs[i];
3807 if (ts->val_type == TEMP_VAL_REG) {
3808 if (ts->reg != reg) {
3809 tcg_reg_free(s, reg, allocated_regs);
3810 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3812 * Cross register class move not supported. Sync the
3813 * temp back to its slot and load from there.
3815 temp_sync(s, ts, allocated_regs, 0, 0);
3816 tcg_out_ld(s, ts->type, reg,
3817 ts->mem_base->reg, ts->mem_offset);
3820 } else {
3821 TCGRegSet arg_set = 0;
3823 tcg_reg_free(s, reg, allocated_regs);
3824 tcg_regset_set_reg(arg_set, reg);
3825 temp_load(s, ts, arg_set, allocated_regs, 0);
3828 tcg_regset_set_reg(allocated_regs, reg);
3832 /* mark dead temporaries and free the associated registers */
3833 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3834 if (IS_DEAD_ARG(i)) {
3835 temp_dead(s, arg_temp(op->args[i]));
3839 /* clobber call registers */
3840 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3841 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3842 tcg_reg_free(s, i, allocated_regs);
3846 /* Save globals if they might be written by the helper, sync them if
3847 they might be read. */
3848 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3849 /* Nothing to do */
3850 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3851 sync_globals(s, allocated_regs);
3852 } else {
3853 save_globals(s, allocated_regs);
3856 tcg_out_call(s, func_addr);
3858 /* assign output registers and emit moves if needed */
3859 for(i = 0; i < nb_oargs; i++) {
3860 arg = op->args[i];
3861 ts = arg_temp(arg);
3863 /* ENV should not be modified. */
3864 tcg_debug_assert(!ts->fixed_reg);
3866 reg = tcg_target_call_oarg_regs[i];
3867 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3868 if (ts->val_type == TEMP_VAL_REG) {
3869 s->reg_to_temp[ts->reg] = NULL;
3871 ts->val_type = TEMP_VAL_REG;
3872 ts->reg = reg;
3873 ts->mem_coherent = 0;
3874 s->reg_to_temp[reg] = ts;
3875 if (NEED_SYNC_ARG(i)) {
3876 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3877 } else if (IS_DEAD_ARG(i)) {
3878 temp_dead(s, ts);
3883 #ifdef CONFIG_PROFILER
3885 /* avoid copy/paste errors */
3886 #define PROF_ADD(to, from, field) \
3887 do { \
3888 (to)->field += atomic_read(&((from)->field)); \
3889 } while (0)
3891 #define PROF_MAX(to, from, field) \
3892 do { \
3893 typeof((from)->field) val__ = atomic_read(&((from)->field)); \
3894 if (val__ > (to)->field) { \
3895 (to)->field = val__; \
3897 } while (0)
3899 /* Pass in a zeroed @prof */
3900 static inline
3901 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3903 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3904 unsigned int i;
3906 for (i = 0; i < n_ctxs; i++) {
3907 TCGContext *s = atomic_read(&tcg_ctxs[i]);
3908 const TCGProfile *orig = &s->prof;
3910 if (counters) {
3911 PROF_ADD(prof, orig, cpu_exec_time);
3912 PROF_ADD(prof, orig, tb_count1);
3913 PROF_ADD(prof, orig, tb_count);
3914 PROF_ADD(prof, orig, op_count);
3915 PROF_MAX(prof, orig, op_count_max);
3916 PROF_ADD(prof, orig, temp_count);
3917 PROF_MAX(prof, orig, temp_count_max);
3918 PROF_ADD(prof, orig, del_op_count);
3919 PROF_ADD(prof, orig, code_in_len);
3920 PROF_ADD(prof, orig, code_out_len);
3921 PROF_ADD(prof, orig, search_out_len);
3922 PROF_ADD(prof, orig, interm_time);
3923 PROF_ADD(prof, orig, code_time);
3924 PROF_ADD(prof, orig, la_time);
3925 PROF_ADD(prof, orig, opt_time);
3926 PROF_ADD(prof, orig, restore_count);
3927 PROF_ADD(prof, orig, restore_time);
3929 if (table) {
3930 int i;
3932 for (i = 0; i < NB_OPS; i++) {
3933 PROF_ADD(prof, orig, table_op_count[i]);
3939 #undef PROF_ADD
3940 #undef PROF_MAX
3942 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3944 tcg_profile_snapshot(prof, true, false);
3947 static void tcg_profile_snapshot_table(TCGProfile *prof)
3949 tcg_profile_snapshot(prof, false, true);
3952 void tcg_dump_op_count(void)
3954 TCGProfile prof = {};
3955 int i;
3957 tcg_profile_snapshot_table(&prof);
3958 for (i = 0; i < NB_OPS; i++) {
3959 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3960 prof.table_op_count[i]);
3964 int64_t tcg_cpu_exec_time(void)
3966 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3967 unsigned int i;
3968 int64_t ret = 0;
3970 for (i = 0; i < n_ctxs; i++) {
3971 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3972 const TCGProfile *prof = &s->prof;
3974 ret += atomic_read(&prof->cpu_exec_time);
3976 return ret;
3978 #else
3979 void tcg_dump_op_count(void)
3981 qemu_printf("[TCG profiler not compiled]\n");
3984 int64_t tcg_cpu_exec_time(void)
3986 error_report("%s: TCG profiler not compiled", __func__);
3987 exit(EXIT_FAILURE);
3989 #endif
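/* Translate the accumulated ops of a TB into host code: optimize, run the
   liveness passes, then allocate registers and emit each op.  Returns the
   generated code size, or a negative value if the code buffer or the TB
   size limit would overflow. */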
3992 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3994 #ifdef CONFIG_PROFILER
3995 TCGProfile *prof = &s->prof;
3996 #endif
3997 int i, num_insns;
3998 TCGOp *op;
4000 #ifdef CONFIG_PROFILER
4002 int n = 0;
4004 QTAILQ_FOREACH(op, &s->ops, link) {
4005 n++;
4007 atomic_set(&prof->op_count, prof->op_count + n);
4008 if (n > prof->op_count_max) {
4009 atomic_set(&prof->op_count_max, n);
4012 n = s->nb_temps;
4013 atomic_set(&prof->temp_count, prof->temp_count + n);
4014 if (n > prof->temp_count_max) {
4015 atomic_set(&prof->temp_count_max, n);
4018 #endif
4020 #ifdef DEBUG_DISAS
4021 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4022 && qemu_log_in_addr_range(tb->pc))) {
4023 qemu_log_lock();
4024 qemu_log("OP:\n");
4025 tcg_dump_ops(s, false);
4026 qemu_log("\n");
4027 qemu_log_unlock();
4029 #endif
4031 #ifdef CONFIG_DEBUG_TCG
4032 /* Ensure all labels referenced have been emitted. */
4034 TCGLabel *l;
4035 bool error = false;
4037 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4038 if (unlikely(!l->present) && l->refs) {
4039 qemu_log_mask(CPU_LOG_TB_OP,
4040 "$L%d referenced but not present.\n", l->id);
4041 error = true;
4044 assert(!error);
4046 #endif
4048 #ifdef CONFIG_PROFILER
4049 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4050 #endif
4052 #ifdef USE_TCG_OPTIMIZATIONS
4053 tcg_optimize(s);
4054 #endif
4056 #ifdef CONFIG_PROFILER
4057 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4058 atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4059 #endif
4061 reachable_code_pass(s);
4062 liveness_pass_1(s);
4064 if (s->nb_indirects > 0) {
4065 #ifdef DEBUG_DISAS
4066 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4067 && qemu_log_in_addr_range(tb->pc))) {
4068 qemu_log_lock();
4069 qemu_log("OP before indirect lowering:\n");
4070 tcg_dump_ops(s, false);
4071 qemu_log("\n");
4072 qemu_log_unlock();
4074 #endif
4075 /* Replace indirect temps with direct temps. */
4076 if (liveness_pass_2(s)) {
4077 /* If changes were made, re-run liveness. */
4078 liveness_pass_1(s);
4082 #ifdef CONFIG_PROFILER
4083 atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4084 #endif
4086 #ifdef DEBUG_DISAS
4087 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4088 && qemu_log_in_addr_range(tb->pc))) {
4089 qemu_log_lock();
4090 qemu_log("OP after optimization and liveness analysis:\n");
4091 tcg_dump_ops(s, true);
4092 qemu_log("\n");
4093 qemu_log_unlock();
4095 #endif
4097 tcg_reg_alloc_start(s);
4099 s->code_buf = tb->tc.ptr;
4100 s->code_ptr = tb->tc.ptr;
4102 #ifdef TCG_TARGET_NEED_LDST_LABELS
4103 QSIMPLEQ_INIT(&s->ldst_labels);
4104 #endif
4105 #ifdef TCG_TARGET_NEED_POOL_LABELS
4106 s->pool_labels = NULL;
4107 #endif
4109 num_insns = -1;
4110 QTAILQ_FOREACH(op, &s->ops, link) {
4111 TCGOpcode opc = op->opc;
4113 #ifdef CONFIG_PROFILER
4114 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4115 #endif
4117 switch (opc) {
4118 case INDEX_op_mov_i32:
4119 case INDEX_op_mov_i64:
4120 case INDEX_op_mov_vec:
4121 tcg_reg_alloc_mov(s, op);
4122 break;
4123 case INDEX_op_movi_i32:
4124 case INDEX_op_movi_i64:
4125 case INDEX_op_dupi_vec:
4126 tcg_reg_alloc_movi(s, op);
4127 break;
4128 case INDEX_op_dup_vec:
4129 tcg_reg_alloc_dup(s, op);
4130 break;
4131 case INDEX_op_insn_start:
4132 if (num_insns >= 0) {
4133 size_t off = tcg_current_code_size(s);
4134 s->gen_insn_end_off[num_insns] = off;
4135 /* Assert that we do not overflow our stored offset. */
4136 assert(s->gen_insn_end_off[num_insns] == off);
4138 num_insns++;
4139 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4140 target_ulong a;
4141 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4142 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4143 #else
4144 a = op->args[i];
4145 #endif
4146 s->gen_insn_data[num_insns][i] = a;
4148 break;
4149 case INDEX_op_discard:
4150 temp_dead(s, arg_temp(op->args[0]));
4151 break;
4152 case INDEX_op_set_label:
4153 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4154 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4155 break;
4156 case INDEX_op_call:
4157 tcg_reg_alloc_call(s, op);
4158 break;
4159 default:
4160 /* Sanity check that we've not introduced any unhandled opcodes. */
4161 tcg_debug_assert(tcg_op_supported(opc));
4162 /* Note: in order to speed up the code, it would be much
4163 faster to have specialized register allocator functions for
4164 some common argument patterns */
4165 tcg_reg_alloc_op(s, op);
4166 break;
4167 }
4168 #ifdef CONFIG_DEBUG_TCG
4169 check_regs(s);
4170 #endif
4171 /* Test for (pending) buffer overflow. The assumption is that any
4172 one operation beginning below the high water mark cannot overrun
4173 the buffer completely. Thus we can test for overflow after
4174 generating code without having to check during generation. */
4175 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4176 return -1;
4177 }
4178 /* Test for TB overflow, as seen by gen_insn_end_off. */
4179 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4180 return -2;
4181 }
4182 }
4183 tcg_debug_assert(num_insns >= 0);
4184 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4186 /* Generate TB finalization at the end of block */
4187 #ifdef TCG_TARGET_NEED_LDST_LABELS
4188 i = tcg_out_ldst_finalize(s);
4189 if (i < 0) {
4190 return i;
4191 }
4192 #endif
4193 #ifdef TCG_TARGET_NEED_POOL_LABELS
4194 i = tcg_out_pool_finalize(s);
4195 if (i < 0) {
4196 return i;
4197 }
4198 #endif
4199 if (!tcg_resolve_relocs(s)) {
4200 return -2;
4201 }
4203 /* flush instruction cache */
4204 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4206 return tcg_current_code_size(s);
4207 }
4209 #ifdef CONFIG_PROFILER
4210 void tcg_dump_info(void)
4211 {
4212 TCGProfile prof = {};
4213 const TCGProfile *s;
4214 int64_t tb_count;
4215 int64_t tb_div_count;
4216 int64_t tot;
4218 tcg_profile_snapshot_counters(&prof);
4219 s = &prof;
4220 tb_count = s->tb_count;
4221 tb_div_count = tb_count ? tb_count : 1;
4222 tot = s->interm_time + s->code_time;
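/* Note: the cycle counts come from profile_getclock(); the conversion to
   seconds below simply assumes a 2.4 GHz host clock. */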
4224 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4225 tot, tot / 2.4e9);
4226 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4227 " %0.1f%%)\n",
4228 tb_count, s->tb_count1 - tb_count,
4229 (double)(s->tb_count1 - s->tb_count)
4230 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4231 qemu_printf("avg ops/TB %0.1f max=%d\n",
4232 (double)s->op_count / tb_div_count, s->op_count_max);
4233 qemu_printf("deleted ops/TB %0.2f\n",
4234 (double)s->del_op_count / tb_div_count);
4235 qemu_printf("avg temps/TB %0.2f max=%d\n",
4236 (double)s->temp_count / tb_div_count, s->temp_count_max);
4237 qemu_printf("avg host code/TB %0.1f\n",
4238 (double)s->code_out_len / tb_div_count);
4239 qemu_printf("avg search data/TB %0.1f\n",
4240 (double)s->search_out_len / tb_div_count);
4242 qemu_printf("cycles/op %0.1f\n",
4243 s->op_count ? (double)tot / s->op_count : 0);
4244 qemu_printf("cycles/in byte %0.1f\n",
4245 s->code_in_len ? (double)tot / s->code_in_len : 0);
4246 qemu_printf("cycles/out byte %0.1f\n",
4247 s->code_out_len ? (double)tot / s->code_out_len : 0);
4248 qemu_printf("cycles/search byte %0.1f\n",
4249 s->search_out_len ? (double)tot / s->search_out_len : 0);
4250 if (tot == 0) {
4251 tot = 1;
4252 }
4253 qemu_printf(" gen_interm time %0.1f%%\n",
4254 (double)s->interm_time / tot * 100.0);
4255 qemu_printf(" gen_code time %0.1f%%\n",
4256 (double)s->code_time / tot * 100.0);
4257 qemu_printf("optim./code time %0.1f%%\n",
4258 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4259 * 100.0);
4260 qemu_printf("liveness/code time %0.1f%%\n",
4261 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4262 qemu_printf("cpu_restore count %" PRId64 "\n",
4263 s->restore_count);
4264 qemu_printf(" avg cycles %0.1f\n",
4265 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4266 }
4267 #else
4268 void tcg_dump_info(void)
4269 {
4270 qemu_printf("[TCG profiler not compiled]\n");
4271 }
4272 #endif
4274 #ifdef ELF_HOST_MACHINE
4275 /* In order to use this feature, the backend needs to do three things:
4277 (1) Define ELF_HOST_MACHINE to indicate both what value to
4278 put into the ELF image and to indicate support for the feature.
4280 (2) Define tcg_register_jit. This should create a buffer containing
4281 the contents of a .debug_frame section that describes the post-
4282 prologue unwind info for the tcg machine.
4284 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4285 */
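/* As an illustration only (each backend provides its own, host-specific
   definitions; the names and contents below are placeholders, not taken
   from any particular backend): steps (2) and (3) typically amount to

       void tcg_register_jit(void *buf, size_t buf_size)
       {
           static const DebugFrame debug_frame = {
               ... host-specific CIE/FDE unwind description ...
           };
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }

   where DebugFrame is a backend-defined structure that begins with the
   DebugFrameHeader layout declared at the top of this file. */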
4287 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4288 typedef enum {
4289 JIT_NOACTION = 0,
4290 JIT_REGISTER_FN,
4291 JIT_UNREGISTER_FN
4292 } jit_actions_t;
4294 struct jit_code_entry {
4295 struct jit_code_entry *next_entry;
4296 struct jit_code_entry *prev_entry;
4297 const void *symfile_addr;
4298 uint64_t symfile_size;
4299 };
4301 struct jit_descriptor {
4302 uint32_t version;
4303 uint32_t action_flag;
4304 struct jit_code_entry *relevant_entry;
4305 struct jit_code_entry *first_entry;
4306 };
4308 void __jit_debug_register_code(void) __attribute__((noinline));
4309 void __jit_debug_register_code(void)
4310 {
4311 asm("");
4312 }
4314 /* Must statically initialize the version, because GDB may check
4315 the version before we can set it. */
4316 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
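/* GDB sets a breakpoint in __jit_debug_register_code() and, each time it
   fires, re-reads __jit_debug_descriptor to pick up the in-memory ELF
   image described by relevant_entry. */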
4318 /* End GDB interface. */
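/* find_string() returns the offset of STR within the section string table
   at STRTAB.  The caller guarantees the string is present; the loop has
   no not-found exit. */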
4320 static int find_string(const char *strtab, const char *str)
4321 {
4322 const char *p = strtab + 1;
4324 while (1) {
4325 if (strcmp(p, str) == 0) {
4326 return p - strtab;
4327 }
4328 p += strlen(p) + 1;
4329 }
4330 }
4332 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4333 const void *debug_frame,
4334 size_t debug_frame_size)
4335 {
4336 struct __attribute__((packed)) DebugInfo {
4337 uint32_t len;
4338 uint16_t version;
4339 uint32_t abbrev;
4340 uint8_t ptr_size;
4341 uint8_t cu_die;
4342 uint16_t cu_lang;
4343 uintptr_t cu_low_pc;
4344 uintptr_t cu_high_pc;
4345 uint8_t fn_die;
4346 char fn_name[16];
4347 uintptr_t fn_low_pc;
4348 uintptr_t fn_high_pc;
4349 uint8_t cu_eoc;
4350 };
4352 struct ElfImage {
4353 ElfW(Ehdr) ehdr;
4354 ElfW(Phdr) phdr;
4355 ElfW(Shdr) shdr[7];
4356 ElfW(Sym) sym[2];
4357 struct DebugInfo di;
4358 uint8_t da[24];
4359 char str[80];
4360 };
4362 struct ElfImage *img;
4364 static const struct ElfImage img_template = {
4365 .ehdr = {
4366 .e_ident[EI_MAG0] = ELFMAG0,
4367 .e_ident[EI_MAG1] = ELFMAG1,
4368 .e_ident[EI_MAG2] = ELFMAG2,
4369 .e_ident[EI_MAG3] = ELFMAG3,
4370 .e_ident[EI_CLASS] = ELF_CLASS,
4371 .e_ident[EI_DATA] = ELF_DATA,
4372 .e_ident[EI_VERSION] = EV_CURRENT,
4373 .e_type = ET_EXEC,
4374 .e_machine = ELF_HOST_MACHINE,
4375 .e_version = EV_CURRENT,
4376 .e_phoff = offsetof(struct ElfImage, phdr),
4377 .e_shoff = offsetof(struct ElfImage, shdr),
4378 .e_ehsize = sizeof(ElfW(Shdr)),
4379 .e_phentsize = sizeof(ElfW(Phdr)),
4380 .e_phnum = 1,
4381 .e_shentsize = sizeof(ElfW(Shdr)),
4382 .e_shnum = ARRAY_SIZE(img->shdr),
4383 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4384 #ifdef ELF_HOST_FLAGS
4385 .e_flags = ELF_HOST_FLAGS,
4386 #endif
4387 #ifdef ELF_OSABI
4388 .e_ident[EI_OSABI] = ELF_OSABI,
4389 #endif
4390 },
4391 .phdr = {
4392 .p_type = PT_LOAD,
4393 .p_flags = PF_X,
4394 },
4395 .shdr = {
4396 [0] = { .sh_type = SHT_NULL },
4397 /* Trick: The contents of code_gen_buffer are not present in
4398 this fake ELF file; that got allocated elsewhere. Therefore
4399 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4400 will not look for contents. We can record any address. */
4401 [1] = { /* .text */
4402 .sh_type = SHT_NOBITS,
4403 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4404 },
4405 [2] = { /* .debug_info */
4406 .sh_type = SHT_PROGBITS,
4407 .sh_offset = offsetof(struct ElfImage, di),
4408 .sh_size = sizeof(struct DebugInfo),
4409 },
4410 [3] = { /* .debug_abbrev */
4411 .sh_type = SHT_PROGBITS,
4412 .sh_offset = offsetof(struct ElfImage, da),
4413 .sh_size = sizeof(img->da),
4414 },
4415 [4] = { /* .debug_frame */
4416 .sh_type = SHT_PROGBITS,
4417 .sh_offset = sizeof(struct ElfImage),
4418 },
4419 [5] = { /* .symtab */
4420 .sh_type = SHT_SYMTAB,
4421 .sh_offset = offsetof(struct ElfImage, sym),
4422 .sh_size = sizeof(img->sym),
4423 .sh_info = 1,
4424 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4425 .sh_entsize = sizeof(ElfW(Sym)),
4426 },
4427 [6] = { /* .strtab */
4428 .sh_type = SHT_STRTAB,
4429 .sh_offset = offsetof(struct ElfImage, str),
4430 .sh_size = sizeof(img->str),
4431 },
4432 },
4433 .sym = {
4434 [1] = { /* code_gen_buffer */
4435 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4436 .st_shndx = 1,
4437 },
4438 },
4439 .di = {
4440 .len = sizeof(struct DebugInfo) - 4,
4441 .version = 2,
4442 .ptr_size = sizeof(void *),
4443 .cu_die = 1,
4444 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4445 .fn_die = 2,
4446 .fn_name = "code_gen_buffer"
4447 },
4448 .da = {
4449 1, /* abbrev number (the cu) */
4450 0x11, 1, /* DW_TAG_compile_unit, has children */
4451 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4452 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4453 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4454 0, 0, /* end of abbrev */
4455 2, /* abbrev number (the fn) */
4456 0x2e, 0, /* DW_TAG_subprogram, no children */
4457 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4458 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4459 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4460 0, 0, /* end of abbrev */
4461 0 /* no more abbrev */
4462 },
4463 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4464 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4467 /* We only need a single jit entry; statically allocate it. */
4468 static struct jit_code_entry one_entry;
4470 uintptr_t buf = (uintptr_t)buf_ptr;
4471 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4472 DebugFrameHeader *dfh;
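/* Allocate the image: the fixed ElfImage template is followed immediately
   by the backend-supplied .debug_frame contents, which is why shdr[4]
   (.debug_frame) uses sizeof(struct ElfImage) as its file offset. */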
4474 img = g_malloc(img_size);
4475 *img = img_template;
4477 img->phdr.p_vaddr = buf;
4478 img->phdr.p_paddr = buf;
4479 img->phdr.p_memsz = buf_size;
4481 img->shdr[1].sh_name = find_string(img->str, ".text");
4482 img->shdr[1].sh_addr = buf;
4483 img->shdr[1].sh_size = buf_size;
4485 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4486 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4488 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4489 img->shdr[4].sh_size = debug_frame_size;
4491 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4492 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4494 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4495 img->sym[1].st_value = buf;
4496 img->sym[1].st_size = buf_size;
4498 img->di.cu_low_pc = buf;
4499 img->di.cu_high_pc = buf + buf_size;
4500 img->di.fn_low_pc = buf;
4501 img->di.fn_high_pc = buf + buf_size;
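/* Copy the backend's .debug_frame and patch its FDE so the unwind info
   covers the actual code_gen_buffer address range. */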
4503 dfh = (DebugFrameHeader *)(img + 1);
4504 memcpy(dfh, debug_frame, debug_frame_size);
4505 dfh->fde.func_start = buf;
4506 dfh->fde.func_len = buf_size;
4508 #ifdef DEBUG_JIT
4509 /* Enable this block to be able to debug the ELF image file creation.
4510 One can use readelf, objdump, or other inspection utilities. */
4511 {
4512 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4513 if (f) {
4514 if (fwrite(img, img_size, 1, f) != img_size) {
4515 /* Avoid stupid unused return value warning for fwrite. */
4516 }
4517 fclose(f);
4518 }
4519 }
4520 #endif
4522 one_entry.symfile_addr = img;
4523 one_entry.symfile_size = img_size;
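/* Registration handshake: publish the new entry, flag the action, and
   call the (noinline, empty) hook so an attached debugger can intercept. */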
4525 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4526 __jit_debug_descriptor.relevant_entry = &one_entry;
4527 __jit_debug_descriptor.first_entry = &one_entry;
4528 __jit_debug_register_code();
4529 }
4530 #else
4531 /* No support for the feature. Provide the entry point expected by exec.c,
4532 and implement the internal function we declared earlier. */
4534 static void tcg_register_jit_int(void *buf, size_t size,
4535 const void *debug_frame,
4536 size_t debug_frame_size)
4537 {
4538 }
4540 void tcg_register_jit(void *buf, size_t buf_size)
4541 {
4542 }
4543 #endif /* ELF_HOST_MACHINE */
4545 #if !TCG_TARGET_MAYBE_vec
4546 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4547 {
4548 g_assert_not_reached();
4549 }
4550 #endif