util: Extract flush_icache_range to cacheflush.c
tcg/tcg.c (qemu/ar7.git)
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
42 instructions */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
56 #else
57 # define ELF_CLASS ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
61 #else
62 # define ELF_DATA ELFDATA2LSB
63 #endif
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75 intptr_t value, intptr_t addend);
77 /* The CIE and FDE header definitions will be common to all hosts. */
78 typedef struct {
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
80 uint32_t id;
81 uint8_t version;
82 char augmentation[1];
83 uint8_t code_align;
84 uint8_t data_align;
85 uint8_t return_column;
86 } DebugFrameCIE;
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 uint32_t cie_offset;
91 uintptr_t func_start;
92 uintptr_t func_len;
93 } DebugFrameFDEHeader;
95 typedef struct QEMU_PACKED {
96 DebugFrameCIE cie;
97 DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
100 static void tcg_register_jit_int(void *buf, size_t size,
101 const void *debug_frame,
102 size_t debug_frame_size)
103 __attribute__((unused));
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static const char *target_parse_constraint(TCGArgConstraint *ct,
107 const char *ct_str, TCGType type);
108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
109 intptr_t arg2);
110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
111 static void tcg_out_movi(TCGContext *s, TCGType type,
112 TCGReg ret, tcg_target_long arg);
113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
114 const int *const_args);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg src);
118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, TCGReg base, intptr_t offset);
120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
121 TCGReg dst, tcg_target_long arg);
122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
123 unsigned vece, const TCGArg *args,
124 const int *const_args);
125 #else
126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
127 TCGReg dst, TCGReg src)
129 g_assert_not_reached();
131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
132 TCGReg dst, TCGReg base, intptr_t offset)
134 g_assert_not_reached();
136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
137 TCGReg dst, tcg_target_long arg)
139 g_assert_not_reached();
141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
142 unsigned vece, const TCGArg *args,
143 const int *const_args)
145 g_assert_not_reached();
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149 intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154 const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
159 #define TCG_HIGHWATER 1024
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
165 struct tcg_region_tree {
166 QemuMutex lock;
167 GTree *tree;
168 /* padding to avoid false sharing is computed at run-time */
169 };
172 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
173 * dynamically allocate from as demand dictates. Given appropriate region
174 * sizing, this minimizes flushes even when some TCG threads generate a lot
175 * more code than others.
177 struct tcg_region_state {
178 QemuMutex lock;
180 /* fields set at init time */
181 void *start;
182 void *start_aligned;
183 void *end;
184 size_t n;
185 size_t size; /* size of one region */
186 size_t stride; /* .size + guard size */
188 /* fields protected by the lock */
189 size_t current; /* current region index */
190 size_t agg_size_full; /* aggregate size of full regions */
193 static struct tcg_region_state region;
195 * This is an array of struct tcg_region_tree's, with padding.
196 * We use void * to simplify the computation of region_trees[i]; each
197 * struct is found every tree_size bytes.
199 static void *region_trees;
200 static size_t tree_size;
201 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
202 static TCGRegSet tcg_target_call_clobber_regs;
204 #if TCG_TARGET_INSN_UNIT_SIZE == 1
205 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
207 *s->code_ptr++ = v;
210 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
211 uint8_t v)
213 *p = v;
215 #endif
217 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
218 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
220 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
221 *s->code_ptr++ = v;
222 } else {
223 tcg_insn_unit *p = s->code_ptr;
224 memcpy(p, &v, sizeof(v));
225 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
229 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
230 uint16_t v)
232 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
233 *p = v;
234 } else {
235 memcpy(p, &v, sizeof(v));
238 #endif
240 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
241 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
243 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
244 *s->code_ptr++ = v;
245 } else {
246 tcg_insn_unit *p = s->code_ptr;
247 memcpy(p, &v, sizeof(v));
248 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
252 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
253 uint32_t v)
255 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
256 *p = v;
257 } else {
258 memcpy(p, &v, sizeof(v));
261 #endif
263 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
264 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
266 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
267 *s->code_ptr++ = v;
268 } else {
269 tcg_insn_unit *p = s->code_ptr;
270 memcpy(p, &v, sizeof(v));
271 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
275 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
276 uint64_t v)
278 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
279 *p = v;
280 } else {
281 memcpy(p, &v, sizeof(v));
284 #endif
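/*
 * Illustration (not part of the original file): the tcg_outN() helpers
 * above emit one host "instruction unit" at a time.  With
 * TCG_TARGET_INSN_UNIT_SIZE == 1 (a byte-stream encoder such as x86),
 * tcg_out32(s, v) memcpy()s the value and advances code_ptr by four units;
 * with a 4-byte unit size (typical RISC hosts) it stores a single unit:
 *
 *     tcg_out32(s, insn);   // one 4-byte unit, or four 1-byte units
 *
 * Either way, s->code_ptr counts tcg_insn_unit elements, never raw bytes.
 */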
286 /* label relocation processing */
288 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
289 TCGLabel *l, intptr_t addend)
291 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
293 r->type = type;
294 r->ptr = code_ptr;
295 r->addend = addend;
296 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
299 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
301 tcg_debug_assert(!l->has_value);
302 l->has_value = 1;
303 l->u.value_ptr = ptr;
306 TCGLabel *gen_new_label(void)
308 TCGContext *s = tcg_ctx;
309 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
311 memset(l, 0, sizeof(TCGLabel));
312 l->id = s->nb_labels++;
313 QSIMPLEQ_INIT(&l->relocs);
315 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
317 return l;
320 static bool tcg_resolve_relocs(TCGContext *s)
322 TCGLabel *l;
324 QSIMPLEQ_FOREACH(l, &s->labels, next) {
325 TCGRelocation *r;
326 uintptr_t value = l->u.value;
328 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
329 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
330 return false;
334 return true;
337 static void set_jmp_reset_offset(TCGContext *s, int which)
340 * We will check for overflow at the end of the opcode loop in
341 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
343 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
346 #include "tcg-target.c.inc"
348 /* compare a pointer @ptr and a tb_tc @s */
349 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
351 if (ptr >= s->ptr + s->size) {
352 return 1;
353 } else if (ptr < s->ptr) {
354 return -1;
356 return 0;
359 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
361 const struct tb_tc *a = ap;
362 const struct tb_tc *b = bp;
365 * When both sizes are set, we know this isn't a lookup.
366 * This is the most likely case: every TB must be inserted; lookups
367 * are a lot less frequent.
369 if (likely(a->size && b->size)) {
370 if (a->ptr > b->ptr) {
371 return 1;
372 } else if (a->ptr < b->ptr) {
373 return -1;
375 /* a->ptr == b->ptr should happen only on deletions */
376 g_assert(a->size == b->size);
377 return 0;
380 * In a lookup, either @a's or @b's .size field is set to 0.
381 * From the glib sources we see that @ap is always the lookup key. However
382 * the docs provide no guarantee, so we just mark this case as likely.
384 if (likely(a->size == 0)) {
385 return ptr_cmp_tb_tc(a->ptr, b);
387 return ptr_cmp_tb_tc(b->ptr, a);
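/*
 * Illustration (not from the original source; "host_pc" is a placeholder):
 * a lookup key built by tcg_tb_lookup() below only fills in .ptr, so
 * exactly one of the two operands has .size == 0 and the ptr_cmp_tb_tc()
 * path is taken:
 *
 *     struct tb_tc key = { .ptr = (void *)host_pc };      // .size == 0
 *     // stored node: { .ptr = tb->tc.ptr, .size = tb->tc.size }
 *     // tb_tc_cmp(&key, &node) == 0  iff
 *     //     node.ptr <= host_pc < node.ptr + node.size
 *
 * Insertions always carry a non-zero .size on both sides and fall through
 * to the plain pointer comparison at the top of the function.
 */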
390 static void tcg_region_trees_init(void)
392 size_t i;
394 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
395 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
396 for (i = 0; i < region.n; i++) {
397 struct tcg_region_tree *rt = region_trees + i * tree_size;
399 qemu_mutex_init(&rt->lock);
400 rt->tree = g_tree_new(tb_tc_cmp);
404 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
406 size_t region_idx;
408 if (p < region.start_aligned) {
409 region_idx = 0;
410 } else {
411 ptrdiff_t offset = p - region.start_aligned;
413 if (offset > region.stride * (region.n - 1)) {
414 region_idx = region.n - 1;
415 } else {
416 region_idx = offset / region.stride;
419 return region_trees + region_idx * tree_size;
422 void tcg_tb_insert(TranslationBlock *tb)
424 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
426 qemu_mutex_lock(&rt->lock);
427 g_tree_insert(rt->tree, &tb->tc, tb);
428 qemu_mutex_unlock(&rt->lock);
431 void tcg_tb_remove(TranslationBlock *tb)
433 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
435 qemu_mutex_lock(&rt->lock);
436 g_tree_remove(rt->tree, &tb->tc);
437 qemu_mutex_unlock(&rt->lock);
441 * Find the TB 'tb' such that
442 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
443 * Return NULL if not found.
445 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
447 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
448 TranslationBlock *tb;
449 struct tb_tc s = { .ptr = (void *)tc_ptr };
451 qemu_mutex_lock(&rt->lock);
452 tb = g_tree_lookup(rt->tree, &s);
453 qemu_mutex_unlock(&rt->lock);
454 return tb;
457 static void tcg_region_tree_lock_all(void)
459 size_t i;
461 for (i = 0; i < region.n; i++) {
462 struct tcg_region_tree *rt = region_trees + i * tree_size;
464 qemu_mutex_lock(&rt->lock);
468 static void tcg_region_tree_unlock_all(void)
470 size_t i;
472 for (i = 0; i < region.n; i++) {
473 struct tcg_region_tree *rt = region_trees + i * tree_size;
475 qemu_mutex_unlock(&rt->lock);
479 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
481 size_t i;
483 tcg_region_tree_lock_all();
484 for (i = 0; i < region.n; i++) {
485 struct tcg_region_tree *rt = region_trees + i * tree_size;
487 g_tree_foreach(rt->tree, func, user_data);
489 tcg_region_tree_unlock_all();
492 size_t tcg_nb_tbs(void)
494 size_t nb_tbs = 0;
495 size_t i;
497 tcg_region_tree_lock_all();
498 for (i = 0; i < region.n; i++) {
499 struct tcg_region_tree *rt = region_trees + i * tree_size;
501 nb_tbs += g_tree_nnodes(rt->tree);
503 tcg_region_tree_unlock_all();
504 return nb_tbs;
507 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
509 TranslationBlock *tb = v;
511 tb_destroy(tb);
512 return FALSE;
515 static void tcg_region_tree_reset_all(void)
517 size_t i;
519 tcg_region_tree_lock_all();
520 for (i = 0; i < region.n; i++) {
521 struct tcg_region_tree *rt = region_trees + i * tree_size;
523 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
524 /* Increment the refcount first so that destroy acts as a reset */
525 g_tree_ref(rt->tree);
526 g_tree_destroy(rt->tree);
528 tcg_region_tree_unlock_all();
531 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
533 void *start, *end;
535 start = region.start_aligned + curr_region * region.stride;
536 end = start + region.size;
538 if (curr_region == 0) {
539 start = region.start;
541 if (curr_region == region.n - 1) {
542 end = region.end;
545 *pstart = start;
546 *pend = end;
549 static void tcg_region_assign(TCGContext *s, size_t curr_region)
551 void *start, *end;
553 tcg_region_bounds(curr_region, &start, &end);
555 s->code_gen_buffer = start;
556 s->code_gen_ptr = start;
557 s->code_gen_buffer_size = end - start;
558 s->code_gen_highwater = end - TCG_HIGHWATER;
561 static bool tcg_region_alloc__locked(TCGContext *s)
563 if (region.current == region.n) {
564 return true;
566 tcg_region_assign(s, region.current);
567 region.current++;
568 return false;
572 * Request a new region once the one in use has filled up.
573 * Returns true on error.
575 static bool tcg_region_alloc(TCGContext *s)
577 bool err;
578 /* read the region size now; alloc__locked will overwrite it on success */
579 size_t size_full = s->code_gen_buffer_size;
581 qemu_mutex_lock(&region.lock);
582 err = tcg_region_alloc__locked(s);
583 if (!err) {
584 region.agg_size_full += size_full - TCG_HIGHWATER;
586 qemu_mutex_unlock(&region.lock);
587 return err;
591 * Perform a context's first region allocation.
592 * This function does _not_ increment region.agg_size_full.
594 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
596 return tcg_region_alloc__locked(s);
599 /* Call from a safe-work context */
600 void tcg_region_reset_all(void)
602 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
603 unsigned int i;
605 qemu_mutex_lock(&region.lock);
606 region.current = 0;
607 region.agg_size_full = 0;
609 for (i = 0; i < n_ctxs; i++) {
610 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
611 bool err = tcg_region_initial_alloc__locked(s);
613 g_assert(!err);
615 qemu_mutex_unlock(&region.lock);
617 tcg_region_tree_reset_all();
620 #ifdef CONFIG_USER_ONLY
621 static size_t tcg_n_regions(void)
623 return 1;
625 #else
627 * It is likely that some vCPUs will translate more code than others, so we
628 * first try to set more regions than max_cpus, with those regions being of
629 * reasonable size. If that's not possible we make do by evenly dividing
630 * the code_gen_buffer among the vCPUs.
632 static size_t tcg_n_regions(void)
634 size_t i;
636 /* Use a single region if all we have is one vCPU thread */
637 #if !defined(CONFIG_USER_ONLY)
638 MachineState *ms = MACHINE(qdev_get_machine());
639 unsigned int max_cpus = ms->smp.max_cpus;
640 #endif
641 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
642 return 1;
645 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
646 for (i = 8; i > 0; i--) {
647 size_t regions_per_thread = i;
648 size_t region_size;
650 region_size = tcg_init_ctx.code_gen_buffer_size;
651 region_size /= max_cpus * regions_per_thread;
653 if (region_size >= 2 * 1024u * 1024) {
654 return max_cpus * regions_per_thread;
657 /* If we can't, then just allocate one region per vCPU thread */
658 return max_cpus;
660 #endif
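/*
 * Worked example for the loop above (numbers are illustrative only):
 * with a 256 MiB code_gen_buffer and max_cpus == 8, the first iteration
 * (i == 8) already satisfies the check,
 *
 *     region_size = 256 MiB / (8 * 8) = 4 MiB >= 2 MiB
 *
 * so tcg_n_regions() returns 64.  With a 64 MiB buffer and 64 vCPUs even
 * i == 1 only yields 1 MiB regions, so the fallback of one region per
 * vCPU thread (64 regions) is used instead.
 */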
663 * Initializes region partitioning.
665 * Called at init time from the parent thread (i.e. the one calling
666 * tcg_context_init), after the target's TCG globals have been set.
668 * Region partitioning works by splitting code_gen_buffer into separate regions,
669 * and then assigning regions to TCG threads so that the threads can translate
670 * code in parallel without synchronization.
672 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
673 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
674 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
675 * must have been parsed before calling this function, since it calls
676 * qemu_tcg_mttcg_enabled().
678 * In user-mode we use a single region. Having multiple regions in user-mode
679 * is not supported, because the number of vCPU threads (recall that each thread
680 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
681 * OS, and usually this number is huge (tens of thousands is not uncommon).
682 * Thus, given this large bound on the number of vCPU threads and the fact
683 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
684 * the availability of at least one region per vCPU thread.
686 * However, this user-mode limitation is unlikely to be a significant problem
687 * in practice. Multi-threaded guests share most if not all of their translated
688 * code, which makes parallel code generation less appealing than in softmmu.
690 void tcg_region_init(void)
692 void *buf = tcg_init_ctx.code_gen_buffer;
693 void *aligned;
694 size_t size = tcg_init_ctx.code_gen_buffer_size;
695 size_t page_size = qemu_real_host_page_size;
696 size_t region_size;
697 size_t n_regions;
698 size_t i;
700 n_regions = tcg_n_regions();
702 /* The first region will be 'aligned - buf' bytes larger than the others */
703 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
704 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
706 * Make region_size a multiple of page_size, using aligned as the start.
707 * As a result of this we might end up with a few extra pages at the end of
708 * the buffer; we will assign those to the last region.
710 region_size = (size - (aligned - buf)) / n_regions;
711 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
713 /* A region must have at least 2 pages; one code, one guard */
714 g_assert(region_size >= 2 * page_size);
716 /* init the region struct */
717 qemu_mutex_init(&region.lock);
718 region.n = n_regions;
719 region.size = region_size - page_size;
720 region.stride = region_size;
721 region.start = buf;
722 region.start_aligned = aligned;
723 /* page-align the end, since its last page will be a guard page */
724 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
725 /* account for that last guard page */
726 region.end -= page_size;
728 /* set guard pages */
729 for (i = 0; i < region.n; i++) {
730 void *start, *end;
731 int rc;
733 tcg_region_bounds(i, &start, &end);
734 rc = qemu_mprotect_none(end, page_size);
735 g_assert(!rc);
738 tcg_region_trees_init();
740 /* In user-mode we support only one ctx, so do the initial allocation now */
741 #ifdef CONFIG_USER_ONLY
743 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
745 g_assert(!err);
747 #endif
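/*
 * Sketch of the resulting layout (illustrative, not part of the original
 * file).  Each region owns region.size bytes of code space followed by one
 * guard page ("GP", made inaccessible by qemu_mprotect_none() above), and
 * consecutive regions start region.stride bytes apart:
 *
 *     buf              aligned + stride       aligned + 2 * stride
 *      |                      |                      |
 *      v                      v                      v
 *      +------------------+--+-------------------+--+--- ... ---+--+
 *      |     region 0     |GP|     region 1      |GP|           |GP|
 *      +------------------+--+-------------------+--+--- ... ---+--+
 *
 * Region 0 starts at the unaligned buf, so it is 'aligned - buf' bytes
 * larger than the others; the last region absorbs any leftover pages.
 */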
750 static void alloc_tcg_plugin_context(TCGContext *s)
752 #ifdef CONFIG_PLUGIN
753 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
754 s->plugin_tb->insns =
755 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
756 #endif
760 * All TCG threads except the parent (i.e. the one that called tcg_context_init
761 * and registered the target's TCG globals) must register with this function
762 * before initiating translation.
764 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
765 * of tcg_region_init() for the reasoning behind this.
767 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
768 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
769 * is not used anymore for translation once this function is called.
771 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
772 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
774 #ifdef CONFIG_USER_ONLY
775 void tcg_register_thread(void)
777 tcg_ctx = &tcg_init_ctx;
779 #else
780 void tcg_register_thread(void)
782 MachineState *ms = MACHINE(qdev_get_machine());
783 TCGContext *s = g_malloc(sizeof(*s));
784 unsigned int i, n;
785 bool err;
787 *s = tcg_init_ctx;
789 /* Relink mem_base. */
790 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
791 if (tcg_init_ctx.temps[i].mem_base) {
792 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
793 tcg_debug_assert(b >= 0 && b < n);
794 s->temps[i].mem_base = &s->temps[b];
798 /* Claim an entry in tcg_ctxs */
799 n = qatomic_fetch_inc(&n_tcg_ctxs);
800 g_assert(n < ms->smp.max_cpus);
801 qatomic_set(&tcg_ctxs[n], s);
803 if (n > 0) {
804 alloc_tcg_plugin_context(s);
807 tcg_ctx = s;
808 qemu_mutex_lock(&region.lock);
809 err = tcg_region_initial_alloc__locked(tcg_ctx);
810 g_assert(!err);
811 qemu_mutex_unlock(&region.lock);
813 #endif /* !CONFIG_USER_ONLY */
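/*
 * A hedged sketch of the expected calling sequence in softmmu mode (the
 * callers live outside this file; the ordering follows the comments above):
 *
 *     tcg_context_init(&tcg_init_ctx);   // parent thread, once
 *     tcg_region_init();                 // parent thread, after -accel parsing
 *     ...
 *     tcg_register_thread();             // each vCPU thread, before it
 *                                        // translates anything
 *
 * In user-mode the same tcg_register_thread() call simply points tcg_ctx
 * at tcg_init_ctx, as in the #ifdef CONFIG_USER_ONLY branch above.
 */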
816 * Returns the size (in bytes) of all translated code (i.e. from all regions)
817 * currently in the cache.
818 * See also: tcg_code_capacity()
819 * Do not confuse with tcg_current_code_size(); that one applies to a single
820 * TCG context.
822 size_t tcg_code_size(void)
824 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
825 unsigned int i;
826 size_t total;
828 qemu_mutex_lock(&region.lock);
829 total = region.agg_size_full;
830 for (i = 0; i < n_ctxs; i++) {
831 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
832 size_t size;
834 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
835 g_assert(size <= s->code_gen_buffer_size);
836 total += size;
838 qemu_mutex_unlock(&region.lock);
839 return total;
843 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
844 * regions.
845 * See also: tcg_code_size()
847 size_t tcg_code_capacity(void)
849 size_t guard_size, capacity;
851 /* no need for synchronization; these variables are set at init time */
852 guard_size = region.stride - region.size;
853 capacity = region.end + guard_size - region.start;
854 capacity -= region.n * (guard_size + TCG_HIGHWATER);
855 return capacity;
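/*
 * Illustration (values assume 4 KiB host pages and the 64-region example
 * above): each region loses its guard page plus TCG_HIGHWATER bytes, so a
 * 256 MiB buffer reports roughly
 *
 *     capacity = 256 MiB - 64 * (4 KiB + 1 KiB) ~= 255.7 MiB
 *
 * i.e. guard pages and the per-region high-water slack are never usable
 * for translated code.
 */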
858 size_t tcg_tb_phys_invalidate_count(void)
860 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
861 unsigned int i;
862 size_t total = 0;
864 for (i = 0; i < n_ctxs; i++) {
865 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
867 total += qatomic_read(&s->tb_phys_invalidate_count);
869 return total;
872 /* pool based memory allocation */
873 void *tcg_malloc_internal(TCGContext *s, int size)
875 TCGPool *p;
876 int pool_size;
878 if (size > TCG_POOL_CHUNK_SIZE) {
879 /* big malloc: insert a new pool (XXX: could optimize) */
880 p = g_malloc(sizeof(TCGPool) + size);
881 p->size = size;
882 p->next = s->pool_first_large;
883 s->pool_first_large = p;
884 return p->data;
885 } else {
886 p = s->pool_current;
887 if (!p) {
888 p = s->pool_first;
889 if (!p)
890 goto new_pool;
891 } else {
892 if (!p->next) {
893 new_pool:
894 pool_size = TCG_POOL_CHUNK_SIZE;
895 p = g_malloc(sizeof(TCGPool) + pool_size);
896 p->size = pool_size;
897 p->next = NULL;
898 if (s->pool_current)
899 s->pool_current->next = p;
900 else
901 s->pool_first = p;
902 } else {
903 p = p->next;
907 s->pool_current = p;
908 s->pool_cur = p->data + size;
909 s->pool_end = p->data + p->size;
910 return p->data;
913 void tcg_pool_reset(TCGContext *s)
915 TCGPool *p, *t;
916 for (p = s->pool_first_large; p; p = t) {
917 t = p->next;
918 g_free(p);
920 s->pool_first_large = NULL;
921 s->pool_cur = s->pool_end = NULL;
922 s->pool_current = NULL;
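/*
 * Illustration of how the pool is meant to be used (callers shown are the
 * ones in this file; the pattern is allocate-freely, release-in-bulk):
 *
 *     tcg_func_start(s);                          // calls tcg_pool_reset()
 *     TCGRelocation *r = tcg_malloc(sizeof(*r));  // cheap bump allocation
 *     ...
 *     // nothing is freed individually: the next tcg_pool_reset() recycles
 *     // the fixed-size chunks and g_free()s the oversized "large" ones.
 */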
925 typedef struct TCGHelperInfo {
926 void *func;
927 const char *name;
928 unsigned flags;
929 unsigned sizemask;
930 } TCGHelperInfo;
932 #include "exec/helper-proto.h"
934 static const TCGHelperInfo all_helpers[] = {
935 #include "exec/helper-tcg.h"
937 static GHashTable *helper_table;
939 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
940 static void process_op_defs(TCGContext *s);
941 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
942 TCGReg reg, const char *name);
944 void tcg_context_init(TCGContext *s)
946 int op, total_args, n, i;
947 TCGOpDef *def;
948 TCGArgConstraint *args_ct;
949 TCGTemp *ts;
951 memset(s, 0, sizeof(*s));
952 s->nb_globals = 0;
954 /* Count total number of arguments and allocate the corresponding
955 space */
956 total_args = 0;
957 for(op = 0; op < NB_OPS; op++) {
958 def = &tcg_op_defs[op];
959 n = def->nb_iargs + def->nb_oargs;
960 total_args += n;
963 args_ct = g_new0(TCGArgConstraint, total_args);
965 for(op = 0; op < NB_OPS; op++) {
966 def = &tcg_op_defs[op];
967 def->args_ct = args_ct;
968 n = def->nb_iargs + def->nb_oargs;
969 args_ct += n;
972 /* Register helpers. */
973 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
974 helper_table = g_hash_table_new(NULL, NULL);
976 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
977 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
978 (gpointer)&all_helpers[i]);
981 tcg_target_init(s);
982 process_op_defs(s);
984 /* Reverse the order of the saved registers, assuming they're all at
985 the start of tcg_target_reg_alloc_order. */
986 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
987 int r = tcg_target_reg_alloc_order[n];
988 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
989 break;
992 for (i = 0; i < n; ++i) {
993 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
995 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
996 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
999 alloc_tcg_plugin_context(s);
1001 tcg_ctx = s;
1003 * In user-mode we simply share the init context among threads, since we
1004 * use a single region. See the documentation of tcg_region_init() for the
1005 * reasoning behind this.
1006 * In softmmu we will have at most max_cpus TCG threads.
1008 #ifdef CONFIG_USER_ONLY
1009 tcg_ctxs = &tcg_ctx;
1010 n_tcg_ctxs = 1;
1011 #else
1012 MachineState *ms = MACHINE(qdev_get_machine());
1013 unsigned int max_cpus = ms->smp.max_cpus;
1014 tcg_ctxs = g_new(TCGContext *, max_cpus);
1015 #endif
1017 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1018 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1019 cpu_env = temp_tcgv_ptr(ts);
1023 * Allocate TBs right before their corresponding translated code, making
1024 * sure that TBs and code are on different cache lines.
1026 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1028 uintptr_t align = qemu_icache_linesize;
1029 TranslationBlock *tb;
1030 void *next;
1032 retry:
1033 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1034 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1036 if (unlikely(next > s->code_gen_highwater)) {
1037 if (tcg_region_alloc(s)) {
1038 return NULL;
1040 goto retry;
1042 qatomic_set(&s->code_gen_ptr, next);
1043 s->data_gen_ptr = NULL;
1044 return tb;
1047 void tcg_prologue_init(TCGContext *s)
1049 size_t prologue_size, total_size;
1050 void *buf0, *buf1;
1052 /* Put the prologue at the beginning of code_gen_buffer. */
1053 buf0 = s->code_gen_buffer;
1054 total_size = s->code_gen_buffer_size;
1055 s->code_ptr = buf0;
1056 s->code_buf = buf0;
1057 s->data_gen_ptr = NULL;
1058 s->code_gen_prologue = buf0;
1060 /* Compute a high-water mark, at which we voluntarily flush the buffer
1061 and start over. The size here is arbitrary, significantly larger
1062 than we expect the code generation for any one opcode to require. */
1063 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1065 #ifdef TCG_TARGET_NEED_POOL_LABELS
1066 s->pool_labels = NULL;
1067 #endif
1069 /* Generate the prologue. */
1070 tcg_target_qemu_prologue(s);
1072 #ifdef TCG_TARGET_NEED_POOL_LABELS
1073 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1075 int result = tcg_out_pool_finalize(s);
1076 tcg_debug_assert(result == 0);
1078 #endif
1080 buf1 = s->code_ptr;
1081 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1083 /* Deduct the prologue from the buffer. */
1084 prologue_size = tcg_current_code_size(s);
1085 s->code_gen_ptr = buf1;
1086 s->code_gen_buffer = buf1;
1087 s->code_buf = buf1;
1088 total_size -= prologue_size;
1089 s->code_gen_buffer_size = total_size;
1091 tcg_register_jit(s->code_gen_buffer, total_size);
1093 #ifdef DEBUG_DISAS
1094 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1095 FILE *logfile = qemu_log_lock();
1096 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1097 if (s->data_gen_ptr) {
1098 size_t code_size = s->data_gen_ptr - buf0;
1099 size_t data_size = prologue_size - code_size;
1100 size_t i;
1102 log_disas(buf0, code_size);
1104 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1105 if (sizeof(tcg_target_ulong) == 8) {
1106 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1107 (uintptr_t)s->data_gen_ptr + i,
1108 *(uint64_t *)(s->data_gen_ptr + i));
1109 } else {
1110 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1111 (uintptr_t)s->data_gen_ptr + i,
1112 *(uint32_t *)(s->data_gen_ptr + i));
1115 } else {
1116 log_disas(buf0, prologue_size);
1118 qemu_log("\n");
1119 qemu_log_flush();
1120 qemu_log_unlock(logfile);
1122 #endif
1124 /* Assert that goto_ptr is implemented completely. */
1125 if (TCG_TARGET_HAS_goto_ptr) {
1126 tcg_debug_assert(s->code_gen_epilogue != NULL);
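/*
 * The flush_icache_range() call above (declared in "qemu/cacheflush.h",
 * per the commit subject) is the generic pattern for publishing freshly
 * written host code; an illustrative sketch:
 *
 *     void *start = s->code_ptr;
 *     ... emit instructions via the tcg_out* helpers ...
 *     flush_icache_range((uintptr_t)start, (uintptr_t)s->code_ptr);
 *
 * On hosts with coherent instruction caches (e.g. x86) the helper can be a
 * no-op; others (e.g. ARM, PPC) must flush before the new code runs.
 */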
1130 void tcg_func_start(TCGContext *s)
1132 tcg_pool_reset(s);
1133 s->nb_temps = s->nb_globals;
1135 /* No temps have been previously allocated for size or locality. */
1136 memset(s->free_temps, 0, sizeof(s->free_temps));
1138 s->nb_ops = 0;
1139 s->nb_labels = 0;
1140 s->current_frame_offset = s->frame_start;
1142 #ifdef CONFIG_DEBUG_TCG
1143 s->goto_tb_issue_mask = 0;
1144 #endif
1146 QTAILQ_INIT(&s->ops);
1147 QTAILQ_INIT(&s->free_ops);
1148 QSIMPLEQ_INIT(&s->labels);
1151 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1153 int n = s->nb_temps++;
1154 tcg_debug_assert(n < TCG_MAX_TEMPS);
1155 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1158 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1160 TCGTemp *ts;
1162 tcg_debug_assert(s->nb_globals == s->nb_temps);
1163 s->nb_globals++;
1164 ts = tcg_temp_alloc(s);
1165 ts->temp_global = 1;
1167 return ts;
1170 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1171 TCGReg reg, const char *name)
1173 TCGTemp *ts;
1175 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1176 tcg_abort();
1179 ts = tcg_global_alloc(s);
1180 ts->base_type = type;
1181 ts->type = type;
1182 ts->fixed_reg = 1;
1183 ts->reg = reg;
1184 ts->name = name;
1185 tcg_regset_set_reg(s->reserved_regs, reg);
1187 return ts;
1190 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1192 s->frame_start = start;
1193 s->frame_end = start + size;
1194 s->frame_temp
1195 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1198 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1199 intptr_t offset, const char *name)
1201 TCGContext *s = tcg_ctx;
1202 TCGTemp *base_ts = tcgv_ptr_temp(base);
1203 TCGTemp *ts = tcg_global_alloc(s);
1204 int indirect_reg = 0, bigendian = 0;
1205 #ifdef HOST_WORDS_BIGENDIAN
1206 bigendian = 1;
1207 #endif
1209 if (!base_ts->fixed_reg) {
1210 /* We do not support double-indirect registers. */
1211 tcg_debug_assert(!base_ts->indirect_reg);
1212 base_ts->indirect_base = 1;
1213 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1214 ? 2 : 1);
1215 indirect_reg = 1;
1218 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1219 TCGTemp *ts2 = tcg_global_alloc(s);
1220 char buf[64];
1222 ts->base_type = TCG_TYPE_I64;
1223 ts->type = TCG_TYPE_I32;
1224 ts->indirect_reg = indirect_reg;
1225 ts->mem_allocated = 1;
1226 ts->mem_base = base_ts;
1227 ts->mem_offset = offset + bigendian * 4;
1228 pstrcpy(buf, sizeof(buf), name);
1229 pstrcat(buf, sizeof(buf), "_0");
1230 ts->name = strdup(buf);
1232 tcg_debug_assert(ts2 == ts + 1);
1233 ts2->base_type = TCG_TYPE_I64;
1234 ts2->type = TCG_TYPE_I32;
1235 ts2->indirect_reg = indirect_reg;
1236 ts2->mem_allocated = 1;
1237 ts2->mem_base = base_ts;
1238 ts2->mem_offset = offset + (1 - bigendian) * 4;
1239 pstrcpy(buf, sizeof(buf), name);
1240 pstrcat(buf, sizeof(buf), "_1");
1241 ts2->name = strdup(buf);
1242 } else {
1243 ts->base_type = type;
1244 ts->type = type;
1245 ts->indirect_reg = indirect_reg;
1246 ts->mem_allocated = 1;
1247 ts->mem_base = base_ts;
1248 ts->mem_offset = offset;
1249 ts->name = name;
1251 return ts;
1254 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1256 TCGContext *s = tcg_ctx;
1257 TCGTemp *ts;
1258 int idx, k;
1260 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1261 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1262 if (idx < TCG_MAX_TEMPS) {
1263 /* There is already an available temp with the right type. */
1264 clear_bit(idx, s->free_temps[k].l);
1266 ts = &s->temps[idx];
1267 ts->temp_allocated = 1;
1268 tcg_debug_assert(ts->base_type == type);
1269 tcg_debug_assert(ts->temp_local == temp_local);
1270 } else {
1271 ts = tcg_temp_alloc(s);
1272 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1273 TCGTemp *ts2 = tcg_temp_alloc(s);
1275 ts->base_type = type;
1276 ts->type = TCG_TYPE_I32;
1277 ts->temp_allocated = 1;
1278 ts->temp_local = temp_local;
1280 tcg_debug_assert(ts2 == ts + 1);
1281 ts2->base_type = TCG_TYPE_I64;
1282 ts2->type = TCG_TYPE_I32;
1283 ts2->temp_allocated = 1;
1284 ts2->temp_local = temp_local;
1285 } else {
1286 ts->base_type = type;
1287 ts->type = type;
1288 ts->temp_allocated = 1;
1289 ts->temp_local = temp_local;
1293 #if defined(CONFIG_DEBUG_TCG)
1294 s->temps_in_use++;
1295 #endif
1296 return ts;
1299 TCGv_vec tcg_temp_new_vec(TCGType type)
1301 TCGTemp *t;
1303 #ifdef CONFIG_DEBUG_TCG
1304 switch (type) {
1305 case TCG_TYPE_V64:
1306 assert(TCG_TARGET_HAS_v64);
1307 break;
1308 case TCG_TYPE_V128:
1309 assert(TCG_TARGET_HAS_v128);
1310 break;
1311 case TCG_TYPE_V256:
1312 assert(TCG_TARGET_HAS_v256);
1313 break;
1314 default:
1315 g_assert_not_reached();
1317 #endif
1319 t = tcg_temp_new_internal(type, 0);
1320 return temp_tcgv_vec(t);
1323 /* Create a new temp of the same type as an existing temp. */
1324 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1326 TCGTemp *t = tcgv_vec_temp(match);
1328 tcg_debug_assert(t->temp_allocated != 0);
1330 t = tcg_temp_new_internal(t->base_type, 0);
1331 return temp_tcgv_vec(t);
1334 void tcg_temp_free_internal(TCGTemp *ts)
1336 TCGContext *s = tcg_ctx;
1337 int k, idx;
1339 #if defined(CONFIG_DEBUG_TCG)
1340 s->temps_in_use--;
1341 if (s->temps_in_use < 0) {
1342 fprintf(stderr, "More temporaries freed than allocated!\n");
1344 #endif
1346 tcg_debug_assert(ts->temp_global == 0);
1347 tcg_debug_assert(ts->temp_allocated != 0);
1348 ts->temp_allocated = 0;
1350 idx = temp_idx(ts);
1351 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1352 set_bit(idx, s->free_temps[k].l);
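/*
 * Illustration: freeing a temp only sets a bit in s->free_temps[k], so a
 * later tcg_temp_new_internal() with the same type and locality reuses the
 * slot instead of growing s->temps[].  With the usual wrappers (defined in
 * the tcg headers, not in this file):
 *
 *     TCGv_i32 t = tcg_temp_new_i32();   // may pop an index off free_temps
 *     ...
 *     tcg_temp_free_i32(t);              // index goes back on free_temps
 */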
1355 TCGv_i32 tcg_const_i32(int32_t val)
1357 TCGv_i32 t0;
1358 t0 = tcg_temp_new_i32();
1359 tcg_gen_movi_i32(t0, val);
1360 return t0;
1363 TCGv_i64 tcg_const_i64(int64_t val)
1365 TCGv_i64 t0;
1366 t0 = tcg_temp_new_i64();
1367 tcg_gen_movi_i64(t0, val);
1368 return t0;
1371 TCGv_i32 tcg_const_local_i32(int32_t val)
1373 TCGv_i32 t0;
1374 t0 = tcg_temp_local_new_i32();
1375 tcg_gen_movi_i32(t0, val);
1376 return t0;
1379 TCGv_i64 tcg_const_local_i64(int64_t val)
1381 TCGv_i64 t0;
1382 t0 = tcg_temp_local_new_i64();
1383 tcg_gen_movi_i64(t0, val);
1384 return t0;
1387 #if defined(CONFIG_DEBUG_TCG)
1388 void tcg_clear_temp_count(void)
1390 TCGContext *s = tcg_ctx;
1391 s->temps_in_use = 0;
1394 int tcg_check_temp_count(void)
1396 TCGContext *s = tcg_ctx;
1397 if (s->temps_in_use) {
1398 /* Clear the count so that we don't give another
1399 * warning immediately next time around.
1401 s->temps_in_use = 0;
1402 return 1;
1404 return 0;
1406 #endif
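/*
 * Hedged usage sketch for the CONFIG_DEBUG_TCG helpers above (the real
 * callers sit in the generic translator loop, outside this file):
 *
 *     tcg_clear_temp_count();
 *     ... translate one guest instruction ...
 *     if (tcg_check_temp_count()) {
 *         // some temporaries were allocated but never freed
 *     }
 */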
1408 /* Return true if OP may appear in the opcode stream.
1409 Test the runtime variable that controls each opcode. */
1410 bool tcg_op_supported(TCGOpcode op)
1412 const bool have_vec
1413 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1415 switch (op) {
1416 case INDEX_op_discard:
1417 case INDEX_op_set_label:
1418 case INDEX_op_call:
1419 case INDEX_op_br:
1420 case INDEX_op_mb:
1421 case INDEX_op_insn_start:
1422 case INDEX_op_exit_tb:
1423 case INDEX_op_goto_tb:
1424 case INDEX_op_qemu_ld_i32:
1425 case INDEX_op_qemu_st_i32:
1426 case INDEX_op_qemu_ld_i64:
1427 case INDEX_op_qemu_st_i64:
1428 return true;
1430 case INDEX_op_goto_ptr:
1431 return TCG_TARGET_HAS_goto_ptr;
1433 case INDEX_op_mov_i32:
1434 case INDEX_op_movi_i32:
1435 case INDEX_op_setcond_i32:
1436 case INDEX_op_brcond_i32:
1437 case INDEX_op_ld8u_i32:
1438 case INDEX_op_ld8s_i32:
1439 case INDEX_op_ld16u_i32:
1440 case INDEX_op_ld16s_i32:
1441 case INDEX_op_ld_i32:
1442 case INDEX_op_st8_i32:
1443 case INDEX_op_st16_i32:
1444 case INDEX_op_st_i32:
1445 case INDEX_op_add_i32:
1446 case INDEX_op_sub_i32:
1447 case INDEX_op_mul_i32:
1448 case INDEX_op_and_i32:
1449 case INDEX_op_or_i32:
1450 case INDEX_op_xor_i32:
1451 case INDEX_op_shl_i32:
1452 case INDEX_op_shr_i32:
1453 case INDEX_op_sar_i32:
1454 return true;
1456 case INDEX_op_movcond_i32:
1457 return TCG_TARGET_HAS_movcond_i32;
1458 case INDEX_op_div_i32:
1459 case INDEX_op_divu_i32:
1460 return TCG_TARGET_HAS_div_i32;
1461 case INDEX_op_rem_i32:
1462 case INDEX_op_remu_i32:
1463 return TCG_TARGET_HAS_rem_i32;
1464 case INDEX_op_div2_i32:
1465 case INDEX_op_divu2_i32:
1466 return TCG_TARGET_HAS_div2_i32;
1467 case INDEX_op_rotl_i32:
1468 case INDEX_op_rotr_i32:
1469 return TCG_TARGET_HAS_rot_i32;
1470 case INDEX_op_deposit_i32:
1471 return TCG_TARGET_HAS_deposit_i32;
1472 case INDEX_op_extract_i32:
1473 return TCG_TARGET_HAS_extract_i32;
1474 case INDEX_op_sextract_i32:
1475 return TCG_TARGET_HAS_sextract_i32;
1476 case INDEX_op_extract2_i32:
1477 return TCG_TARGET_HAS_extract2_i32;
1478 case INDEX_op_add2_i32:
1479 return TCG_TARGET_HAS_add2_i32;
1480 case INDEX_op_sub2_i32:
1481 return TCG_TARGET_HAS_sub2_i32;
1482 case INDEX_op_mulu2_i32:
1483 return TCG_TARGET_HAS_mulu2_i32;
1484 case INDEX_op_muls2_i32:
1485 return TCG_TARGET_HAS_muls2_i32;
1486 case INDEX_op_muluh_i32:
1487 return TCG_TARGET_HAS_muluh_i32;
1488 case INDEX_op_mulsh_i32:
1489 return TCG_TARGET_HAS_mulsh_i32;
1490 case INDEX_op_ext8s_i32:
1491 return TCG_TARGET_HAS_ext8s_i32;
1492 case INDEX_op_ext16s_i32:
1493 return TCG_TARGET_HAS_ext16s_i32;
1494 case INDEX_op_ext8u_i32:
1495 return TCG_TARGET_HAS_ext8u_i32;
1496 case INDEX_op_ext16u_i32:
1497 return TCG_TARGET_HAS_ext16u_i32;
1498 case INDEX_op_bswap16_i32:
1499 return TCG_TARGET_HAS_bswap16_i32;
1500 case INDEX_op_bswap32_i32:
1501 return TCG_TARGET_HAS_bswap32_i32;
1502 case INDEX_op_not_i32:
1503 return TCG_TARGET_HAS_not_i32;
1504 case INDEX_op_neg_i32:
1505 return TCG_TARGET_HAS_neg_i32;
1506 case INDEX_op_andc_i32:
1507 return TCG_TARGET_HAS_andc_i32;
1508 case INDEX_op_orc_i32:
1509 return TCG_TARGET_HAS_orc_i32;
1510 case INDEX_op_eqv_i32:
1511 return TCG_TARGET_HAS_eqv_i32;
1512 case INDEX_op_nand_i32:
1513 return TCG_TARGET_HAS_nand_i32;
1514 case INDEX_op_nor_i32:
1515 return TCG_TARGET_HAS_nor_i32;
1516 case INDEX_op_clz_i32:
1517 return TCG_TARGET_HAS_clz_i32;
1518 case INDEX_op_ctz_i32:
1519 return TCG_TARGET_HAS_ctz_i32;
1520 case INDEX_op_ctpop_i32:
1521 return TCG_TARGET_HAS_ctpop_i32;
1523 case INDEX_op_brcond2_i32:
1524 case INDEX_op_setcond2_i32:
1525 return TCG_TARGET_REG_BITS == 32;
1527 case INDEX_op_mov_i64:
1528 case INDEX_op_movi_i64:
1529 case INDEX_op_setcond_i64:
1530 case INDEX_op_brcond_i64:
1531 case INDEX_op_ld8u_i64:
1532 case INDEX_op_ld8s_i64:
1533 case INDEX_op_ld16u_i64:
1534 case INDEX_op_ld16s_i64:
1535 case INDEX_op_ld32u_i64:
1536 case INDEX_op_ld32s_i64:
1537 case INDEX_op_ld_i64:
1538 case INDEX_op_st8_i64:
1539 case INDEX_op_st16_i64:
1540 case INDEX_op_st32_i64:
1541 case INDEX_op_st_i64:
1542 case INDEX_op_add_i64:
1543 case INDEX_op_sub_i64:
1544 case INDEX_op_mul_i64:
1545 case INDEX_op_and_i64:
1546 case INDEX_op_or_i64:
1547 case INDEX_op_xor_i64:
1548 case INDEX_op_shl_i64:
1549 case INDEX_op_shr_i64:
1550 case INDEX_op_sar_i64:
1551 case INDEX_op_ext_i32_i64:
1552 case INDEX_op_extu_i32_i64:
1553 return TCG_TARGET_REG_BITS == 64;
1555 case INDEX_op_movcond_i64:
1556 return TCG_TARGET_HAS_movcond_i64;
1557 case INDEX_op_div_i64:
1558 case INDEX_op_divu_i64:
1559 return TCG_TARGET_HAS_div_i64;
1560 case INDEX_op_rem_i64:
1561 case INDEX_op_remu_i64:
1562 return TCG_TARGET_HAS_rem_i64;
1563 case INDEX_op_div2_i64:
1564 case INDEX_op_divu2_i64:
1565 return TCG_TARGET_HAS_div2_i64;
1566 case INDEX_op_rotl_i64:
1567 case INDEX_op_rotr_i64:
1568 return TCG_TARGET_HAS_rot_i64;
1569 case INDEX_op_deposit_i64:
1570 return TCG_TARGET_HAS_deposit_i64;
1571 case INDEX_op_extract_i64:
1572 return TCG_TARGET_HAS_extract_i64;
1573 case INDEX_op_sextract_i64:
1574 return TCG_TARGET_HAS_sextract_i64;
1575 case INDEX_op_extract2_i64:
1576 return TCG_TARGET_HAS_extract2_i64;
1577 case INDEX_op_extrl_i64_i32:
1578 return TCG_TARGET_HAS_extrl_i64_i32;
1579 case INDEX_op_extrh_i64_i32:
1580 return TCG_TARGET_HAS_extrh_i64_i32;
1581 case INDEX_op_ext8s_i64:
1582 return TCG_TARGET_HAS_ext8s_i64;
1583 case INDEX_op_ext16s_i64:
1584 return TCG_TARGET_HAS_ext16s_i64;
1585 case INDEX_op_ext32s_i64:
1586 return TCG_TARGET_HAS_ext32s_i64;
1587 case INDEX_op_ext8u_i64:
1588 return TCG_TARGET_HAS_ext8u_i64;
1589 case INDEX_op_ext16u_i64:
1590 return TCG_TARGET_HAS_ext16u_i64;
1591 case INDEX_op_ext32u_i64:
1592 return TCG_TARGET_HAS_ext32u_i64;
1593 case INDEX_op_bswap16_i64:
1594 return TCG_TARGET_HAS_bswap16_i64;
1595 case INDEX_op_bswap32_i64:
1596 return TCG_TARGET_HAS_bswap32_i64;
1597 case INDEX_op_bswap64_i64:
1598 return TCG_TARGET_HAS_bswap64_i64;
1599 case INDEX_op_not_i64:
1600 return TCG_TARGET_HAS_not_i64;
1601 case INDEX_op_neg_i64:
1602 return TCG_TARGET_HAS_neg_i64;
1603 case INDEX_op_andc_i64:
1604 return TCG_TARGET_HAS_andc_i64;
1605 case INDEX_op_orc_i64:
1606 return TCG_TARGET_HAS_orc_i64;
1607 case INDEX_op_eqv_i64:
1608 return TCG_TARGET_HAS_eqv_i64;
1609 case INDEX_op_nand_i64:
1610 return TCG_TARGET_HAS_nand_i64;
1611 case INDEX_op_nor_i64:
1612 return TCG_TARGET_HAS_nor_i64;
1613 case INDEX_op_clz_i64:
1614 return TCG_TARGET_HAS_clz_i64;
1615 case INDEX_op_ctz_i64:
1616 return TCG_TARGET_HAS_ctz_i64;
1617 case INDEX_op_ctpop_i64:
1618 return TCG_TARGET_HAS_ctpop_i64;
1619 case INDEX_op_add2_i64:
1620 return TCG_TARGET_HAS_add2_i64;
1621 case INDEX_op_sub2_i64:
1622 return TCG_TARGET_HAS_sub2_i64;
1623 case INDEX_op_mulu2_i64:
1624 return TCG_TARGET_HAS_mulu2_i64;
1625 case INDEX_op_muls2_i64:
1626 return TCG_TARGET_HAS_muls2_i64;
1627 case INDEX_op_muluh_i64:
1628 return TCG_TARGET_HAS_muluh_i64;
1629 case INDEX_op_mulsh_i64:
1630 return TCG_TARGET_HAS_mulsh_i64;
1632 case INDEX_op_mov_vec:
1633 case INDEX_op_dup_vec:
1634 case INDEX_op_dupi_vec:
1635 case INDEX_op_dupm_vec:
1636 case INDEX_op_ld_vec:
1637 case INDEX_op_st_vec:
1638 case INDEX_op_add_vec:
1639 case INDEX_op_sub_vec:
1640 case INDEX_op_and_vec:
1641 case INDEX_op_or_vec:
1642 case INDEX_op_xor_vec:
1643 case INDEX_op_cmp_vec:
1644 return have_vec;
1645 case INDEX_op_dup2_vec:
1646 return have_vec && TCG_TARGET_REG_BITS == 32;
1647 case INDEX_op_not_vec:
1648 return have_vec && TCG_TARGET_HAS_not_vec;
1649 case INDEX_op_neg_vec:
1650 return have_vec && TCG_TARGET_HAS_neg_vec;
1651 case INDEX_op_abs_vec:
1652 return have_vec && TCG_TARGET_HAS_abs_vec;
1653 case INDEX_op_andc_vec:
1654 return have_vec && TCG_TARGET_HAS_andc_vec;
1655 case INDEX_op_orc_vec:
1656 return have_vec && TCG_TARGET_HAS_orc_vec;
1657 case INDEX_op_mul_vec:
1658 return have_vec && TCG_TARGET_HAS_mul_vec;
1659 case INDEX_op_shli_vec:
1660 case INDEX_op_shri_vec:
1661 case INDEX_op_sari_vec:
1662 return have_vec && TCG_TARGET_HAS_shi_vec;
1663 case INDEX_op_shls_vec:
1664 case INDEX_op_shrs_vec:
1665 case INDEX_op_sars_vec:
1666 return have_vec && TCG_TARGET_HAS_shs_vec;
1667 case INDEX_op_shlv_vec:
1668 case INDEX_op_shrv_vec:
1669 case INDEX_op_sarv_vec:
1670 return have_vec && TCG_TARGET_HAS_shv_vec;
1671 case INDEX_op_rotli_vec:
1672 return have_vec && TCG_TARGET_HAS_roti_vec;
1673 case INDEX_op_rotls_vec:
1674 return have_vec && TCG_TARGET_HAS_rots_vec;
1675 case INDEX_op_rotlv_vec:
1676 case INDEX_op_rotrv_vec:
1677 return have_vec && TCG_TARGET_HAS_rotv_vec;
1678 case INDEX_op_ssadd_vec:
1679 case INDEX_op_usadd_vec:
1680 case INDEX_op_sssub_vec:
1681 case INDEX_op_ussub_vec:
1682 return have_vec && TCG_TARGET_HAS_sat_vec;
1683 case INDEX_op_smin_vec:
1684 case INDEX_op_umin_vec:
1685 case INDEX_op_smax_vec:
1686 case INDEX_op_umax_vec:
1687 return have_vec && TCG_TARGET_HAS_minmax_vec;
1688 case INDEX_op_bitsel_vec:
1689 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1690 case INDEX_op_cmpsel_vec:
1691 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1693 default:
1694 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1695 return true;
1699 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1700 and endian swap. Maybe it would be better to do the alignment
1701 and endian swap in tcg_reg_alloc_call(). */
1702 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1704 int i, real_args, nb_rets, pi;
1705 unsigned sizemask, flags;
1706 TCGHelperInfo *info;
1707 TCGOp *op;
1709 info = g_hash_table_lookup(helper_table, (gpointer)func);
1710 flags = info->flags;
1711 sizemask = info->sizemask;
1713 #ifdef CONFIG_PLUGIN
1714 /* detect non-plugin helpers */
1715 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1716 tcg_ctx->plugin_insn->calls_helpers = true;
1718 #endif
1720 #if defined(__sparc__) && !defined(__arch64__) \
1721 && !defined(CONFIG_TCG_INTERPRETER)
1722 /* We have 64-bit values in one register, but need to pass as two
1723 separate parameters. Split them. */
1724 int orig_sizemask = sizemask;
1725 int orig_nargs = nargs;
1726 TCGv_i64 retl, reth;
1727 TCGTemp *split_args[MAX_OPC_PARAM];
1729 retl = NULL;
1730 reth = NULL;
1731 if (sizemask != 0) {
1732 for (i = real_args = 0; i < nargs; ++i) {
1733 int is_64bit = sizemask & (1 << (i+1)*2);
1734 if (is_64bit) {
1735 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1736 TCGv_i32 h = tcg_temp_new_i32();
1737 TCGv_i32 l = tcg_temp_new_i32();
1738 tcg_gen_extr_i64_i32(l, h, orig);
1739 split_args[real_args++] = tcgv_i32_temp(h);
1740 split_args[real_args++] = tcgv_i32_temp(l);
1741 } else {
1742 split_args[real_args++] = args[i];
1745 nargs = real_args;
1746 args = split_args;
1747 sizemask = 0;
1749 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1750 for (i = 0; i < nargs; ++i) {
1751 int is_64bit = sizemask & (1 << (i+1)*2);
1752 int is_signed = sizemask & (2 << (i+1)*2);
1753 if (!is_64bit) {
1754 TCGv_i64 temp = tcg_temp_new_i64();
1755 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1756 if (is_signed) {
1757 tcg_gen_ext32s_i64(temp, orig);
1758 } else {
1759 tcg_gen_ext32u_i64(temp, orig);
1761 args[i] = tcgv_i64_temp(temp);
1764 #endif /* TCG_TARGET_EXTEND_ARGS */
1766 op = tcg_emit_op(INDEX_op_call);
1768 pi = 0;
1769 if (ret != NULL) {
1770 #if defined(__sparc__) && !defined(__arch64__) \
1771 && !defined(CONFIG_TCG_INTERPRETER)
1772 if (orig_sizemask & 1) {
1773 /* The 32-bit ABI is going to return the 64-bit value in
1774 the %o0/%o1 register pair. Prepare for this by using
1775 two return temporaries, and reassemble below. */
1776 retl = tcg_temp_new_i64();
1777 reth = tcg_temp_new_i64();
1778 op->args[pi++] = tcgv_i64_arg(reth);
1779 op->args[pi++] = tcgv_i64_arg(retl);
1780 nb_rets = 2;
1781 } else {
1782 op->args[pi++] = temp_arg(ret);
1783 nb_rets = 1;
1785 #else
1786 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1787 #ifdef HOST_WORDS_BIGENDIAN
1788 op->args[pi++] = temp_arg(ret + 1);
1789 op->args[pi++] = temp_arg(ret);
1790 #else
1791 op->args[pi++] = temp_arg(ret);
1792 op->args[pi++] = temp_arg(ret + 1);
1793 #endif
1794 nb_rets = 2;
1795 } else {
1796 op->args[pi++] = temp_arg(ret);
1797 nb_rets = 1;
1799 #endif
1800 } else {
1801 nb_rets = 0;
1803 TCGOP_CALLO(op) = nb_rets;
1805 real_args = 0;
1806 for (i = 0; i < nargs; i++) {
1807 int is_64bit = sizemask & (1 << (i+1)*2);
1808 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1809 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1810 /* some targets want aligned 64 bit args */
1811 if (real_args & 1) {
1812 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1813 real_args++;
1815 #endif
1816 /* If stack grows up, then we will be placing successive
1817 arguments at lower addresses, which means we need to
1818 reverse the order compared to how we would normally
1819 treat either big or little-endian. For those arguments
1820 that will wind up in registers, this still works for
1821 HPPA (the only current STACK_GROWSUP target) since the
1822 argument registers are *also* allocated in decreasing
1823 order. If another such target is added, this logic may
1824 have to get more complicated to differentiate between
1825 stack arguments and register arguments. */
1826 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1827 op->args[pi++] = temp_arg(args[i] + 1);
1828 op->args[pi++] = temp_arg(args[i]);
1829 #else
1830 op->args[pi++] = temp_arg(args[i]);
1831 op->args[pi++] = temp_arg(args[i] + 1);
1832 #endif
1833 real_args += 2;
1834 continue;
1837 op->args[pi++] = temp_arg(args[i]);
1838 real_args++;
1840 op->args[pi++] = (uintptr_t)func;
1841 op->args[pi++] = flags;
1842 TCGOP_CALLI(op) = real_args;
1844 /* Make sure the fields didn't overflow. */
1845 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1846 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1848 #if defined(__sparc__) && !defined(__arch64__) \
1849 && !defined(CONFIG_TCG_INTERPRETER)
1850 /* Free all of the parts we allocated above. */
1851 for (i = real_args = 0; i < orig_nargs; ++i) {
1852 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1853 if (is_64bit) {
1854 tcg_temp_free_internal(args[real_args++]);
1855 tcg_temp_free_internal(args[real_args++]);
1856 } else {
1857 real_args++;
1860 if (orig_sizemask & 1) {
1861 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1862 Note that describing these as TCGv_i64 eliminates an unnecessary
1863 zero-extension that tcg_gen_concat_i32_i64 would create. */
1864 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1865 tcg_temp_free_i64(retl);
1866 tcg_temp_free_i64(reth);
1868 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1869 for (i = 0; i < nargs; ++i) {
1870 int is_64bit = sizemask & (1 << (i+1)*2);
1871 if (!is_64bit) {
1872 tcg_temp_free_internal(args[i]);
1875 #endif /* TCG_TARGET_EXTEND_ARGS */
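/*
 * Worked example of the sizemask encoding consumed above (two bits per
 * value: bit 0 of a pair = "is 64-bit", bit 1 = "is signed"; pair 0
 * describes the return value, pair i+1 describes argument i).  For a
 * helper returning i64 and taking (i64, i32) the table entry would hold:
 *
 *     sizemask = 1            // return value is 64-bit
 *              | (1 << 2)     // arg 0 is 64-bit
 *              | (0 << 4);    // arg 1 is 32-bit, unsigned
 *
 * On a 32-bit host the is_64bit branch above then emits two TCGArgs for
 * arg 0 (low/high halves), which is why real_args can exceed nargs.
 */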
1878 static void tcg_reg_alloc_start(TCGContext *s)
1880 int i, n;
1881 TCGTemp *ts;
1883 for (i = 0, n = s->nb_globals; i < n; i++) {
1884 ts = &s->temps[i];
1885 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1887 for (n = s->nb_temps; i < n; i++) {
1888 ts = &s->temps[i];
1889 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1890 ts->mem_allocated = 0;
1891 ts->fixed_reg = 0;
1894 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1897 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1898 TCGTemp *ts)
1900 int idx = temp_idx(ts);
1902 if (ts->temp_global) {
1903 pstrcpy(buf, buf_size, ts->name);
1904 } else if (ts->temp_local) {
1905 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1906 } else {
1907 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1909 return buf;
1912 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1913 int buf_size, TCGArg arg)
1915 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1918 /* Find helper name. */
1919 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1921 const char *ret = NULL;
1922 if (helper_table) {
1923 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1924 if (info) {
1925 ret = info->name;
1928 return ret;
1931 static const char * const cond_name[] =
1933 [TCG_COND_NEVER] = "never",
1934 [TCG_COND_ALWAYS] = "always",
1935 [TCG_COND_EQ] = "eq",
1936 [TCG_COND_NE] = "ne",
1937 [TCG_COND_LT] = "lt",
1938 [TCG_COND_GE] = "ge",
1939 [TCG_COND_LE] = "le",
1940 [TCG_COND_GT] = "gt",
1941 [TCG_COND_LTU] = "ltu",
1942 [TCG_COND_GEU] = "geu",
1943 [TCG_COND_LEU] = "leu",
1944 [TCG_COND_GTU] = "gtu"
1947 static const char * const ldst_name[] =
1949 [MO_UB] = "ub",
1950 [MO_SB] = "sb",
1951 [MO_LEUW] = "leuw",
1952 [MO_LESW] = "lesw",
1953 [MO_LEUL] = "leul",
1954 [MO_LESL] = "lesl",
1955 [MO_LEQ] = "leq",
1956 [MO_BEUW] = "beuw",
1957 [MO_BESW] = "besw",
1958 [MO_BEUL] = "beul",
1959 [MO_BESL] = "besl",
1960 [MO_BEQ] = "beq",
1963 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1964 #ifdef TARGET_ALIGNED_ONLY
1965 [MO_UNALN >> MO_ASHIFT] = "un+",
1966 [MO_ALIGN >> MO_ASHIFT] = "",
1967 #else
1968 [MO_UNALN >> MO_ASHIFT] = "",
1969 [MO_ALIGN >> MO_ASHIFT] = "al+",
1970 #endif
1971 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1972 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1973 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1974 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1975 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1976 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1979 static inline bool tcg_regset_single(TCGRegSet d)
1981 return (d & (d - 1)) == 0;
1984 static inline TCGReg tcg_regset_first(TCGRegSet d)
1986 if (TCG_TARGET_NB_REGS <= 32) {
1987 return ctz32(d);
1988 } else {
1989 return ctz64(d);
1993 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1995 char buf[128];
1996 TCGOp *op;
1998 QTAILQ_FOREACH(op, &s->ops, link) {
1999 int i, k, nb_oargs, nb_iargs, nb_cargs;
2000 const TCGOpDef *def;
2001 TCGOpcode c;
2002 int col = 0;
2004 c = op->opc;
2005 def = &tcg_op_defs[c];
2007 if (c == INDEX_op_insn_start) {
2008 nb_oargs = 0;
2009 col += qemu_log("\n ----");
2011 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2012 target_ulong a;
2013 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2014 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2015 #else
2016 a = op->args[i];
2017 #endif
2018 col += qemu_log(" " TARGET_FMT_lx, a);
2020 } else if (c == INDEX_op_call) {
2021 /* variable number of arguments */
2022 nb_oargs = TCGOP_CALLO(op);
2023 nb_iargs = TCGOP_CALLI(op);
2024 nb_cargs = def->nb_cargs;
2026 /* function name, flags, out args */
2027 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2028 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2029 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2030 for (i = 0; i < nb_oargs; i++) {
2031 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2032 op->args[i]));
2034 for (i = 0; i < nb_iargs; i++) {
2035 TCGArg arg = op->args[nb_oargs + i];
2036 const char *t = "<dummy>";
2037 if (arg != TCG_CALL_DUMMY_ARG) {
2038 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2040 col += qemu_log(",%s", t);
2042 } else {
2043 col += qemu_log(" %s ", def->name);
2045 nb_oargs = def->nb_oargs;
2046 nb_iargs = def->nb_iargs;
2047 nb_cargs = def->nb_cargs;
2049 if (def->flags & TCG_OPF_VECTOR) {
2050 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2051 8 << TCGOP_VECE(op));
2054 k = 0;
2055 for (i = 0; i < nb_oargs; i++) {
2056 if (k != 0) {
2057 col += qemu_log(",");
2059 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2060 op->args[k++]));
2062 for (i = 0; i < nb_iargs; i++) {
2063 if (k != 0) {
2064 col += qemu_log(",");
2066 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2067 op->args[k++]));
2069 switch (c) {
2070 case INDEX_op_brcond_i32:
2071 case INDEX_op_setcond_i32:
2072 case INDEX_op_movcond_i32:
2073 case INDEX_op_brcond2_i32:
2074 case INDEX_op_setcond2_i32:
2075 case INDEX_op_brcond_i64:
2076 case INDEX_op_setcond_i64:
2077 case INDEX_op_movcond_i64:
2078 case INDEX_op_cmp_vec:
2079 case INDEX_op_cmpsel_vec:
2080 if (op->args[k] < ARRAY_SIZE(cond_name)
2081 && cond_name[op->args[k]]) {
2082 col += qemu_log(",%s", cond_name[op->args[k++]]);
2083 } else {
2084 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2086 i = 1;
2087 break;
2088 case INDEX_op_qemu_ld_i32:
2089 case INDEX_op_qemu_st_i32:
2090 case INDEX_op_qemu_ld_i64:
2091 case INDEX_op_qemu_st_i64:
2093 TCGMemOpIdx oi = op->args[k++];
2094 MemOp op = get_memop(oi);
2095 unsigned ix = get_mmuidx(oi);
2097 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2098 col += qemu_log(",$0x%x,%u", op, ix);
2099 } else {
2100 const char *s_al, *s_op;
2101 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2102 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2103 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2105 i = 1;
2107 break;
2108 default:
2109 i = 0;
2110 break;
2112 switch (c) {
2113 case INDEX_op_set_label:
2114 case INDEX_op_br:
2115 case INDEX_op_brcond_i32:
2116 case INDEX_op_brcond_i64:
2117 case INDEX_op_brcond2_i32:
2118 col += qemu_log("%s$L%d", k ? "," : "",
2119 arg_label(op->args[k])->id);
2120 i++, k++;
2121 break;
2122 default:
2123 break;
2125 for (; i < nb_cargs; i++, k++) {
2126 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2130 if (have_prefs || op->life) {
2132 QemuLogFile *logfile;
2134 rcu_read_lock();
2135 logfile = qatomic_rcu_read(&qemu_logfile);
2136 if (logfile) {
2137 for (; col < 40; ++col) {
2138 putc(' ', logfile->fd);
2141 rcu_read_unlock();
2144 if (op->life) {
2145 unsigned life = op->life;
2147 if (life & (SYNC_ARG * 3)) {
2148 qemu_log(" sync:");
2149 for (i = 0; i < 2; ++i) {
2150 if (life & (SYNC_ARG << i)) {
2151 qemu_log(" %d", i);
2155 life /= DEAD_ARG;
2156 if (life) {
2157 qemu_log(" dead:");
2158 for (i = 0; life; ++i, life >>= 1) {
2159 if (life & 1) {
2160 qemu_log(" %d", i);
2166 if (have_prefs) {
2167 for (i = 0; i < nb_oargs; ++i) {
2168 TCGRegSet set = op->output_pref[i];
2170 if (i == 0) {
2171 qemu_log(" pref=");
2172 } else {
2173 qemu_log(",");
2175 if (set == 0) {
2176 qemu_log("none");
2177 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2178 qemu_log("all");
2179 #ifdef CONFIG_DEBUG_TCG
2180 } else if (tcg_regset_single(set)) {
2181 TCGReg reg = tcg_regset_first(set);
2182 qemu_log("%s", tcg_target_reg_names[reg]);
2183 #endif
2184 } else if (TCG_TARGET_NB_REGS <= 32) {
2185 qemu_log("%#x", (uint32_t)set);
2186 } else {
2187 qemu_log("%#" PRIx64, (uint64_t)set);
2192 qemu_log("\n");
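/*
 * For orientation, the listing emitted above looks roughly like the
 * following (an illustrative fragment, not captured output):
 *
 *  ---- 0000000000400123 0000000000000000
 *  mov_i32 tmp2,var                         dead: 1
 *  qemu_ld_i32 tmp3,tmp2,leul,1             dead: 1  pref=all
 *  brcond_i32 tmp3,tmp4,lt,$L1              dead: 0 1
 *
 * i.e. one op per line with outputs first, then inputs, then constant
 * arguments (condition, memory-op flags and mmu index, branch target),
 * followed by the optional liveness and preference annotations.
 */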
2196 /* we give more priority to constraints with fewer registers */
2197 static int get_constraint_priority(const TCGOpDef *def, int k)
2199 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2200 int n;
2202 if (arg_ct->oalias) {
2203 /* an alias is equivalent to a single register */
2204 n = 1;
2205 } else {
2206 n = ctpop64(arg_ct->regs);
2208 return TCG_TARGET_NB_REGS - n + 1;
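/*
 * Worked example, assuming TCG_TARGET_NB_REGS == 16: an argument that
 * aliases an output, or whose constraint admits exactly one register,
 * gets priority 16 - 1 + 1 = 16, while a constraint admitting all
 * sixteen registers gets 16 - 16 + 1 = 1.  sort_constraints() below then
 * handles the most constrained arguments first.
 */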
2211 /* sort from highest priority to lowest */
2212 static void sort_constraints(TCGOpDef *def, int start, int n)
2214 int i, j;
2215 TCGArgConstraint *a = def->args_ct;
2217 for (i = 0; i < n; i++) {
2218 a[start + i].sort_index = start + i;
2220 if (n <= 1) {
2221 return;
2223 for (i = 0; i < n - 1; i++) {
2224 for (j = i + 1; j < n; j++) {
2225 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2226 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2227 if (p1 < p2) {
2228 int tmp = a[start + i].sort_index;
2229 a[start + i].sort_index = a[start + j].sort_index;
2230 a[start + j].sort_index = tmp;
2236 static void process_op_defs(TCGContext *s)
2238 TCGOpcode op;
2240 for (op = 0; op < NB_OPS; op++) {
2241 TCGOpDef *def = &tcg_op_defs[op];
2242 const TCGTargetOpDef *tdefs;
2243 TCGType type;
2244 int i, nb_args;
2246 if (def->flags & TCG_OPF_NOT_PRESENT) {
2247 continue;
2250 nb_args = def->nb_iargs + def->nb_oargs;
2251 if (nb_args == 0) {
2252 continue;
2255 tdefs = tcg_target_op_def(op);
2256 /* Missing TCGTargetOpDef entry. */
2257 tcg_debug_assert(tdefs != NULL);
2259 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2260 for (i = 0; i < nb_args; i++) {
2261 const char *ct_str = tdefs->args_ct_str[i];
2262 /* Incomplete TCGTargetOpDef entry. */
2263 tcg_debug_assert(ct_str != NULL);
2265 while (*ct_str != '\0') {
2266 switch(*ct_str) {
2267 case '0' ... '9':
2269 int oarg = *ct_str - '0';
2270 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2271 tcg_debug_assert(oarg < def->nb_oargs);
2272 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2273 def->args_ct[i] = def->args_ct[oarg];
2274 /* The output sets oalias. */
2275 def->args_ct[oarg].oalias = true;
2276 def->args_ct[oarg].alias_index = i;
2277 /* The input sets ialias. */
2278 def->args_ct[i].ialias = true;
2279 def->args_ct[i].alias_index = oarg;
2281 ct_str++;
2282 break;
2283 case '&':
2284 def->args_ct[i].newreg = true;
2285 ct_str++;
2286 break;
2287 case 'i':
2288 def->args_ct[i].ct |= TCG_CT_CONST;
2289 ct_str++;
2290 break;
2291 default:
2292 ct_str = target_parse_constraint(&def->args_ct[i],
2293 ct_str, type);
2294 /* Typo in TCGTargetOpDef constraint. */
2295 tcg_debug_assert(ct_str != NULL);
2300 /* TCGTargetOpDef entry with too much information? */
2301 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2303 /* sort the constraints (XXX: this is just a heuristic) */
2304 sort_constraints(def, 0, def->nb_oargs);
2305 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2309 void tcg_op_remove(TCGContext *s, TCGOp *op)
2311 TCGLabel *label;
2313 switch (op->opc) {
2314 case INDEX_op_br:
2315 label = arg_label(op->args[0]);
2316 label->refs--;
2317 break;
2318 case INDEX_op_brcond_i32:
2319 case INDEX_op_brcond_i64:
2320 label = arg_label(op->args[3]);
2321 label->refs--;
2322 break;
2323 case INDEX_op_brcond2_i32:
2324 label = arg_label(op->args[5]);
2325 label->refs--;
2326 break;
2327 default:
2328 break;
2331 QTAILQ_REMOVE(&s->ops, op, link);
2332 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2333 s->nb_ops--;
2335 #ifdef CONFIG_PROFILER
2336 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2337 #endif
2340 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2342 TCGContext *s = tcg_ctx;
2343 TCGOp *op;
2345 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2346 op = tcg_malloc(sizeof(TCGOp));
2347 } else {
2348 op = QTAILQ_FIRST(&s->free_ops);
2349 QTAILQ_REMOVE(&s->free_ops, op, link);
2351 memset(op, 0, offsetof(TCGOp, link));
2352 op->opc = opc;
2353 s->nb_ops++;
2355 return op;
2358 TCGOp *tcg_emit_op(TCGOpcode opc)
2360 TCGOp *op = tcg_op_alloc(opc);
2361 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2362 return op;
2365 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2367 TCGOp *new_op = tcg_op_alloc(opc);
2368 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2369 return new_op;
2372 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2374 TCGOp *new_op = tcg_op_alloc(opc);
2375 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2376 return new_op;
2379 /* Reachability analysis: remove unreachable code. */
2380 static void reachable_code_pass(TCGContext *s)
2382 TCGOp *op, *op_next;
2383 bool dead = false;
2385 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2386 bool remove = dead;
2387 TCGLabel *label;
2388 int call_flags;
2390 switch (op->opc) {
2391 case INDEX_op_set_label:
2392 label = arg_label(op->args[0]);
2393 if (label->refs == 0) {
2395 * While there is an occasional backward branch, virtually
2396 all branches generated by the translators are forward,
2397 which means that generally we will have already removed
2398 all references to the label that will ever exist, and there is
2399 * little to be gained by iterating.
2401 remove = true;
2402 } else {
2403 /* Once we see a label, insns become live again. */
2404 dead = false;
2405 remove = false;
2408 * Optimization can fold conditional branches to unconditional.
2409 * If we find a label with one reference which is preceded by
2410 an unconditional branch to it, remove both. This has to wait
2411 until the dead code in between them has been removed.
2413 if (label->refs == 1) {
2414 TCGOp *op_prev = QTAILQ_PREV(op, link);
2415 if (op_prev->opc == INDEX_op_br &&
2416 label == arg_label(op_prev->args[0])) {
2417 tcg_op_remove(s, op_prev);
2418 remove = true;
2422 break;
2424 case INDEX_op_br:
2425 case INDEX_op_exit_tb:
2426 case INDEX_op_goto_ptr:
2427 /* Unconditional branches; everything following is dead. */
2428 dead = true;
2429 break;
2431 case INDEX_op_call:
2432 /* Notice noreturn helper calls, raising exceptions. */
2433 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2434 if (call_flags & TCG_CALL_NO_RETURN) {
2435 dead = true;
2437 break;
2439 case INDEX_op_insn_start:
2440 /* Never remove -- we need to keep these for unwind. */
2441 remove = false;
2442 break;
2444 default:
2445 break;
2448 if (remove) {
2449 tcg_op_remove(s, op);
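/*
 * Sketch of the label-folding case above: once the optimizer has turned
 * a conditional branch into "br $L1" and this pass has deleted the dead
 * ops following it, the remaining pair
 *
 *     br $L1
 *     set_label $L1
 *
 * is redundant (the label has no other references) and both ops are
 * removed.
 */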
2454 #define TS_DEAD 1
2455 #define TS_MEM 2
2457 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2458 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
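/*
 * Illustration of the life encoding these macros decode: for an op with
 * one output and two inputs where the output must be synced to memory
 * and the second input dies, arg_life is (SYNC_ARG << 0) | (DEAD_ARG << 2),
 * so NEED_SYNC_ARG(0) and IS_DEAD_ARG(2) hold and everything else is
 * clear.
 */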
2460 /* For liveness_pass_1, the register preferences for a given temp. */
2461 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2463 return ts->state_ptr;
2466 /* For liveness_pass_1, reset the preferences for a given temp to the
2467 * maximal regset for its type.
2469 static inline void la_reset_pref(TCGTemp *ts)
2471 *la_temp_pref(ts)
2472 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2475 /* liveness analysis: end of function: all temps are dead, and globals
2476 should be in memory. */
2477 static void la_func_end(TCGContext *s, int ng, int nt)
2479 int i;
2481 for (i = 0; i < ng; ++i) {
2482 s->temps[i].state = TS_DEAD | TS_MEM;
2483 la_reset_pref(&s->temps[i]);
2485 for (i = ng; i < nt; ++i) {
2486 s->temps[i].state = TS_DEAD;
2487 la_reset_pref(&s->temps[i]);
2491 /* liveness analysis: end of basic block: all temps are dead, globals
2492 and local temps should be in memory. */
2493 static void la_bb_end(TCGContext *s, int ng, int nt)
2495 int i;
2497 for (i = 0; i < ng; ++i) {
2498 s->temps[i].state = TS_DEAD | TS_MEM;
2499 la_reset_pref(&s->temps[i]);
2501 for (i = ng; i < nt; ++i) {
2502 s->temps[i].state = (s->temps[i].temp_local
2503 ? TS_DEAD | TS_MEM
2504 : TS_DEAD);
2505 la_reset_pref(&s->temps[i]);
2509 /* liveness analysis: sync globals back to memory. */
2510 static void la_global_sync(TCGContext *s, int ng)
2512 int i;
2514 for (i = 0; i < ng; ++i) {
2515 int state = s->temps[i].state;
2516 s->temps[i].state = state | TS_MEM;
2517 if (state == TS_DEAD) {
2518 /* If the global was previously dead, reset prefs. */
2519 la_reset_pref(&s->temps[i]);
2525 * liveness analysis: conditional branch: all temps are dead,
2526 * globals and local temps should be synced.
2528 static void la_bb_sync(TCGContext *s, int ng, int nt)
2530 la_global_sync(s, ng);
2532 for (int i = ng; i < nt; ++i) {
2533 if (s->temps[i].temp_local) {
2534 int state = s->temps[i].state;
2535 s->temps[i].state = state | TS_MEM;
2536 if (state != TS_DEAD) {
2537 continue;
2539 } else {
2540 s->temps[i].state = TS_DEAD;
2542 la_reset_pref(&s->temps[i]);
2546 /* liveness analysis: sync globals back to memory and kill. */
2547 static void la_global_kill(TCGContext *s, int ng)
2549 int i;
2551 for (i = 0; i < ng; i++) {
2552 s->temps[i].state = TS_DEAD | TS_MEM;
2553 la_reset_pref(&s->temps[i]);
2557 /* liveness analysis: note live globals crossing calls. */
2558 static void la_cross_call(TCGContext *s, int nt)
2560 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2561 int i;
2563 for (i = 0; i < nt; i++) {
2564 TCGTemp *ts = &s->temps[i];
2565 if (!(ts->state & TS_DEAD)) {
2566 TCGRegSet *pset = la_temp_pref(ts);
2567 TCGRegSet set = *pset;
2569 set &= mask;
2570 /* If the combination is not possible, restart. */
2571 if (set == 0) {
2572 set = tcg_target_available_regs[ts->type] & mask;
2574 *pset = set;
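/*
 * Example of the effect above, assuming a hypothetical target where only
 * r4 and r5 survive calls: a live temp whose preference set was {r0, r4}
 * is narrowed to {r4}, while one that preferred only call-clobbered
 * registers (say {r0, r1}) is reset to every call-saved register of its
 * type rather than being left with an empty preference.
 */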
2579 /* Liveness analysis: update the opc_arg_life array to tell if a
2580 given input argument is dead. Instructions updating dead
2581 temporaries are removed. */
2582 static void liveness_pass_1(TCGContext *s)
2584 int nb_globals = s->nb_globals;
2585 int nb_temps = s->nb_temps;
2586 TCGOp *op, *op_prev;
2587 TCGRegSet *prefs;
2588 int i;
2590 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2591 for (i = 0; i < nb_temps; ++i) {
2592 s->temps[i].state_ptr = prefs + i;
2595 /* ??? Should be redundant with the exit_tb that ends the TB. */
2596 la_func_end(s, nb_globals, nb_temps);
2598 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2599 int nb_iargs, nb_oargs;
2600 TCGOpcode opc_new, opc_new2;
2601 bool have_opc_new2;
2602 TCGLifeData arg_life = 0;
2603 TCGTemp *ts;
2604 TCGOpcode opc = op->opc;
2605 const TCGOpDef *def = &tcg_op_defs[opc];
2607 switch (opc) {
2608 case INDEX_op_call:
2610 int call_flags;
2611 int nb_call_regs;
2613 nb_oargs = TCGOP_CALLO(op);
2614 nb_iargs = TCGOP_CALLI(op);
2615 call_flags = op->args[nb_oargs + nb_iargs + 1];
2617 /* pure functions can be removed if their result is unused */
2618 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2619 for (i = 0; i < nb_oargs; i++) {
2620 ts = arg_temp(op->args[i]);
2621 if (ts->state != TS_DEAD) {
2622 goto do_not_remove_call;
2625 goto do_remove;
2627 do_not_remove_call:
2629 /* Output args are dead. */
2630 for (i = 0; i < nb_oargs; i++) {
2631 ts = arg_temp(op->args[i]);
2632 if (ts->state & TS_DEAD) {
2633 arg_life |= DEAD_ARG << i;
2635 if (ts->state & TS_MEM) {
2636 arg_life |= SYNC_ARG << i;
2638 ts->state = TS_DEAD;
2639 la_reset_pref(ts);
2641 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2642 op->output_pref[i] = 0;
2645 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2646 TCG_CALL_NO_READ_GLOBALS))) {
2647 la_global_kill(s, nb_globals);
2648 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2649 la_global_sync(s, nb_globals);
2652 /* Record arguments that die in this helper. */
2653 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2654 ts = arg_temp(op->args[i]);
2655 if (ts && ts->state & TS_DEAD) {
2656 arg_life |= DEAD_ARG << i;
2660 /* For all live registers, remove call-clobbered prefs. */
2661 la_cross_call(s, nb_temps);
2663 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2665 /* Input arguments are live for preceding opcodes. */
2666 for (i = 0; i < nb_iargs; i++) {
2667 ts = arg_temp(op->args[i + nb_oargs]);
2668 if (ts && ts->state & TS_DEAD) {
2669 /* For those arguments that die, and will be allocated
2670 * in registers, clear the register set for that arg,
2671 * to be filled in below. For args that will be on
2672 * the stack, reset to any available reg.
2674 *la_temp_pref(ts)
2675 = (i < nb_call_regs ? 0 :
2676 tcg_target_available_regs[ts->type]);
2677 ts->state &= ~TS_DEAD;
2681 /* For each input argument, add its input register to prefs.
2682 If a temp is used once, this produces a single set bit. */
2683 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2684 ts = arg_temp(op->args[i + nb_oargs]);
2685 if (ts) {
2686 tcg_regset_set_reg(*la_temp_pref(ts),
2687 tcg_target_call_iarg_regs[i]);
2691 break;
2692 case INDEX_op_insn_start:
2693 break;
2694 case INDEX_op_discard:
2695 /* mark the temporary as dead */
2696 ts = arg_temp(op->args[0]);
2697 ts->state = TS_DEAD;
2698 la_reset_pref(ts);
2699 break;
2701 case INDEX_op_add2_i32:
2702 opc_new = INDEX_op_add_i32;
2703 goto do_addsub2;
2704 case INDEX_op_sub2_i32:
2705 opc_new = INDEX_op_sub_i32;
2706 goto do_addsub2;
2707 case INDEX_op_add2_i64:
2708 opc_new = INDEX_op_add_i64;
2709 goto do_addsub2;
2710 case INDEX_op_sub2_i64:
2711 opc_new = INDEX_op_sub_i64;
2712 do_addsub2:
2713 nb_iargs = 4;
2714 nb_oargs = 2;
2715 /* Test if the high part of the operation is dead, but not
2716 the low part. The result can be optimized to a simple
2717 add or sub. This happens often for x86_64 guests when the
2718 cpu mode is set to 32 bit. */
2719 if (arg_temp(op->args[1])->state == TS_DEAD) {
2720 if (arg_temp(op->args[0])->state == TS_DEAD) {
2721 goto do_remove;
2723 /* Replace the opcode and adjust the args in place,
2724 leaving 3 unused args at the end. */
2725 op->opc = opc = opc_new;
2726 op->args[1] = op->args[2];
2727 op->args[2] = op->args[4];
2728 /* Fall through and mark the single-word operation live. */
2729 nb_iargs = 2;
2730 nb_oargs = 1;
2732 goto do_not_remove;
2734 case INDEX_op_mulu2_i32:
2735 opc_new = INDEX_op_mul_i32;
2736 opc_new2 = INDEX_op_muluh_i32;
2737 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2738 goto do_mul2;
2739 case INDEX_op_muls2_i32:
2740 opc_new = INDEX_op_mul_i32;
2741 opc_new2 = INDEX_op_mulsh_i32;
2742 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2743 goto do_mul2;
2744 case INDEX_op_mulu2_i64:
2745 opc_new = INDEX_op_mul_i64;
2746 opc_new2 = INDEX_op_muluh_i64;
2747 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2748 goto do_mul2;
2749 case INDEX_op_muls2_i64:
2750 opc_new = INDEX_op_mul_i64;
2751 opc_new2 = INDEX_op_mulsh_i64;
2752 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2753 goto do_mul2;
2754 do_mul2:
2755 nb_iargs = 2;
2756 nb_oargs = 2;
2757 if (arg_temp(op->args[1])->state == TS_DEAD) {
2758 if (arg_temp(op->args[0])->state == TS_DEAD) {
2759 /* Both parts of the operation are dead. */
2760 goto do_remove;
2762 /* The high part of the operation is dead; generate the low. */
2763 op->opc = opc = opc_new;
2764 op->args[1] = op->args[2];
2765 op->args[2] = op->args[3];
2766 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2767 /* The low part of the operation is dead; generate the high. */
2768 op->opc = opc = opc_new2;
2769 op->args[0] = op->args[1];
2770 op->args[1] = op->args[2];
2771 op->args[2] = op->args[3];
2772 } else {
2773 goto do_not_remove;
2775 /* Mark the single-word operation live. */
2776 nb_oargs = 1;
2777 goto do_not_remove;
2779 default:
2780 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2781 nb_iargs = def->nb_iargs;
2782 nb_oargs = def->nb_oargs;
2784 /* Test if the operation can be removed because all
2785 its outputs are dead. We assume that nb_oargs == 0
2786 implies side effects */
2787 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2788 for (i = 0; i < nb_oargs; i++) {
2789 if (arg_temp(op->args[i])->state != TS_DEAD) {
2790 goto do_not_remove;
2793 goto do_remove;
2795 goto do_not_remove;
2797 do_remove:
2798 tcg_op_remove(s, op);
2799 break;
2801 do_not_remove:
2802 for (i = 0; i < nb_oargs; i++) {
2803 ts = arg_temp(op->args[i]);
2805 /* Remember the preference of the uses that followed. */
2806 op->output_pref[i] = *la_temp_pref(ts);
2808 /* Output args are dead. */
2809 if (ts->state & TS_DEAD) {
2810 arg_life |= DEAD_ARG << i;
2812 if (ts->state & TS_MEM) {
2813 arg_life |= SYNC_ARG << i;
2815 ts->state = TS_DEAD;
2816 la_reset_pref(ts);
2819 /* If end of basic block, update. */
2820 if (def->flags & TCG_OPF_BB_EXIT) {
2821 la_func_end(s, nb_globals, nb_temps);
2822 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2823 la_bb_sync(s, nb_globals, nb_temps);
2824 } else if (def->flags & TCG_OPF_BB_END) {
2825 la_bb_end(s, nb_globals, nb_temps);
2826 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2827 la_global_sync(s, nb_globals);
2828 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2829 la_cross_call(s, nb_temps);
2833 /* Record arguments that die in this opcode. */
2834 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2835 ts = arg_temp(op->args[i]);
2836 if (ts->state & TS_DEAD) {
2837 arg_life |= DEAD_ARG << i;
2841 /* Input arguments are live for preceding opcodes. */
2842 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2843 ts = arg_temp(op->args[i]);
2844 if (ts->state & TS_DEAD) {
2845 /* For operands that were dead, initially allow
2846 all regs for the type. */
2847 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2848 ts->state &= ~TS_DEAD;
2852 /* Incorporate constraints for this operand. */
2853 switch (opc) {
2854 case INDEX_op_mov_i32:
2855 case INDEX_op_mov_i64:
2856 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2857 have proper constraints. That said, special case
2858 moves to propagate preferences backward. */
2859 if (IS_DEAD_ARG(1)) {
2860 *la_temp_pref(arg_temp(op->args[0]))
2861 = *la_temp_pref(arg_temp(op->args[1]));
2863 break;
2865 default:
2866 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2867 const TCGArgConstraint *ct = &def->args_ct[i];
2868 TCGRegSet set, *pset;
2870 ts = arg_temp(op->args[i]);
2871 pset = la_temp_pref(ts);
2872 set = *pset;
2874 set &= ct->regs;
2875 if (ct->ialias) {
2876 set &= op->output_pref[ct->alias_index];
2878 /* If the combination is not possible, restart. */
2879 if (set == 0) {
2880 set = ct->regs;
2882 *pset = set;
2884 break;
2886 break;
2888 op->life = arg_life;
2892 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2893 static bool liveness_pass_2(TCGContext *s)
2895 int nb_globals = s->nb_globals;
2896 int nb_temps, i;
2897 bool changes = false;
2898 TCGOp *op, *op_next;
2900 /* Create a temporary for each indirect global. */
2901 for (i = 0; i < nb_globals; ++i) {
2902 TCGTemp *its = &s->temps[i];
2903 if (its->indirect_reg) {
2904 TCGTemp *dts = tcg_temp_alloc(s);
2905 dts->type = its->type;
2906 dts->base_type = its->base_type;
2907 its->state_ptr = dts;
2908 } else {
2909 its->state_ptr = NULL;
2911 /* All globals begin dead. */
2912 its->state = TS_DEAD;
2914 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2915 TCGTemp *its = &s->temps[i];
2916 its->state_ptr = NULL;
2917 its->state = TS_DEAD;
2920 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2921 TCGOpcode opc = op->opc;
2922 const TCGOpDef *def = &tcg_op_defs[opc];
2923 TCGLifeData arg_life = op->life;
2924 int nb_iargs, nb_oargs, call_flags;
2925 TCGTemp *arg_ts, *dir_ts;
2927 if (opc == INDEX_op_call) {
2928 nb_oargs = TCGOP_CALLO(op);
2929 nb_iargs = TCGOP_CALLI(op);
2930 call_flags = op->args[nb_oargs + nb_iargs + 1];
2931 } else {
2932 nb_iargs = def->nb_iargs;
2933 nb_oargs = def->nb_oargs;
2935 /* Set flags similar to how calls require. */
2936 if (def->flags & TCG_OPF_COND_BRANCH) {
2937 /* Like reading globals: sync_globals */
2938 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2939 } else if (def->flags & TCG_OPF_BB_END) {
2940 /* Like writing globals: save_globals */
2941 call_flags = 0;
2942 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2943 /* Like reading globals: sync_globals */
2944 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2945 } else {
2946 /* No effect on globals. */
2947 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2948 TCG_CALL_NO_WRITE_GLOBALS);
2952 /* Make sure that input arguments are available. */
2953 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2954 arg_ts = arg_temp(op->args[i]);
2955 if (arg_ts) {
2956 dir_ts = arg_ts->state_ptr;
2957 if (dir_ts && arg_ts->state == TS_DEAD) {
2958 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2959 ? INDEX_op_ld_i32
2960 : INDEX_op_ld_i64);
2961 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2963 lop->args[0] = temp_arg(dir_ts);
2964 lop->args[1] = temp_arg(arg_ts->mem_base);
2965 lop->args[2] = arg_ts->mem_offset;
2967 /* Loaded, but synced with memory. */
2968 arg_ts->state = TS_MEM;
2973 /* Perform input replacement, and mark inputs that became dead.
2974 No action is required except keeping temp_state up to date
2975 so that we reload when needed. */
2976 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2977 arg_ts = arg_temp(op->args[i]);
2978 if (arg_ts) {
2979 dir_ts = arg_ts->state_ptr;
2980 if (dir_ts) {
2981 op->args[i] = temp_arg(dir_ts);
2982 changes = true;
2983 if (IS_DEAD_ARG(i)) {
2984 arg_ts->state = TS_DEAD;
2990 /* Liveness analysis should ensure that the following are
2991 all correct, for call sites and basic block end points. */
2992 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2993 /* Nothing to do */
2994 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2995 for (i = 0; i < nb_globals; ++i) {
2996 /* Liveness should see that globals are synced back,
2997 that is, either TS_DEAD or TS_MEM. */
2998 arg_ts = &s->temps[i];
2999 tcg_debug_assert(arg_ts->state_ptr == 0
3000 || arg_ts->state != 0);
3002 } else {
3003 for (i = 0; i < nb_globals; ++i) {
3004 /* Liveness should see that globals are saved back,
3005 that is, TS_DEAD, waiting to be reloaded. */
3006 arg_ts = &s->temps[i];
3007 tcg_debug_assert(arg_ts->state_ptr == 0
3008 || arg_ts->state == TS_DEAD);
3012 /* Outputs become available. */
3013 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3014 arg_ts = arg_temp(op->args[0]);
3015 dir_ts = arg_ts->state_ptr;
3016 if (dir_ts) {
3017 op->args[0] = temp_arg(dir_ts);
3018 changes = true;
3020 /* The output is now live and modified. */
3021 arg_ts->state = 0;
3023 if (NEED_SYNC_ARG(0)) {
3024 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3025 ? INDEX_op_st_i32
3026 : INDEX_op_st_i64);
3027 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3028 TCGTemp *out_ts = dir_ts;
3030 if (IS_DEAD_ARG(0)) {
3031 out_ts = arg_temp(op->args[1]);
3032 arg_ts->state = TS_DEAD;
3033 tcg_op_remove(s, op);
3034 } else {
3035 arg_ts->state = TS_MEM;
3038 sop->args[0] = temp_arg(out_ts);
3039 sop->args[1] = temp_arg(arg_ts->mem_base);
3040 sop->args[2] = arg_ts->mem_offset;
3041 } else {
3042 tcg_debug_assert(!IS_DEAD_ARG(0));
3045 } else {
3046 for (i = 0; i < nb_oargs; i++) {
3047 arg_ts = arg_temp(op->args[i]);
3048 dir_ts = arg_ts->state_ptr;
3049 if (!dir_ts) {
3050 continue;
3052 op->args[i] = temp_arg(dir_ts);
3053 changes = true;
3055 /* The output is now live and modified. */
3056 arg_ts->state = 0;
3058 /* Sync outputs upon their last write. */
3059 if (NEED_SYNC_ARG(i)) {
3060 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3061 ? INDEX_op_st_i32
3062 : INDEX_op_st_i64);
3063 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3065 sop->args[0] = temp_arg(dir_ts);
3066 sop->args[1] = temp_arg(arg_ts->mem_base);
3067 sop->args[2] = arg_ts->mem_offset;
3069 arg_ts->state = TS_MEM;
3071 /* Drop outputs that are dead. */
3072 if (IS_DEAD_ARG(i)) {
3073 arg_ts->state = TS_DEAD;
3079 return changes;
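/*
 * Sketch of the rewrite performed by this pass, for a hypothetical
 * indirect global "x" living at env + off with shadow temp x':
 *
 *     add_i32 x, x, tmp1
 *
 * becomes
 *
 *     ld_i32 x', env, $off          (inserted before the first use while
 *                                    x' is not yet loaded)
 *     add_i32 x', x', tmp1
 *     st_i32 x', env, $off          (inserted after the definition when
 *                                    the output must be synced)
 *
 * with x' kept live in a register between uses until liveness marks it
 * dead again.
 */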
3082 #ifdef CONFIG_DEBUG_TCG
3083 static void dump_regs(TCGContext *s)
3085 TCGTemp *ts;
3086 int i;
3087 char buf[64];
3089 for(i = 0; i < s->nb_temps; i++) {
3090 ts = &s->temps[i];
3091 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3092 switch(ts->val_type) {
3093 case TEMP_VAL_REG:
3094 printf("%s", tcg_target_reg_names[ts->reg]);
3095 break;
3096 case TEMP_VAL_MEM:
3097 printf("%d(%s)", (int)ts->mem_offset,
3098 tcg_target_reg_names[ts->mem_base->reg]);
3099 break;
3100 case TEMP_VAL_CONST:
3101 printf("$0x%" TCG_PRIlx, ts->val);
3102 break;
3103 case TEMP_VAL_DEAD:
3104 printf("D");
3105 break;
3106 default:
3107 printf("???");
3108 break;
3110 printf("\n");
3113 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3114 if (s->reg_to_temp[i] != NULL) {
3115 printf("%s: %s\n",
3116 tcg_target_reg_names[i],
3117 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3122 static void check_regs(TCGContext *s)
3124 int reg;
3125 int k;
3126 TCGTemp *ts;
3127 char buf[64];
3129 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3130 ts = s->reg_to_temp[reg];
3131 if (ts != NULL) {
3132 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3133 printf("Inconsistency for register %s:\n",
3134 tcg_target_reg_names[reg]);
3135 goto fail;
3139 for (k = 0; k < s->nb_temps; k++) {
3140 ts = &s->temps[k];
3141 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3142 && s->reg_to_temp[ts->reg] != ts) {
3143 printf("Inconsistency for temp %s:\n",
3144 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3145 fail:
3146 printf("reg state:\n");
3147 dump_regs(s);
3148 tcg_abort();
3152 #endif
3154 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3156 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3157 /* Sparc64 stack is accessed with offset of 2047 */
3158 s->current_frame_offset = (s->current_frame_offset +
3159 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3160 ~(sizeof(tcg_target_long) - 1);
3161 #endif
3162 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3163 s->frame_end) {
3164 tcg_abort();
3166 ts->mem_offset = s->current_frame_offset;
3167 ts->mem_base = s->frame_temp;
3168 ts->mem_allocated = 1;
3169 s->current_frame_offset += sizeof(tcg_target_long);
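/*
 * The rounding above is the usual align-up idiom.  Standalone sketch
 * (compiled out), assuming an 8-byte tcg_target_long:
 */
#if 0
static intptr_t example_align_up_8(intptr_t off)
{
    return (off + 8 - 1) & ~(intptr_t)(8 - 1);   /* 13 -> 16, 16 -> 16 */
}
#endif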
3172 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3174 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3175 mark it free; otherwise mark it dead. */
3176 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3178 if (ts->fixed_reg) {
3179 return;
3181 if (ts->val_type == TEMP_VAL_REG) {
3182 s->reg_to_temp[ts->reg] = NULL;
3184 ts->val_type = (free_or_dead < 0
3185 || ts->temp_local
3186 || ts->temp_global
3187 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3190 /* Mark a temporary as dead. */
3191 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3193 temp_free_or_dead(s, ts, 1);
3196 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3197 register needs to be allocated to store a constant. If 'free_or_dead'
3198 is non-zero, subsequently release the temporary; if it is positive, the
3199 temp is dead; if it is negative, the temp is free. */
3200 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3201 TCGRegSet preferred_regs, int free_or_dead)
3203 if (ts->fixed_reg) {
3204 return;
3206 if (!ts->mem_coherent) {
3207 if (!ts->mem_allocated) {
3208 temp_allocate_frame(s, ts);
3210 switch (ts->val_type) {
3211 case TEMP_VAL_CONST:
3212 /* If we're going to free the temp immediately, then we won't
3213 require it later in a register, so attempt to store the
3214 constant to memory directly. */
3215 if (free_or_dead
3216 && tcg_out_sti(s, ts->type, ts->val,
3217 ts->mem_base->reg, ts->mem_offset)) {
3218 break;
3220 temp_load(s, ts, tcg_target_available_regs[ts->type],
3221 allocated_regs, preferred_regs);
3222 /* fallthrough */
3224 case TEMP_VAL_REG:
3225 tcg_out_st(s, ts->type, ts->reg,
3226 ts->mem_base->reg, ts->mem_offset);
3227 break;
3229 case TEMP_VAL_MEM:
3230 break;
3232 case TEMP_VAL_DEAD:
3233 default:
3234 tcg_abort();
3236 ts->mem_coherent = 1;
3238 if (free_or_dead) {
3239 temp_free_or_dead(s, ts, free_or_dead);
3243 /* free register 'reg' by spilling the corresponding temporary if necessary */
3244 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3246 TCGTemp *ts = s->reg_to_temp[reg];
3247 if (ts != NULL) {
3248 temp_sync(s, ts, allocated_regs, 0, -1);
3253 * tcg_reg_alloc:
3254 * @required_regs: Set of registers in which we must allocate.
3255 * @allocated_regs: Set of registers which must be avoided.
3256 * @preferred_regs: Set of registers we should prefer.
3257 * @rev: True if we search the registers in "indirect" order.
3259 * The allocated register must be in @required_regs & ~@allocated_regs,
3260 * but if we can put it in @preferred_regs we may save a move later.
3262 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3263 TCGRegSet allocated_regs,
3264 TCGRegSet preferred_regs, bool rev)
3266 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3267 TCGRegSet reg_ct[2];
3268 const int *order;
3270 reg_ct[1] = required_regs & ~allocated_regs;
3271 tcg_debug_assert(reg_ct[1] != 0);
3272 reg_ct[0] = reg_ct[1] & preferred_regs;
3274 /* Skip the preferred_regs option if it cannot be satisfied,
3275 or if the preference made no difference. */
3276 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3278 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3280 /* Try free registers, preferences first. */
3281 for (j = f; j < 2; j++) {
3282 TCGRegSet set = reg_ct[j];
3284 if (tcg_regset_single(set)) {
3285 /* One register in the set. */
3286 TCGReg reg = tcg_regset_first(set);
3287 if (s->reg_to_temp[reg] == NULL) {
3288 return reg;
3290 } else {
3291 for (i = 0; i < n; i++) {
3292 TCGReg reg = order[i];
3293 if (s->reg_to_temp[reg] == NULL &&
3294 tcg_regset_test_reg(set, reg)) {
3295 return reg;
3301 /* We must spill something. */
3302 for (j = f; j < 2; j++) {
3303 TCGRegSet set = reg_ct[j];
3305 if (tcg_regset_single(set)) {
3306 /* One register in the set. */
3307 TCGReg reg = tcg_regset_first(set);
3308 tcg_reg_free(s, reg, allocated_regs);
3309 return reg;
3310 } else {
3311 for (i = 0; i < n; i++) {
3312 TCGReg reg = order[i];
3313 if (tcg_regset_test_reg(set, reg)) {
3314 tcg_reg_free(s, reg, allocated_regs);
3315 return reg;
3321 tcg_abort();
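/*
 * Put differently, the allocator makes up to four attempts in order:
 * a free register from the preferred subset, a free register from the
 * full required set, then the same two subsets again with a spill via
 * tcg_reg_free().  Worked example, assuming required = {r0, r1},
 * preferred = {r1}, and r0 first in the allocation order: if r1 is
 * unused it is returned at once; if r1 is busy but r0 is free, r0 is
 * returned without spilling; only when both are busy is r1 spilled
 * (preferred set first) and reused.
 */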
3324 /* Make sure the temporary is in a register. If needed, allocate the register
3325 from DESIRED while avoiding ALLOCATED. */
3326 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3327 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3329 TCGReg reg;
3331 switch (ts->val_type) {
3332 case TEMP_VAL_REG:
3333 return;
3334 case TEMP_VAL_CONST:
3335 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3336 preferred_regs, ts->indirect_base);
3337 tcg_out_movi(s, ts->type, reg, ts->val);
3338 ts->mem_coherent = 0;
3339 break;
3340 case TEMP_VAL_MEM:
3341 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3342 preferred_regs, ts->indirect_base);
3343 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3344 ts->mem_coherent = 1;
3345 break;
3346 case TEMP_VAL_DEAD:
3347 default:
3348 tcg_abort();
3350 ts->reg = reg;
3351 ts->val_type = TEMP_VAL_REG;
3352 s->reg_to_temp[reg] = ts;
3355 /* Save a temporary to memory. 'allocated_regs' is used in case a
3356 temporary register needs to be allocated to store a constant. */
3357 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3359 /* The liveness analysis already ensures that globals are back
3360 in memory. Keep a tcg_debug_assert for safety. */
3361 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3364 /* save globals to their canonical location and assume they can be
3365 modified by the following code. 'allocated_regs' is used in case a
3366 temporary register needs to be allocated to store a constant. */
3367 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3369 int i, n;
3371 for (i = 0, n = s->nb_globals; i < n; i++) {
3372 temp_save(s, &s->temps[i], allocated_regs);
3376 /* sync globals to their canonical location and assume they can be
3377 read by the following code. 'allocated_regs' is used in case a
3378 temporary register needs to be allocated to store a constant. */
3379 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3381 int i, n;
3383 for (i = 0, n = s->nb_globals; i < n; i++) {
3384 TCGTemp *ts = &s->temps[i];
3385 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3386 || ts->fixed_reg
3387 || ts->mem_coherent);
3391 /* at the end of a basic block, we assume all temporaries are dead and
3392 all globals are stored at their canonical location. */
3393 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3395 int i;
3397 for (i = s->nb_globals; i < s->nb_temps; i++) {
3398 TCGTemp *ts = &s->temps[i];
3399 if (ts->temp_local) {
3400 temp_save(s, ts, allocated_regs);
3401 } else {
3402 /* The liveness analysis already ensures that temps are dead.
3403 Keep a tcg_debug_assert for safety. */
3404 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3408 save_globals(s, allocated_regs);
3412 * At a conditional branch, we assume all temporaries are dead and
3413 * all globals and local temps are synced to their location.
3415 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3417 sync_globals(s, allocated_regs);
3419 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3420 TCGTemp *ts = &s->temps[i];
3422 * The liveness analysis already ensures that temps are dead.
3423 * Keep tcg_debug_asserts for safety.
3425 if (ts->temp_local) {
3426 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3427 } else {
3428 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3434 * Specialized code generation for INDEX_op_movi_*.
3436 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3437 tcg_target_ulong val, TCGLifeData arg_life,
3438 TCGRegSet preferred_regs)
3440 /* ENV should not be modified. */
3441 tcg_debug_assert(!ots->fixed_reg);
3443 /* The movi is not explicitly generated here. */
3444 if (ots->val_type == TEMP_VAL_REG) {
3445 s->reg_to_temp[ots->reg] = NULL;
3447 ots->val_type = TEMP_VAL_CONST;
3448 ots->val = val;
3449 ots->mem_coherent = 0;
3450 if (NEED_SYNC_ARG(0)) {
3451 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3452 } else if (IS_DEAD_ARG(0)) {
3453 temp_dead(s, ots);
3457 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3459 TCGTemp *ots = arg_temp(op->args[0]);
3460 tcg_target_ulong val = op->args[1];
3462 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3466 * Specialized code generation for INDEX_op_mov_*.
3468 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3470 const TCGLifeData arg_life = op->life;
3471 TCGRegSet allocated_regs, preferred_regs;
3472 TCGTemp *ts, *ots;
3473 TCGType otype, itype;
3475 allocated_regs = s->reserved_regs;
3476 preferred_regs = op->output_pref[0];
3477 ots = arg_temp(op->args[0]);
3478 ts = arg_temp(op->args[1]);
3480 /* ENV should not be modified. */
3481 tcg_debug_assert(!ots->fixed_reg);
3483 /* Note that otype != itype for no-op truncation. */
3484 otype = ots->type;
3485 itype = ts->type;
3487 if (ts->val_type == TEMP_VAL_CONST) {
3488 /* propagate constant or generate sti */
3489 tcg_target_ulong val = ts->val;
3490 if (IS_DEAD_ARG(1)) {
3491 temp_dead(s, ts);
3493 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3494 return;
3497 /* If the source value is in memory we're going to be forced
3498 to have it in a register in order to perform the copy. Copy
3499 the SOURCE value into its own register first, that way we
3500 don't have to reload SOURCE the next time it is used. */
3501 if (ts->val_type == TEMP_VAL_MEM) {
3502 temp_load(s, ts, tcg_target_available_regs[itype],
3503 allocated_regs, preferred_regs);
3506 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3507 if (IS_DEAD_ARG(0)) {
3508 /* mov to a non-saved dead register makes no sense (even with
3509 liveness analysis disabled). */
3510 tcg_debug_assert(NEED_SYNC_ARG(0));
3511 if (!ots->mem_allocated) {
3512 temp_allocate_frame(s, ots);
3514 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3515 if (IS_DEAD_ARG(1)) {
3516 temp_dead(s, ts);
3518 temp_dead(s, ots);
3519 } else {
3520 if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3521 /* the mov can be suppressed */
3522 if (ots->val_type == TEMP_VAL_REG) {
3523 s->reg_to_temp[ots->reg] = NULL;
3525 ots->reg = ts->reg;
3526 temp_dead(s, ts);
3527 } else {
3528 if (ots->val_type != TEMP_VAL_REG) {
3529 /* When allocating a new register, make sure to not spill the
3530 input one. */
3531 tcg_regset_set_reg(allocated_regs, ts->reg);
3532 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3533 allocated_regs, preferred_regs,
3534 ots->indirect_base);
3536 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3538 * Cross register class move not supported.
3539 * Store the source register into the destination slot
3540 * and leave the destination temp as TEMP_VAL_MEM.
3542 assert(!ots->fixed_reg);
3543 if (!ts->mem_allocated) {
3544 temp_allocate_frame(s, ots);
3546 tcg_out_st(s, ts->type, ts->reg,
3547 ots->mem_base->reg, ots->mem_offset);
3548 ots->mem_coherent = 1;
3549 temp_free_or_dead(s, ots, -1);
3550 return;
3553 ots->val_type = TEMP_VAL_REG;
3554 ots->mem_coherent = 0;
3555 s->reg_to_temp[ots->reg] = ots;
3556 if (NEED_SYNC_ARG(0)) {
3557 temp_sync(s, ots, allocated_regs, 0, 0);
3563 * Specialized code generation for INDEX_op_dup_vec.
3565 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3567 const TCGLifeData arg_life = op->life;
3568 TCGRegSet dup_out_regs, dup_in_regs;
3569 TCGTemp *its, *ots;
3570 TCGType itype, vtype;
3571 intptr_t endian_fixup;
3572 unsigned vece;
3573 bool ok;
3575 ots = arg_temp(op->args[0]);
3576 its = arg_temp(op->args[1]);
3578 /* ENV should not be modified. */
3579 tcg_debug_assert(!ots->fixed_reg);
3581 itype = its->type;
3582 vece = TCGOP_VECE(op);
3583 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3585 if (its->val_type == TEMP_VAL_CONST) {
3586 /* Propagate constant via movi -> dupi. */
3587 tcg_target_ulong val = its->val;
3588 if (IS_DEAD_ARG(1)) {
3589 temp_dead(s, its);
3591 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3592 return;
3595 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3596 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3598 /* Allocate the output register now. */
3599 if (ots->val_type != TEMP_VAL_REG) {
3600 TCGRegSet allocated_regs = s->reserved_regs;
3602 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3603 /* Make sure to not spill the input register. */
3604 tcg_regset_set_reg(allocated_regs, its->reg);
3606 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3607 op->output_pref[0], ots->indirect_base);
3608 ots->val_type = TEMP_VAL_REG;
3609 ots->mem_coherent = 0;
3610 s->reg_to_temp[ots->reg] = ots;
3613 switch (its->val_type) {
3614 case TEMP_VAL_REG:
3616 * The dup constraints must be broad, covering all possible VECE.
3617 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3618 * to fail, indicating that extra moves are required for that case.
3620 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3621 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3622 goto done;
3624 /* Try again from memory or a vector input register. */
3626 if (!its->mem_coherent) {
3628 * The input register is not synced, and so an extra store
3629 * would be required to use memory. Attempt an integer-vector
3630 * register move first. We do not have a TCGRegSet for this.
3632 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3633 break;
3635 /* Sync the temp back to its slot and load from there. */
3636 temp_sync(s, its, s->reserved_regs, 0, 0);
3638 /* fall through */
3640 case TEMP_VAL_MEM:
3641 #ifdef HOST_WORDS_BIGENDIAN
3642 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3643 endian_fixup -= 1 << vece;
3644 #else
3645 endian_fixup = 0;
3646 #endif
3647 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3648 its->mem_offset + endian_fixup)) {
3649 goto done;
3651 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3652 break;
3654 default:
3655 g_assert_not_reached();
3658 /* We now have a vector input register, so dup must succeed. */
3659 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3660 tcg_debug_assert(ok);
3662 done:
3663 if (IS_DEAD_ARG(1)) {
3664 temp_dead(s, its);
3666 if (NEED_SYNC_ARG(0)) {
3667 temp_sync(s, ots, s->reserved_regs, 0, 0);
3669 if (IS_DEAD_ARG(0)) {
3670 temp_dead(s, ots);
3674 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3676 const TCGLifeData arg_life = op->life;
3677 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3678 TCGRegSet i_allocated_regs;
3679 TCGRegSet o_allocated_regs;
3680 int i, k, nb_iargs, nb_oargs;
3681 TCGReg reg;
3682 TCGArg arg;
3683 const TCGArgConstraint *arg_ct;
3684 TCGTemp *ts;
3685 TCGArg new_args[TCG_MAX_OP_ARGS];
3686 int const_args[TCG_MAX_OP_ARGS];
3688 nb_oargs = def->nb_oargs;
3689 nb_iargs = def->nb_iargs;
3691 /* copy constants */
3692 memcpy(new_args + nb_oargs + nb_iargs,
3693 op->args + nb_oargs + nb_iargs,
3694 sizeof(TCGArg) * def->nb_cargs);
3696 i_allocated_regs = s->reserved_regs;
3697 o_allocated_regs = s->reserved_regs;
3699 /* satisfy input constraints */
3700 for (k = 0; k < nb_iargs; k++) {
3701 TCGRegSet i_preferred_regs, o_preferred_regs;
3703 i = def->args_ct[nb_oargs + k].sort_index;
3704 arg = op->args[i];
3705 arg_ct = &def->args_ct[i];
3706 ts = arg_temp(arg);
3708 if (ts->val_type == TEMP_VAL_CONST
3709 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3710 /* constant is OK for instruction */
3711 const_args[i] = 1;
3712 new_args[i] = ts->val;
3713 continue;
3716 i_preferred_regs = o_preferred_regs = 0;
3717 if (arg_ct->ialias) {
3718 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3719 if (ts->fixed_reg) {
3720 /* if fixed register, we must allocate a new register
3721 if the alias is not the same register */
3722 if (arg != op->args[arg_ct->alias_index]) {
3723 goto allocate_in_reg;
3725 } else {
3726 /* if the input is aliased to an output and if it is
3727 not dead after the instruction, we must allocate
3728 a new register and move it */
3729 if (!IS_DEAD_ARG(i)) {
3730 goto allocate_in_reg;
3733 /* check if the current register has already been allocated
3734 for another input aliased to an output */
3735 if (ts->val_type == TEMP_VAL_REG) {
3736 int k2, i2;
3737 reg = ts->reg;
3738 for (k2 = 0 ; k2 < k ; k2++) {
3739 i2 = def->args_ct[nb_oargs + k2].sort_index;
3740 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3741 goto allocate_in_reg;
3745 i_preferred_regs = o_preferred_regs;
3749 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3750 reg = ts->reg;
3752 if (tcg_regset_test_reg(arg_ct->regs, reg)) {
3753 /* nothing to do : the constraint is satisfied */
3754 } else {
3755 allocate_in_reg:
3756 /* allocate a new register matching the constraint
3757 and move the temporary register into it */
3758 temp_load(s, ts, tcg_target_available_regs[ts->type],
3759 i_allocated_regs, 0);
3760 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3761 o_preferred_regs, ts->indirect_base);
3762 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3764 * Cross register class move not supported. Sync the
3765 * temp back to its slot and load from there.
3767 temp_sync(s, ts, i_allocated_regs, 0, 0);
3768 tcg_out_ld(s, ts->type, reg,
3769 ts->mem_base->reg, ts->mem_offset);
3772 new_args[i] = reg;
3773 const_args[i] = 0;
3774 tcg_regset_set_reg(i_allocated_regs, reg);
3777 /* mark dead temporaries and free the associated registers */
3778 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3779 if (IS_DEAD_ARG(i)) {
3780 temp_dead(s, arg_temp(op->args[i]));
3784 if (def->flags & TCG_OPF_COND_BRANCH) {
3785 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3786 } else if (def->flags & TCG_OPF_BB_END) {
3787 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3788 } else {
3789 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3790 /* XXX: permit generic clobber register list? */
3791 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3792 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3793 tcg_reg_free(s, i, i_allocated_regs);
3797 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3798 /* sync globals if the op has side effects and might trigger
3799 an exception. */
3800 sync_globals(s, i_allocated_regs);
3803 /* satisfy the output constraints */
3804 for(k = 0; k < nb_oargs; k++) {
3805 i = def->args_ct[k].sort_index;
3806 arg = op->args[i];
3807 arg_ct = &def->args_ct[i];
3808 ts = arg_temp(arg);
3810 /* ENV should not be modified. */
3811 tcg_debug_assert(!ts->fixed_reg);
3813 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3814 reg = new_args[arg_ct->alias_index];
3815 } else if (arg_ct->newreg) {
3816 reg = tcg_reg_alloc(s, arg_ct->regs,
3817 i_allocated_regs | o_allocated_regs,
3818 op->output_pref[k], ts->indirect_base);
3819 } else {
3820 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3821 op->output_pref[k], ts->indirect_base);
3823 tcg_regset_set_reg(o_allocated_regs, reg);
3824 if (ts->val_type == TEMP_VAL_REG) {
3825 s->reg_to_temp[ts->reg] = NULL;
3827 ts->val_type = TEMP_VAL_REG;
3828 ts->reg = reg;
3830 * Temp value is modified, so the value kept in memory is
3831 * potentially not the same.
3833 ts->mem_coherent = 0;
3834 s->reg_to_temp[reg] = ts;
3835 new_args[i] = reg;
3839 /* emit instruction */
3840 if (def->flags & TCG_OPF_VECTOR) {
3841 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3842 new_args, const_args);
3843 } else {
3844 tcg_out_op(s, op->opc, new_args, const_args);
3847 /* move the outputs in the correct register if needed */
3848 for(i = 0; i < nb_oargs; i++) {
3849 ts = arg_temp(op->args[i]);
3851 /* ENV should not be modified. */
3852 tcg_debug_assert(!ts->fixed_reg);
3854 if (NEED_SYNC_ARG(i)) {
3855 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3856 } else if (IS_DEAD_ARG(i)) {
3857 temp_dead(s, ts);
3862 #ifdef TCG_TARGET_STACK_GROWSUP
3863 #define STACK_DIR(x) (-(x))
3864 #else
3865 #define STACK_DIR(x) (x)
3866 #endif
3868 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3870 const int nb_oargs = TCGOP_CALLO(op);
3871 const int nb_iargs = TCGOP_CALLI(op);
3872 const TCGLifeData arg_life = op->life;
3873 int flags, nb_regs, i;
3874 TCGReg reg;
3875 TCGArg arg;
3876 TCGTemp *ts;
3877 intptr_t stack_offset;
3878 size_t call_stack_size;
3879 tcg_insn_unit *func_addr;
3880 int allocate_args;
3881 TCGRegSet allocated_regs;
3883 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3884 flags = op->args[nb_oargs + nb_iargs + 1];
3886 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3887 if (nb_regs > nb_iargs) {
3888 nb_regs = nb_iargs;
3891 /* assign stack slots first */
3892 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3893 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3894 ~(TCG_TARGET_STACK_ALIGN - 1);
3895 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3896 if (allocate_args) {
3897 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3898 preallocate call stack */
3899 tcg_abort();
3902 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3903 for (i = nb_regs; i < nb_iargs; i++) {
3904 arg = op->args[nb_oargs + i];
3905 #ifdef TCG_TARGET_STACK_GROWSUP
3906 stack_offset -= sizeof(tcg_target_long);
3907 #endif
3908 if (arg != TCG_CALL_DUMMY_ARG) {
3909 ts = arg_temp(arg);
3910 temp_load(s, ts, tcg_target_available_regs[ts->type],
3911 s->reserved_regs, 0);
3912 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3914 #ifndef TCG_TARGET_STACK_GROWSUP
3915 stack_offset += sizeof(tcg_target_long);
3916 #endif
3919 /* assign input registers */
3920 allocated_regs = s->reserved_regs;
3921 for (i = 0; i < nb_regs; i++) {
3922 arg = op->args[nb_oargs + i];
3923 if (arg != TCG_CALL_DUMMY_ARG) {
3924 ts = arg_temp(arg);
3925 reg = tcg_target_call_iarg_regs[i];
3927 if (ts->val_type == TEMP_VAL_REG) {
3928 if (ts->reg != reg) {
3929 tcg_reg_free(s, reg, allocated_regs);
3930 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3932 * Cross register class move not supported. Sync the
3933 * temp back to its slot and load from there.
3935 temp_sync(s, ts, allocated_regs, 0, 0);
3936 tcg_out_ld(s, ts->type, reg,
3937 ts->mem_base->reg, ts->mem_offset);
3940 } else {
3941 TCGRegSet arg_set = 0;
3943 tcg_reg_free(s, reg, allocated_regs);
3944 tcg_regset_set_reg(arg_set, reg);
3945 temp_load(s, ts, arg_set, allocated_regs, 0);
3948 tcg_regset_set_reg(allocated_regs, reg);
3952 /* mark dead temporaries and free the associated registers */
3953 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3954 if (IS_DEAD_ARG(i)) {
3955 temp_dead(s, arg_temp(op->args[i]));
3959 /* clobber call registers */
3960 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3961 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3962 tcg_reg_free(s, i, allocated_regs);
3966 /* Save globals if they might be written by the helper, sync them if
3967 they might be read. */
3968 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3969 /* Nothing to do */
3970 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3971 sync_globals(s, allocated_regs);
3972 } else {
3973 save_globals(s, allocated_regs);
3976 tcg_out_call(s, func_addr);
3978 /* assign output registers and emit moves if needed */
3979 for(i = 0; i < nb_oargs; i++) {
3980 arg = op->args[i];
3981 ts = arg_temp(arg);
3983 /* ENV should not be modified. */
3984 tcg_debug_assert(!ts->fixed_reg);
3986 reg = tcg_target_call_oarg_regs[i];
3987 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3988 if (ts->val_type == TEMP_VAL_REG) {
3989 s->reg_to_temp[ts->reg] = NULL;
3991 ts->val_type = TEMP_VAL_REG;
3992 ts->reg = reg;
3993 ts->mem_coherent = 0;
3994 s->reg_to_temp[reg] = ts;
3995 if (NEED_SYNC_ARG(i)) {
3996 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3997 } else if (IS_DEAD_ARG(i)) {
3998 temp_dead(s, ts);
4003 #ifdef CONFIG_PROFILER
4005 /* avoid copy/paste errors */
4006 #define PROF_ADD(to, from, field) \
4007 do { \
4008 (to)->field += qatomic_read(&((from)->field)); \
4009 } while (0)
4011 #define PROF_MAX(to, from, field) \
4012 do { \
4013 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4014 if (val__ > (to)->field) { \
4015 (to)->field = val__; \
4017 } while (0)
4019 /* Pass in a zeroed @prof */
4020 static inline
4021 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4023 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4024 unsigned int i;
4026 for (i = 0; i < n_ctxs; i++) {
4027 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4028 const TCGProfile *orig = &s->prof;
4030 if (counters) {
4031 PROF_ADD(prof, orig, cpu_exec_time);
4032 PROF_ADD(prof, orig, tb_count1);
4033 PROF_ADD(prof, orig, tb_count);
4034 PROF_ADD(prof, orig, op_count);
4035 PROF_MAX(prof, orig, op_count_max);
4036 PROF_ADD(prof, orig, temp_count);
4037 PROF_MAX(prof, orig, temp_count_max);
4038 PROF_ADD(prof, orig, del_op_count);
4039 PROF_ADD(prof, orig, code_in_len);
4040 PROF_ADD(prof, orig, code_out_len);
4041 PROF_ADD(prof, orig, search_out_len);
4042 PROF_ADD(prof, orig, interm_time);
4043 PROF_ADD(prof, orig, code_time);
4044 PROF_ADD(prof, orig, la_time);
4045 PROF_ADD(prof, orig, opt_time);
4046 PROF_ADD(prof, orig, restore_count);
4047 PROF_ADD(prof, orig, restore_time);
4049 if (table) {
4050 int i;
4052 for (i = 0; i < NB_OPS; i++) {
4053 PROF_ADD(prof, orig, table_op_count[i]);
4059 #undef PROF_ADD
4060 #undef PROF_MAX
4062 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4064 tcg_profile_snapshot(prof, true, false);
4067 static void tcg_profile_snapshot_table(TCGProfile *prof)
4069 tcg_profile_snapshot(prof, false, true);
4072 void tcg_dump_op_count(void)
4074 TCGProfile prof = {};
4075 int i;
4077 tcg_profile_snapshot_table(&prof);
4078 for (i = 0; i < NB_OPS; i++) {
4079 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4080 prof.table_op_count[i]);
4084 int64_t tcg_cpu_exec_time(void)
4086 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4087 unsigned int i;
4088 int64_t ret = 0;
4090 for (i = 0; i < n_ctxs; i++) {
4091 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4092 const TCGProfile *prof = &s->prof;
4094 ret += qatomic_read(&prof->cpu_exec_time);
4096 return ret;
4098 #else
4099 void tcg_dump_op_count(void)
4101 qemu_printf("[TCG profiler not compiled]\n");
4104 int64_t tcg_cpu_exec_time(void)
4106 error_report("%s: TCG profiler not compiled", __func__);
4107 exit(EXIT_FAILURE);
4109 #endif
4112 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4114 #ifdef CONFIG_PROFILER
4115 TCGProfile *prof = &s->prof;
4116 #endif
4117 int i, num_insns;
4118 TCGOp *op;
4120 #ifdef CONFIG_PROFILER
4122 int n = 0;
4124 QTAILQ_FOREACH(op, &s->ops, link) {
4125 n++;
4127 qatomic_set(&prof->op_count, prof->op_count + n);
4128 if (n > prof->op_count_max) {
4129 qatomic_set(&prof->op_count_max, n);
4132 n = s->nb_temps;
4133 qatomic_set(&prof->temp_count, prof->temp_count + n);
4134 if (n > prof->temp_count_max) {
4135 qatomic_set(&prof->temp_count_max, n);
4138 #endif
4140 #ifdef DEBUG_DISAS
4141 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4142 && qemu_log_in_addr_range(tb->pc))) {
4143 FILE *logfile = qemu_log_lock();
4144 qemu_log("OP:\n");
4145 tcg_dump_ops(s, false);
4146 qemu_log("\n");
4147 qemu_log_unlock(logfile);
4149 #endif
4151 #ifdef CONFIG_DEBUG_TCG
4152 /* Ensure all labels referenced have been emitted. */
4154 TCGLabel *l;
4155 bool error = false;
4157 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4158 if (unlikely(!l->present) && l->refs) {
4159 qemu_log_mask(CPU_LOG_TB_OP,
4160 "$L%d referenced but not present.\n", l->id);
4161 error = true;
4162 }
4163 }
4164 assert(!error);
4165 }
4166 #endif
4168 #ifdef CONFIG_PROFILER
4169 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4170 #endif
4172 #ifdef USE_TCG_OPTIMIZATIONS
4173 tcg_optimize(s);
4174 #endif
4176 #ifdef CONFIG_PROFILER
4177 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4178 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4179 #endif
4181 reachable_code_pass(s);
4182 liveness_pass_1(s);
4184 if (s->nb_indirects > 0) {
4185 #ifdef DEBUG_DISAS
4186 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4187 && qemu_log_in_addr_range(tb->pc))) {
4188 FILE *logfile = qemu_log_lock();
4189 qemu_log("OP before indirect lowering:\n");
4190 tcg_dump_ops(s, false);
4191 qemu_log("\n");
4192 qemu_log_unlock(logfile);
4193 }
4194 #endif
4195 /* Replace indirect temps with direct temps. */
4196 if (liveness_pass_2(s)) {
4197 /* If changes were made, re-run liveness. */
4198 liveness_pass_1(s);
4199 }
4200 }
4202 #ifdef CONFIG_PROFILER
4203 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4204 #endif
4206 #ifdef DEBUG_DISAS
4207 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4208 && qemu_log_in_addr_range(tb->pc))) {
4209 FILE *logfile = qemu_log_lock();
4210 qemu_log("OP after optimization and liveness analysis:\n");
4211 tcg_dump_ops(s, true);
4212 qemu_log("\n");
4213 qemu_log_unlock(logfile);
4214 }
4215 #endif
4217 tcg_reg_alloc_start(s);
4219 s->code_buf = tb->tc.ptr;
4220 s->code_ptr = tb->tc.ptr;
4222 #ifdef TCG_TARGET_NEED_LDST_LABELS
4223 QSIMPLEQ_INIT(&s->ldst_labels);
4224 #endif
4225 #ifdef TCG_TARGET_NEED_POOL_LABELS
4226 s->pool_labels = NULL;
4227 #endif
4229 num_insns = -1;
4230 QTAILQ_FOREACH(op, &s->ops, link) {
4231 TCGOpcode opc = op->opc;
4233 #ifdef CONFIG_PROFILER
4234 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4235 #endif
4237 switch (opc) {
4238 case INDEX_op_mov_i32:
4239 case INDEX_op_mov_i64:
4240 case INDEX_op_mov_vec:
4241 tcg_reg_alloc_mov(s, op);
4242 break;
4243 case INDEX_op_movi_i32:
4244 case INDEX_op_movi_i64:
4245 case INDEX_op_dupi_vec:
4246 tcg_reg_alloc_movi(s, op);
4247 break;
4248 case INDEX_op_dup_vec:
4249 tcg_reg_alloc_dup(s, op);
4250 break;
4251 case INDEX_op_insn_start:
4252 if (num_insns >= 0) {
4253 size_t off = tcg_current_code_size(s);
4254 s->gen_insn_end_off[num_insns] = off;
4255 /* Assert that we do not overflow our stored offset. */
4256 assert(s->gen_insn_end_off[num_insns] == off);
4257 }
4258 num_insns++;
4259 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4260 target_ulong a;
4261 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4262 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4263 #else
4264 a = op->args[i];
4265 #endif
4266 s->gen_insn_data[num_insns][i] = a;
4267 }
4268 break;
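/*
 * Worked example (illustrative; the value is hypothetical): when
 * TARGET_LONG_BITS > TCG_TARGET_REG_BITS each insn_start word is split
 * into two 32-bit halves, args[i * 2] holding bits [31:0] and
 * args[i * 2 + 1] bits [63:32].  For a guest PC of 0x0000004080001000:
 *
 *     deposit64(0x80001000, 32, 32, 0x00000040) == 0x0000004080001000
 */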
4269 case INDEX_op_discard:
4270 temp_dead(s, arg_temp(op->args[0]));
4271 break;
4272 case INDEX_op_set_label:
4273 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4274 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4275 break;
4276 case INDEX_op_call:
4277 tcg_reg_alloc_call(s, op);
4278 break;
4279 default:
4280 /* Sanity check that we've not introduced any unhandled opcodes. */
4281 tcg_debug_assert(tcg_op_supported(opc));
4282 /* Note: in order to speed up the code, it would be much
4283 faster to have specialized register allocator functions for
4284 some common argument patterns */
4285 tcg_reg_alloc_op(s, op);
4286 break;
4287 }
4288 #ifdef CONFIG_DEBUG_TCG
4289 check_regs(s);
4290 #endif
4291 /* Test for (pending) buffer overflow. The assumption is that any
4292 one operation beginning below the high water mark cannot overrun
4293 the buffer completely. Thus we can test for overflow after
4294 generating code without having to check during generation. */
4295 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4296 return -1;
4297 }
4298 /* Test for TB overflow, as seen by gen_insn_end_off. */
4299 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4300 return -2;
4301 }
4302 }
4303 tcg_debug_assert(num_insns >= 0);
4304 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4306 /* Generate TB finalization at the end of block */
4307 #ifdef TCG_TARGET_NEED_LDST_LABELS
4308 i = tcg_out_ldst_finalize(s);
4309 if (i < 0) {
4310 return i;
4311 }
4312 #endif
4313 #ifdef TCG_TARGET_NEED_POOL_LABELS
4314 i = tcg_out_pool_finalize(s);
4315 if (i < 0) {
4316 return i;
4317 }
4318 #endif
4319 if (!tcg_resolve_relocs(s)) {
4320 return -2;
4321 }
4323 /* flush instruction cache */
4324 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4326 return tcg_current_code_size(s);
4327 }
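/*
 * Sketch of a caller (hypothetical; the helper names below are invented
 * for illustration and this is not the actual QEMU call site): the
 * contract of tcg_gen_code() is "number of bytes of host code generated,
 * or a negative error that the caller must recover from".
 */
#if 0
    int size = tcg_gen_code(tcg_ctx, tb);
    if (size == -1) {
        /* Ran past code_gen_highwater: the code buffer is full. */
        handle_full_code_buffer();
    } else if (size == -2) {
        /* The TB grew past UINT16_MAX bytes or relocations could not be
           resolved; regenerate this TB with fewer guest instructions. */
        retry_with_fewer_insns();
    } else {
        /* 'size' bytes of host code now start at tb->tc.ptr. */
    }
#endif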
4329 #ifdef CONFIG_PROFILER
4330 void tcg_dump_info(void)
4331 {
4332 TCGProfile prof = {};
4333 const TCGProfile *s;
4334 int64_t tb_count;
4335 int64_t tb_div_count;
4336 int64_t tot;
4338 tcg_profile_snapshot_counters(&prof);
4339 s = &prof;
4340 tb_count = s->tb_count;
4341 tb_div_count = tb_count ? tb_count : 1;
4342 tot = s->interm_time + s->code_time;
4344 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4345 tot, tot / 2.4e9);
4346 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4347 " %0.1f%%)\n",
4348 tb_count, s->tb_count1 - tb_count,
4349 (double)(s->tb_count1 - s->tb_count)
4350 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4351 qemu_printf("avg ops/TB %0.1f max=%d\n",
4352 (double)s->op_count / tb_div_count, s->op_count_max);
4353 qemu_printf("deleted ops/TB %0.2f\n",
4354 (double)s->del_op_count / tb_div_count);
4355 qemu_printf("avg temps/TB %0.2f max=%d\n",
4356 (double)s->temp_count / tb_div_count, s->temp_count_max);
4357 qemu_printf("avg host code/TB %0.1f\n",
4358 (double)s->code_out_len / tb_div_count);
4359 qemu_printf("avg search data/TB %0.1f\n",
4360 (double)s->search_out_len / tb_div_count);
4362 qemu_printf("cycles/op %0.1f\n",
4363 s->op_count ? (double)tot / s->op_count : 0);
4364 qemu_printf("cycles/in byte %0.1f\n",
4365 s->code_in_len ? (double)tot / s->code_in_len : 0);
4366 qemu_printf("cycles/out byte %0.1f\n",
4367 s->code_out_len ? (double)tot / s->code_out_len : 0);
4368 qemu_printf("cycles/search byte %0.1f\n",
4369 s->search_out_len ? (double)tot / s->search_out_len : 0);
4370 if (tot == 0) {
4371 tot = 1;
4372 }
4373 qemu_printf(" gen_interm time %0.1f%%\n",
4374 (double)s->interm_time / tot * 100.0);
4375 qemu_printf(" gen_code time %0.1f%%\n",
4376 (double)s->code_time / tot * 100.0);
4377 qemu_printf("optim./code time %0.1f%%\n",
4378 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4379 * 100.0);
4380 qemu_printf("liveness/code time %0.1f%%\n",
4381 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4382 qemu_printf("cpu_restore count %" PRId64 "\n",
4383 s->restore_count);
4384 qemu_printf(" avg cycles %0.1f\n",
4385 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4386 }
4387 #else
4388 void tcg_dump_info(void)
4389 {
4390 qemu_printf("[TCG profiler not compiled]\n");
4391 }
4392 #endif
4394 #ifdef ELF_HOST_MACHINE
4395 /* In order to use this feature, the backend needs to do three things:
4397 (1) Define ELF_HOST_MACHINE to indicate both what value to
4398 put into the ELF image and to indicate support for the feature.
4400 (2) Define tcg_register_jit. This should create a buffer containing
4401 the contents of a .debug_frame section that describes the post-
4402 prologue unwind info for the tcg machine.
4404 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4405 */
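/*
 * Sketch of steps (2) and (3) (an assumption about the general shape only;
 * real backends keep this in tcg/<host>/tcg-target.c.inc and the field
 * layout below is purely illustrative):
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_insns[8];   /* host-specific CFA/return-column rules */
} DebugFrame;

static const DebugFrame debug_frame = {
    /* CIE/FDE contents describing the frame set up by
       tcg_target_qemu_prologue(), encoded per the host ABI. */
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif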
4407 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4408 typedef enum {
4409 JIT_NOACTION = 0,
4410 JIT_REGISTER_FN,
4411 JIT_UNREGISTER_FN
4412 } jit_actions_t;
4414 struct jit_code_entry {
4415 struct jit_code_entry *next_entry;
4416 struct jit_code_entry *prev_entry;
4417 const void *symfile_addr;
4418 uint64_t symfile_size;
4419 };
4421 struct jit_descriptor {
4422 uint32_t version;
4423 uint32_t action_flag;
4424 struct jit_code_entry *relevant_entry;
4425 struct jit_code_entry *first_entry;
4426 };
4428 void __jit_debug_register_code(void) __attribute__((noinline));
4429 void __jit_debug_register_code(void)
4430 {
4431 asm("");
4432 }
4434 /* Must statically initialize the version, because GDB may check
4435 the version before we can set it. */
4436 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4438 /* End GDB interface. */
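/*
 * How the pieces above fit together: GDB places a breakpoint on
 * __jit_debug_register_code().  When the JIT updates
 * __jit_debug_descriptor and calls that function, GDB walks the
 * first_entry/next_entry list and reads each symfile_addr/symfile_size
 * as an in-memory ELF object -- which is what tcg_register_jit_int()
 * constructs below.
 */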
4440 static int find_string(const char *strtab, const char *str)
4441 {
4442 const char *p = strtab + 1;
4444 while (1) {
4445 if (strcmp(p, str) == 0) {
4446 return p - strtab;
4447 }
4448 p += strlen(p) + 1;
4449 }
4450 }
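/*
 * Worked example: find_string() assumes @str is present in @strtab; there
 * is no end-of-table check, so the caller must pass a string that exists.
 * With the .str table built below, find_string(img->str, ".debug_info")
 * returns 7 and find_string(img->str, "code_gen_buffer") returns 62 --
 * the byte offsets assigned to the sh_name/st_name fields further down.
 */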
4452 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4453 const void *debug_frame,
4454 size_t debug_frame_size)
4455 {
4456 struct __attribute__((packed)) DebugInfo {
4457 uint32_t len;
4458 uint16_t version;
4459 uint32_t abbrev;
4460 uint8_t ptr_size;
4461 uint8_t cu_die;
4462 uint16_t cu_lang;
4463 uintptr_t cu_low_pc;
4464 uintptr_t cu_high_pc;
4465 uint8_t fn_die;
4466 char fn_name[16];
4467 uintptr_t fn_low_pc;
4468 uintptr_t fn_high_pc;
4469 uint8_t cu_eoc;
4470 };
4472 struct ElfImage {
4473 ElfW(Ehdr) ehdr;
4474 ElfW(Phdr) phdr;
4475 ElfW(Shdr) shdr[7];
4476 ElfW(Sym) sym[2];
4477 struct DebugInfo di;
4478 uint8_t da[24];
4479 char str[80];
4480 };
4482 struct ElfImage *img;
4484 static const struct ElfImage img_template = {
4485 .ehdr = {
4486 .e_ident[EI_MAG0] = ELFMAG0,
4487 .e_ident[EI_MAG1] = ELFMAG1,
4488 .e_ident[EI_MAG2] = ELFMAG2,
4489 .e_ident[EI_MAG3] = ELFMAG3,
4490 .e_ident[EI_CLASS] = ELF_CLASS,
4491 .e_ident[EI_DATA] = ELF_DATA,
4492 .e_ident[EI_VERSION] = EV_CURRENT,
4493 .e_type = ET_EXEC,
4494 .e_machine = ELF_HOST_MACHINE,
4495 .e_version = EV_CURRENT,
4496 .e_phoff = offsetof(struct ElfImage, phdr),
4497 .e_shoff = offsetof(struct ElfImage, shdr),
4498 .e_ehsize = sizeof(ElfW(Shdr)),
4499 .e_phentsize = sizeof(ElfW(Phdr)),
4500 .e_phnum = 1,
4501 .e_shentsize = sizeof(ElfW(Shdr)),
4502 .e_shnum = ARRAY_SIZE(img->shdr),
4503 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4504 #ifdef ELF_HOST_FLAGS
4505 .e_flags = ELF_HOST_FLAGS,
4506 #endif
4507 #ifdef ELF_OSABI
4508 .e_ident[EI_OSABI] = ELF_OSABI,
4509 #endif
4510 },
4511 .phdr = {
4512 .p_type = PT_LOAD,
4513 .p_flags = PF_X,
4514 },
4515 .shdr = {
4516 [0] = { .sh_type = SHT_NULL },
4517 /* Trick: The contents of code_gen_buffer are not present in
4518 this fake ELF file; that got allocated elsewhere. Therefore
4519 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4520 will not look for contents. We can record any address. */
4521 [1] = { /* .text */
4522 .sh_type = SHT_NOBITS,
4523 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4524 },
4525 [2] = { /* .debug_info */
4526 .sh_type = SHT_PROGBITS,
4527 .sh_offset = offsetof(struct ElfImage, di),
4528 .sh_size = sizeof(struct DebugInfo),
4529 },
4530 [3] = { /* .debug_abbrev */
4531 .sh_type = SHT_PROGBITS,
4532 .sh_offset = offsetof(struct ElfImage, da),
4533 .sh_size = sizeof(img->da),
4534 },
4535 [4] = { /* .debug_frame */
4536 .sh_type = SHT_PROGBITS,
4537 .sh_offset = sizeof(struct ElfImage),
4538 },
4539 [5] = { /* .symtab */
4540 .sh_type = SHT_SYMTAB,
4541 .sh_offset = offsetof(struct ElfImage, sym),
4542 .sh_size = sizeof(img->sym),
4543 .sh_info = 1,
4544 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4545 .sh_entsize = sizeof(ElfW(Sym)),
4546 },
4547 [6] = { /* .strtab */
4548 .sh_type = SHT_STRTAB,
4549 .sh_offset = offsetof(struct ElfImage, str),
4550 .sh_size = sizeof(img->str),
4551 },
4552 },
4553 .sym = {
4554 [1] = { /* code_gen_buffer */
4555 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4556 .st_shndx = 1,
4557 }
4558 },
4559 .di = {
4560 .len = sizeof(struct DebugInfo) - 4,
4561 .version = 2,
4562 .ptr_size = sizeof(void *),
4563 .cu_die = 1,
4564 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4565 .fn_die = 2,
4566 .fn_name = "code_gen_buffer"
4567 },
4568 .da = {
4569 1, /* abbrev number (the cu) */
4570 0x11, 1, /* DW_TAG_compile_unit, has children */
4571 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4572 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4573 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4574 0, 0, /* end of abbrev */
4575 2, /* abbrev number (the fn) */
4576 0x2e, 0, /* DW_TAG_subprogram, no children */
4577 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4578 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4579 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4580 0, 0, /* end of abbrev */
4581 0 /* no more abbrev */
4582 },
4583 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4584 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4585 };
4587 /* We only need a single jit entry; statically allocate it. */
4588 static struct jit_code_entry one_entry;
4590 uintptr_t buf = (uintptr_t)buf_ptr;
4591 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4592 DebugFrameHeader *dfh;
4594 img = g_malloc(img_size);
4595 *img = img_template;
4597 img->phdr.p_vaddr = buf;
4598 img->phdr.p_paddr = buf;
4599 img->phdr.p_memsz = buf_size;
4601 img->shdr[1].sh_name = find_string(img->str, ".text");
4602 img->shdr[1].sh_addr = buf;
4603 img->shdr[1].sh_size = buf_size;
4605 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4606 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4608 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4609 img->shdr[4].sh_size = debug_frame_size;
4611 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4612 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4614 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4615 img->sym[1].st_value = buf;
4616 img->sym[1].st_size = buf_size;
4618 img->di.cu_low_pc = buf;
4619 img->di.cu_high_pc = buf + buf_size;
4620 img->di.fn_low_pc = buf;
4621 img->di.fn_high_pc = buf + buf_size;
4623 dfh = (DebugFrameHeader *)(img + 1);
4624 memcpy(dfh, debug_frame, debug_frame_size);
4625 dfh->fde.func_start = buf;
4626 dfh->fde.func_len = buf_size;
4628 #ifdef DEBUG_JIT
4629 /* Enable this block to be able to debug the ELF image file creation.
4630 One can use readelf, objdump, or other inspection utilities. */
4631 {
4632 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4633 if (f) {
4634 if (fwrite(img, img_size, 1, f) != img_size) {
4635 /* Avoid stupid unused return value warning for fwrite. */
4636 }
4637 fclose(f);
4638 }
4639 }
4640 #endif
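/*
 * For example (any ELF/DWARF inspection tool will do):
 *     readelf --sections --symbols /tmp/qemu.jit
 *     readelf --debug-dump=frames /tmp/qemu.jit
 *     objdump --dwarf=info /tmp/qemu.jit
 */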
4642 one_entry.symfile_addr = img;
4643 one_entry.symfile_size = img_size;
4645 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4646 __jit_debug_descriptor.relevant_entry = &one_entry;
4647 __jit_debug_descriptor.first_entry = &one_entry;
4648 __jit_debug_register_code();
4649 }
4650 #else
4651 /* No support for the feature. Provide the entry point expected by exec.c,
4652 and implement the internal function we declared earlier. */
4654 static void tcg_register_jit_int(void *buf, size_t size,
4655 const void *debug_frame,
4656 size_t debug_frame_size)
4657 {
4658 }
4660 void tcg_register_jit(void *buf, size_t buf_size)
4661 {
4662 }
4663 #endif /* ELF_HOST_MACHINE */
4665 #if !TCG_TARGET_MAYBE_vec
4666 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4667 {
4668 g_assert_not_reached();
4669 }
4670 #endif