1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long-term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
42 instructions. */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
56 #else
57 # define ELF_CLASS ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
61 #else
62 # define ELF_DATA ELFDATA2LSB
63 #endif
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75 intptr_t value, intptr_t addend);
77 /* The CIE and FDE header definitions will be common to all hosts. */
78 typedef struct {
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
80 uint32_t id;
81 uint8_t version;
82 char augmentation[1];
83 uint8_t code_align;
84 uint8_t data_align;
85 uint8_t return_column;
86 } DebugFrameCIE;
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 uint32_t cie_offset;
91 uintptr_t func_start;
92 uintptr_t func_len;
93 } DebugFrameFDEHeader;
95 typedef struct QEMU_PACKED {
96 DebugFrameCIE cie;
97 DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
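/*
 * Illustrative sketch, not part of the original file: each tcg-target.c.inc
 * builds a static debug-frame blob from these structs and registers it via
 * tcg_register_jit(), so that GDB's JIT interface can unwind through the
 * generated code.  The field values below are hypothetical, loosely modelled
 * on a 64-bit host; real backends add host-specific CFA and register rules.
 */
#if 0
static const DebugFrameHeader example_debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4, /* length excludes the .len field */
    .cie.id = -1,                         /* an id of -1 marks a CIE */
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,               /* sleb128 encoding of -8 */
    .cie.return_column = 16,              /* hypothetical return-address reg */
};
#endif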
100 static void tcg_register_jit_int(const void *buf, size_t size,
101 const void *debug_frame,
102 size_t debug_frame_size)
103 __attribute__((unused));
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 #ifndef TCG_TARGET_CON_STR_H
107 static const char *target_parse_constraint(TCGArgConstraint *ct,
108 const char *ct_str, TCGType type);
109 #endif
110 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
111 intptr_t arg2);
112 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
113 static void tcg_out_movi(TCGContext *s, TCGType type,
114 TCGReg ret, tcg_target_long arg);
115 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
116 const int *const_args);
117 #if TCG_TARGET_MAYBE_vec
118 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, TCGReg src);
120 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
121 TCGReg dst, TCGReg base, intptr_t offset);
122 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
123 TCGReg dst, int64_t arg);
124 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
125 unsigned vece, const TCGArg *args,
126 const int *const_args);
127 #else
128 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
129 TCGReg dst, TCGReg src)
131 g_assert_not_reached();
133 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
134 TCGReg dst, TCGReg base, intptr_t offset)
136 g_assert_not_reached();
138 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
139 TCGReg dst, int64_t arg)
141 g_assert_not_reached();
143 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
144 unsigned vece, const TCGArg *args,
145 const int *const_args)
147 g_assert_not_reached();
149 #endif
150 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
151 intptr_t arg2);
152 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
153 TCGReg base, intptr_t ofs);
154 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
155 static int tcg_target_const_match(tcg_target_long val, TCGType type,
156 const TCGArgConstraint *arg_ct);
157 #ifdef TCG_TARGET_NEED_LDST_LABELS
158 static int tcg_out_ldst_finalize(TCGContext *s);
159 #endif
161 #define TCG_HIGHWATER 1024
163 static TCGContext **tcg_ctxs;
164 static unsigned int n_tcg_ctxs;
165 TCGv_env cpu_env = 0;
166 const void *tcg_code_gen_epilogue;
167 uintptr_t tcg_splitwx_diff;
169 #ifndef CONFIG_TCG_INTERPRETER
170 tcg_prologue_fn *tcg_qemu_tb_exec;
171 #endif
173 struct tcg_region_tree {
174 QemuMutex lock;
175 GTree *tree;
176 /* padding to avoid false sharing is computed at run-time */
180 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
181 * dynamically allocate from as demand dictates. Given appropriate region
182 * sizing, this minimizes flushes even when some TCG threads generate a lot
183 * more code than others.
185 struct tcg_region_state {
186 QemuMutex lock;
188 /* fields set at init time */
189 void *start;
190 void *start_aligned;
191 void *end;
192 size_t n;
193 size_t size; /* size of one region */
194 size_t stride; /* .size + guard size */
196 /* fields protected by the lock */
197 size_t current; /* current region index */
198 size_t agg_size_full; /* aggregate size of full regions */
201 static struct tcg_region_state region;
203 * This is an array of struct tcg_region_tree's, with padding.
204 * We use void * to simplify the computation of region_trees[i]; each
205 * struct is found every tree_size bytes.
207 static void *region_trees;
208 static size_t tree_size;
209 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
210 static TCGRegSet tcg_target_call_clobber_regs;
212 #if TCG_TARGET_INSN_UNIT_SIZE == 1
213 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
215 *s->code_ptr++ = v;
218 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
219 uint8_t v)
221 *p = v;
223 #endif
225 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
226 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
228 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
229 *s->code_ptr++ = v;
230 } else {
231 tcg_insn_unit *p = s->code_ptr;
232 memcpy(p, &v, sizeof(v));
233 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
237 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
238 uint16_t v)
240 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
241 *p = v;
242 } else {
243 memcpy(p, &v, sizeof(v));
246 #endif
248 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
249 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
251 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
252 *s->code_ptr++ = v;
253 } else {
254 tcg_insn_unit *p = s->code_ptr;
255 memcpy(p, &v, sizeof(v));
256 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
260 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
261 uint32_t v)
263 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
264 *p = v;
265 } else {
266 memcpy(p, &v, sizeof(v));
269 #endif
271 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
272 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
274 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
275 *s->code_ptr++ = v;
276 } else {
277 tcg_insn_unit *p = s->code_ptr;
278 memcpy(p, &v, sizeof(v));
279 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
283 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
284 uint64_t v)
286 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
287 *p = v;
288 } else {
289 memcpy(p, &v, sizeof(v));
292 #endif
294 /* label relocation processing */
296 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
297 TCGLabel *l, intptr_t addend)
299 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
301 r->type = type;
302 r->ptr = code_ptr;
303 r->addend = addend;
304 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
307 static void tcg_out_label(TCGContext *s, TCGLabel *l)
309 tcg_debug_assert(!l->has_value);
310 l->has_value = 1;
311 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
314 TCGLabel *gen_new_label(void)
316 TCGContext *s = tcg_ctx;
317 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
319 memset(l, 0, sizeof(TCGLabel));
320 l->id = s->nb_labels++;
321 QSIMPLEQ_INIT(&l->relocs);
323 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
325 return l;
328 static bool tcg_resolve_relocs(TCGContext *s)
330 TCGLabel *l;
332 QSIMPLEQ_FOREACH(l, &s->labels, next) {
333 TCGRelocation *r;
334 uintptr_t value = l->u.value;
336 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
337 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
338 return false;
342 return true;
345 static void set_jmp_reset_offset(TCGContext *s, int which)
348 * We will check for overflow at the end of the opcode loop in
349 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
351 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
354 #include "tcg-target.c.inc"
356 /* compare a pointer @ptr and a tb_tc @s */
357 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
359 if (ptr >= s->ptr + s->size) {
360 return 1;
361 } else if (ptr < s->ptr) {
362 return -1;
364 return 0;
367 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
369 const struct tb_tc *a = ap;
370 const struct tb_tc *b = bp;
373 * When both sizes are set, we know this isn't a lookup.
374 * This is the most likely case: every TB must be inserted; lookups
375 * are a lot less frequent.
377 if (likely(a->size && b->size)) {
378 if (a->ptr > b->ptr) {
379 return 1;
380 } else if (a->ptr < b->ptr) {
381 return -1;
383 /* a->ptr == b->ptr should happen only on deletions */
384 g_assert(a->size == b->size);
385 return 0;
388 * All lookups have one of the two .size fields set to 0.
389 * From the glib sources we see that @ap is always the lookup key. However
390 * the docs provide no guarantee, so we just mark this case as likely.
392 if (likely(a->size == 0)) {
393 return ptr_cmp_tb_tc(a->ptr, b);
395 return ptr_cmp_tb_tc(b->ptr, a);
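/*
 * Illustrative sketch, not part of the original file: with the comparator
 * above, insertion keys carry both .ptr and .size, while a lookup key leaves
 * .size at 0 and matches any TB whose [ptr, ptr + size) range contains the
 * queried host address.  This is the convention tcg_tb_lookup() relies on
 * further down.
 */
#if 0
static TranslationBlock *example_tb_lookup(GTree *tree, uintptr_t host_pc)
{
    struct tb_tc key = { .ptr = (void *)host_pc }; /* .size stays 0 */

    return g_tree_lookup(tree, &key);
}
#endif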
398 static void tcg_region_trees_init(void)
400 size_t i;
402 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
403 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
404 for (i = 0; i < region.n; i++) {
405 struct tcg_region_tree *rt = region_trees + i * tree_size;
407 qemu_mutex_init(&rt->lock);
408 rt->tree = g_tree_new(tb_tc_cmp);
412 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
414 void *p = tcg_splitwx_to_rw(cp);
415 size_t region_idx;
417 if (p < region.start_aligned) {
418 region_idx = 0;
419 } else {
420 ptrdiff_t offset = p - region.start_aligned;
422 if (offset > region.stride * (region.n - 1)) {
423 region_idx = region.n - 1;
424 } else {
425 region_idx = offset / region.stride;
428 return region_trees + region_idx * tree_size;
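/*
 * Worked example, not part of the original file: assume a hypothetical
 * layout with region.stride = 2 MiB and region.n = 4.  A pointer 5 MiB past
 * region.start_aligned gives offset / stride = 2, i.e. the third tree; any
 * offset beyond stride * (n - 1) = 6 MiB is clamped to the last tree, which
 * also covers the slightly larger tail of the final region.
 */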
431 void tcg_tb_insert(TranslationBlock *tb)
433 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
435 qemu_mutex_lock(&rt->lock);
436 g_tree_insert(rt->tree, &tb->tc, tb);
437 qemu_mutex_unlock(&rt->lock);
440 void tcg_tb_remove(TranslationBlock *tb)
442 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
444 qemu_mutex_lock(&rt->lock);
445 g_tree_remove(rt->tree, &tb->tc);
446 qemu_mutex_unlock(&rt->lock);
450 * Find the TB 'tb' such that
451 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
452 * Return NULL if not found.
454 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
456 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
457 TranslationBlock *tb;
458 struct tb_tc s = { .ptr = (void *)tc_ptr };
460 qemu_mutex_lock(&rt->lock);
461 tb = g_tree_lookup(rt->tree, &s);
462 qemu_mutex_unlock(&rt->lock);
463 return tb;
466 static void tcg_region_tree_lock_all(void)
468 size_t i;
470 for (i = 0; i < region.n; i++) {
471 struct tcg_region_tree *rt = region_trees + i * tree_size;
473 qemu_mutex_lock(&rt->lock);
477 static void tcg_region_tree_unlock_all(void)
479 size_t i;
481 for (i = 0; i < region.n; i++) {
482 struct tcg_region_tree *rt = region_trees + i * tree_size;
484 qemu_mutex_unlock(&rt->lock);
488 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
490 size_t i;
492 tcg_region_tree_lock_all();
493 for (i = 0; i < region.n; i++) {
494 struct tcg_region_tree *rt = region_trees + i * tree_size;
496 g_tree_foreach(rt->tree, func, user_data);
498 tcg_region_tree_unlock_all();
501 size_t tcg_nb_tbs(void)
503 size_t nb_tbs = 0;
504 size_t i;
506 tcg_region_tree_lock_all();
507 for (i = 0; i < region.n; i++) {
508 struct tcg_region_tree *rt = region_trees + i * tree_size;
510 nb_tbs += g_tree_nnodes(rt->tree);
512 tcg_region_tree_unlock_all();
513 return nb_tbs;
516 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
518 TranslationBlock *tb = v;
520 tb_destroy(tb);
521 return FALSE;
524 static void tcg_region_tree_reset_all(void)
526 size_t i;
528 tcg_region_tree_lock_all();
529 for (i = 0; i < region.n; i++) {
530 struct tcg_region_tree *rt = region_trees + i * tree_size;
532 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
533 /* Increment the refcount first so that destroy acts as a reset */
534 g_tree_ref(rt->tree);
535 g_tree_destroy(rt->tree);
537 tcg_region_tree_unlock_all();
540 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
542 void *start, *end;
544 start = region.start_aligned + curr_region * region.stride;
545 end = start + region.size;
547 if (curr_region == 0) {
548 start = region.start;
550 if (curr_region == region.n - 1) {
551 end = region.end;
554 *pstart = start;
555 *pend = end;
558 static void tcg_region_assign(TCGContext *s, size_t curr_region)
560 void *start, *end;
562 tcg_region_bounds(curr_region, &start, &end);
564 s->code_gen_buffer = start;
565 s->code_gen_ptr = start;
566 s->code_gen_buffer_size = end - start;
567 s->code_gen_highwater = end - TCG_HIGHWATER;
570 static bool tcg_region_alloc__locked(TCGContext *s)
572 if (region.current == region.n) {
573 return true;
575 tcg_region_assign(s, region.current);
576 region.current++;
577 return false;
581 * Request a new region once the one in use has filled up.
582 * Returns true on error.
584 static bool tcg_region_alloc(TCGContext *s)
586 bool err;
587 /* read the region size now; alloc__locked will overwrite it on success */
588 size_t size_full = s->code_gen_buffer_size;
590 qemu_mutex_lock(&region.lock);
591 err = tcg_region_alloc__locked(s);
592 if (!err) {
593 region.agg_size_full += size_full - TCG_HIGHWATER;
595 qemu_mutex_unlock(&region.lock);
596 return err;
600 * Perform a context's first region allocation.
601 * This function does _not_ increment region.agg_size_full.
603 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
605 return tcg_region_alloc__locked(s);
608 /* Call from a safe-work context */
609 void tcg_region_reset_all(void)
611 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
612 unsigned int i;
614 qemu_mutex_lock(&region.lock);
615 region.current = 0;
616 region.agg_size_full = 0;
618 for (i = 0; i < n_ctxs; i++) {
619 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
620 bool err = tcg_region_initial_alloc__locked(s);
622 g_assert(!err);
624 qemu_mutex_unlock(&region.lock);
626 tcg_region_tree_reset_all();
629 #ifdef CONFIG_USER_ONLY
630 static size_t tcg_n_regions(void)
632 return 1;
634 #else
636 * It is likely that some vCPUs will translate more code than others, so we
637 * first try to set more regions than max_cpus, with those regions being of
638 * reasonable size. If that's not possible we make do by evenly dividing
639 * the code_gen_buffer among the vCPUs.
641 static size_t tcg_n_regions(void)
643 size_t i;
645 /* Use a single region if all we have is one vCPU thread */
646 #if !defined(CONFIG_USER_ONLY)
647 MachineState *ms = MACHINE(qdev_get_machine());
648 unsigned int max_cpus = ms->smp.max_cpus;
649 #endif
650 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
651 return 1;
654 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
655 for (i = 8; i > 0; i--) {
656 size_t regions_per_thread = i;
657 size_t region_size;
659 region_size = tcg_init_ctx.code_gen_buffer_size;
660 region_size /= max_cpus * regions_per_thread;
662 if (region_size >= 2 * 1024u * 1024) {
663 return max_cpus * regions_per_thread;
666 /* If we can't, then just allocate one region per vCPU thread */
667 return max_cpus;
669 #endif
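/*
 * Worked example for the sizing loop above, not part of the original file:
 * with a hypothetical 64 MiB code_gen_buffer and max_cpus = 8, eight regions
 * per thread would yield 1 MiB regions (below the 2 MiB floor), so the loop
 * settles on four regions per thread, i.e. 32 regions of 2 MiB each.
 */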
672 * Initializes region partitioning.
674 * Called at init time from the parent thread (i.e. the one calling
675 * tcg_context_init), after the target's TCG globals have been set.
677 * Region partitioning works by splitting code_gen_buffer into separate regions,
678 * and then assigning regions to TCG threads so that the threads can translate
679 * code in parallel without synchronization.
681 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
682 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
683 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
684 * must have been parsed before calling this function, since it calls
685 * qemu_tcg_mttcg_enabled().
687 * In user-mode we use a single region. Having multiple regions in user-mode
688 * is not supported, because the number of vCPU threads (recall that each thread
689 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
690 * OS, and usually this number is huge (tens of thousands is not uncommon).
691 * Thus, given this large bound on the number of vCPU threads and the fact
692 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
693 * the availability of at least one region per vCPU thread.
695 * However, this user-mode limitation is unlikely to be a significant problem
696 * in practice. Multi-threaded guests share most if not all of their translated
697 * code, which makes parallel code generation less appealing than in softmmu.
699 void tcg_region_init(void)
701 void *buf = tcg_init_ctx.code_gen_buffer;
702 void *aligned;
703 size_t size = tcg_init_ctx.code_gen_buffer_size;
704 size_t page_size = qemu_real_host_page_size;
705 size_t region_size;
706 size_t n_regions;
707 size_t i;
708 uintptr_t splitwx_diff;
710 n_regions = tcg_n_regions();
712 /* The first region will be 'aligned - buf' bytes larger than the others */
713 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
714 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
716 * Make region_size a multiple of page_size, using aligned as the start.
717 * As a result of this we might end up with a few extra pages at the end of
718 * the buffer; we will assign those to the last region.
720 region_size = (size - (aligned - buf)) / n_regions;
721 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
723 /* A region must have at least 2 pages; one code, one guard */
724 g_assert(region_size >= 2 * page_size);
726 /* init the region struct */
727 qemu_mutex_init(&region.lock);
728 region.n = n_regions;
729 region.size = region_size - page_size;
730 region.stride = region_size;
731 region.start = buf;
732 region.start_aligned = aligned;
733 /* page-align the end, since its last page will be a guard page */
734 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
735 /* account for that last guard page */
736 region.end -= page_size;
738 /* set guard pages */
739 splitwx_diff = tcg_splitwx_diff;
740 for (i = 0; i < region.n; i++) {
741 void *start, *end;
742 int rc;
744 tcg_region_bounds(i, &start, &end);
745 rc = qemu_mprotect_none(end, page_size);
746 g_assert(!rc);
747 if (splitwx_diff) {
748 rc = qemu_mprotect_none(end + splitwx_diff, page_size);
749 g_assert(!rc);
753 tcg_region_trees_init();
755 /* In user-mode we support only one ctx, so do the initial allocation now */
756 #ifdef CONFIG_USER_ONLY
758 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
760 g_assert(!err);
762 #endif
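/*
 * Resulting layout, illustration only (not part of the original file), for a
 * hypothetical three-region buffer:
 *
 *   region.start                                               region.end
 *   |<------- region 0 ------->|G|<---- region 1 ---->|G|<-- region 2 -->|G|
 *
 * Each stride holds region.size bytes of usable code plus one guard page (G)
 * mprotect'ed to PROT_NONE.  Region 0 is larger by the alignment slack and
 * the last region by any leftover pages; with split-wx the guard pages are
 * also applied to the mirrored mapping at tcg_splitwx_diff.
 */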
765 #ifdef CONFIG_DEBUG_TCG
766 const void *tcg_splitwx_to_rx(void *rw)
768 /* Pass NULL pointers unchanged. */
769 if (rw) {
770 g_assert(in_code_gen_buffer(rw));
771 rw += tcg_splitwx_diff;
773 return rw;
776 void *tcg_splitwx_to_rw(const void *rx)
778 /* Pass NULL pointers unchanged. */
779 if (rx) {
780 rx -= tcg_splitwx_diff;
781 /* Assert that we end with a pointer in the rw region. */
782 g_assert(in_code_gen_buffer(rx));
784 return (void *)rx;
786 #endif /* CONFIG_DEBUG_TCG */
788 static void alloc_tcg_plugin_context(TCGContext *s)
790 #ifdef CONFIG_PLUGIN
791 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
792 s->plugin_tb->insns =
793 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
794 #endif
798 * All TCG threads except the parent (i.e. the one that called tcg_context_init
799 * and registered the target's TCG globals) must register with this function
800 * before initiating translation.
802 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
803 * of tcg_region_init() for the reasoning behind this.
805 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
806 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
807 * is not used anymore for translation once this function is called.
809 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
810 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
812 #ifdef CONFIG_USER_ONLY
813 void tcg_register_thread(void)
815 tcg_ctx = &tcg_init_ctx;
817 #else
818 void tcg_register_thread(void)
820 MachineState *ms = MACHINE(qdev_get_machine());
821 TCGContext *s = g_malloc(sizeof(*s));
822 unsigned int i, n;
823 bool err;
825 *s = tcg_init_ctx;
827 /* Relink mem_base. */
828 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
829 if (tcg_init_ctx.temps[i].mem_base) {
830 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
831 tcg_debug_assert(b >= 0 && b < n);
832 s->temps[i].mem_base = &s->temps[b];
836 /* Claim an entry in tcg_ctxs */
837 n = qatomic_fetch_inc(&n_tcg_ctxs);
838 g_assert(n < ms->smp.max_cpus);
839 qatomic_set(&tcg_ctxs[n], s);
841 if (n > 0) {
842 alloc_tcg_plugin_context(s);
845 tcg_ctx = s;
846 qemu_mutex_lock(&region.lock);
847 err = tcg_region_initial_alloc__locked(tcg_ctx);
848 g_assert(!err);
849 qemu_mutex_unlock(&region.lock);
851 #endif /* !CONFIG_USER_ONLY */
854 * Returns the size (in bytes) of all translated code (i.e. from all regions)
855 * currently in the cache.
856 * See also: tcg_code_capacity()
857 * Do not confuse with tcg_current_code_size(); that one applies to a single
858 * TCG context.
860 size_t tcg_code_size(void)
862 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
863 unsigned int i;
864 size_t total;
866 qemu_mutex_lock(&region.lock);
867 total = region.agg_size_full;
868 for (i = 0; i < n_ctxs; i++) {
869 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
870 size_t size;
872 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
873 g_assert(size <= s->code_gen_buffer_size);
874 total += size;
876 qemu_mutex_unlock(&region.lock);
877 return total;
881 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
882 * regions.
883 * See also: tcg_code_size()
885 size_t tcg_code_capacity(void)
887 size_t guard_size, capacity;
889 /* no need for synchronization; these variables are set at init time */
890 guard_size = region.stride - region.size;
891 capacity = region.end + guard_size - region.start;
892 capacity -= region.n * (guard_size + TCG_HIGHWATER);
893 return capacity;
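/*
 * Worked example, not part of the original file: with a hypothetical buffer
 * of three 2 MiB strides and 4 KiB pages, guard_size is 4 KiB and
 * region.end + guard_size - region.start spans the full 6 MiB; subtracting
 * 3 * (4 KiB + TCG_HIGHWATER) leaves the bytes that can actually hold
 * translated code before each region's high-water check forces a new one.
 */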
896 size_t tcg_tb_phys_invalidate_count(void)
898 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
899 unsigned int i;
900 size_t total = 0;
902 for (i = 0; i < n_ctxs; i++) {
903 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
905 total += qatomic_read(&s->tb_phys_invalidate_count);
907 return total;
910 /* pool based memory allocation */
911 void *tcg_malloc_internal(TCGContext *s, int size)
913 TCGPool *p;
914 int pool_size;
916 if (size > TCG_POOL_CHUNK_SIZE) {
917 /* big malloc: insert a new pool (XXX: could optimize) */
918 p = g_malloc(sizeof(TCGPool) + size);
919 p->size = size;
920 p->next = s->pool_first_large;
921 s->pool_first_large = p;
922 return p->data;
923 } else {
924 p = s->pool_current;
925 if (!p) {
926 p = s->pool_first;
927 if (!p)
928 goto new_pool;
929 } else {
930 if (!p->next) {
931 new_pool:
932 pool_size = TCG_POOL_CHUNK_SIZE;
933 p = g_malloc(sizeof(TCGPool) + pool_size);
934 p->size = pool_size;
935 p->next = NULL;
936 if (s->pool_current)
937 s->pool_current->next = p;
938 else
939 s->pool_first = p;
940 } else {
941 p = p->next;
945 s->pool_current = p;
946 s->pool_cur = p->data + size;
947 s->pool_end = p->data + p->size;
948 return p->data;
951 void tcg_pool_reset(TCGContext *s)
953 TCGPool *p, *t;
954 for (p = s->pool_first_large; p; p = t) {
955 t = p->next;
956 g_free(p);
958 s->pool_first_large = NULL;
959 s->pool_cur = s->pool_end = NULL;
960 s->pool_current = NULL;
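/*
 * Illustrative sketch, not part of the original file: pool memory is
 * per-translation scratch storage.  tcg_malloc() (in tcg.h) carves chunks
 * out of the current pool, falling back to tcg_malloc_internal() above when
 * it runs out, and tcg_func_start() calls tcg_pool_reset() so each new
 * translation starts from an empty pool; nothing is freed individually.
 */
#if 0
static TCGRelocation *example_pool_alloc(void)
{
    /* Valid only until the next tcg_pool_reset(). */
    return tcg_malloc(sizeof(TCGRelocation));
}
#endif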
963 typedef struct TCGHelperInfo {
964 void *func;
965 const char *name;
966 unsigned flags;
967 unsigned sizemask;
968 } TCGHelperInfo;
970 #include "exec/helper-proto.h"
972 static const TCGHelperInfo all_helpers[] = {
973 #include "exec/helper-tcg.h"
975 static GHashTable *helper_table;
977 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
978 static void process_op_defs(TCGContext *s);
979 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
980 TCGReg reg, const char *name);
982 void tcg_context_init(TCGContext *s)
984 int op, total_args, n, i;
985 TCGOpDef *def;
986 TCGArgConstraint *args_ct;
987 TCGTemp *ts;
989 memset(s, 0, sizeof(*s));
990 s->nb_globals = 0;
992 /* Count total number of arguments and allocate the corresponding
993 space */
994 total_args = 0;
995 for(op = 0; op < NB_OPS; op++) {
996 def = &tcg_op_defs[op];
997 n = def->nb_iargs + def->nb_oargs;
998 total_args += n;
1001 args_ct = g_new0(TCGArgConstraint, total_args);
1003 for(op = 0; op < NB_OPS; op++) {
1004 def = &tcg_op_defs[op];
1005 def->args_ct = args_ct;
1006 n = def->nb_iargs + def->nb_oargs;
1007 args_ct += n;
1010 /* Register helpers. */
1011 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1012 helper_table = g_hash_table_new(NULL, NULL);
1014 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1015 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1016 (gpointer)&all_helpers[i]);
1019 tcg_target_init(s);
1020 process_op_defs(s);
1022 /* Reverse the order of the saved registers, assuming they're all at
1023 the start of tcg_target_reg_alloc_order. */
1024 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1025 int r = tcg_target_reg_alloc_order[n];
1026 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1027 break;
1030 for (i = 0; i < n; ++i) {
1031 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1033 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1034 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1037 alloc_tcg_plugin_context(s);
1039 tcg_ctx = s;
1041 * In user-mode we simply share the init context among threads, since we
1042 * use a single region. See the documentation of tcg_region_init() for the
1043 * reasoning behind this.
1044 * In softmmu we will have at most max_cpus TCG threads.
1046 #ifdef CONFIG_USER_ONLY
1047 tcg_ctxs = &tcg_ctx;
1048 n_tcg_ctxs = 1;
1049 #else
1050 MachineState *ms = MACHINE(qdev_get_machine());
1051 unsigned int max_cpus = ms->smp.max_cpus;
1052 tcg_ctxs = g_new(TCGContext *, max_cpus);
1053 #endif
1055 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1056 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1057 cpu_env = temp_tcgv_ptr(ts);
1061 * Allocate TBs right before their corresponding translated code, making
1062 * sure that TBs and code are on different cache lines.
1064 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1066 uintptr_t align = qemu_icache_linesize;
1067 TranslationBlock *tb;
1068 void *next;
1070 retry:
1071 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1072 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1074 if (unlikely(next > s->code_gen_highwater)) {
1075 if (tcg_region_alloc(s)) {
1076 return NULL;
1078 goto retry;
1080 qatomic_set(&s->code_gen_ptr, next);
1081 s->data_gen_ptr = NULL;
1082 return tb;
1085 void tcg_prologue_init(TCGContext *s)
1087 size_t prologue_size, total_size;
1088 void *buf0, *buf1;
1090 /* Put the prologue at the beginning of code_gen_buffer. */
1091 buf0 = s->code_gen_buffer;
1092 total_size = s->code_gen_buffer_size;
1093 s->code_ptr = buf0;
1094 s->code_buf = buf0;
1095 s->data_gen_ptr = NULL;
1098 * The region trees are not yet configured, but tcg_splitwx_to_rx
1099 * needs the bounds for an assert.
1101 region.start = buf0;
1102 region.end = buf0 + total_size;
1104 #ifndef CONFIG_TCG_INTERPRETER
1105 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1106 #endif
1108 /* Compute a high-water mark, at which we voluntarily flush the buffer
1109 and start over. The size here is arbitrary, significantly larger
1110 than we expect the code generation for any one opcode to require. */
1111 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1113 #ifdef TCG_TARGET_NEED_POOL_LABELS
1114 s->pool_labels = NULL;
1115 #endif
1117 qemu_thread_jit_write();
1118 /* Generate the prologue. */
1119 tcg_target_qemu_prologue(s);
1121 #ifdef TCG_TARGET_NEED_POOL_LABELS
1122 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1124 int result = tcg_out_pool_finalize(s);
1125 tcg_debug_assert(result == 0);
1127 #endif
1129 buf1 = s->code_ptr;
1130 #ifndef CONFIG_TCG_INTERPRETER
1131 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1132 tcg_ptr_byte_diff(buf1, buf0));
1133 #endif
1135 /* Deduct the prologue from the buffer. */
1136 prologue_size = tcg_current_code_size(s);
1137 s->code_gen_ptr = buf1;
1138 s->code_gen_buffer = buf1;
1139 s->code_buf = buf1;
1140 total_size -= prologue_size;
1141 s->code_gen_buffer_size = total_size;
1143 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1145 #ifdef DEBUG_DISAS
1146 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1147 FILE *logfile = qemu_log_lock();
1148 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1149 if (s->data_gen_ptr) {
1150 size_t code_size = s->data_gen_ptr - buf0;
1151 size_t data_size = prologue_size - code_size;
1152 size_t i;
1154 log_disas(buf0, code_size);
1156 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1157 if (sizeof(tcg_target_ulong) == 8) {
1158 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1159 (uintptr_t)s->data_gen_ptr + i,
1160 *(uint64_t *)(s->data_gen_ptr + i));
1161 } else {
1162 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1163 (uintptr_t)s->data_gen_ptr + i,
1164 *(uint32_t *)(s->data_gen_ptr + i));
1167 } else {
1168 log_disas(buf0, prologue_size);
1170 qemu_log("\n");
1171 qemu_log_flush();
1172 qemu_log_unlock(logfile);
1174 #endif
1176 /* Assert that goto_ptr is implemented completely. */
1177 if (TCG_TARGET_HAS_goto_ptr) {
1178 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1182 void tcg_func_start(TCGContext *s)
1184 tcg_pool_reset(s);
1185 s->nb_temps = s->nb_globals;
1187 /* No temps have been previously allocated for size or locality. */
1188 memset(s->free_temps, 0, sizeof(s->free_temps));
1190 /* No constant temps have been previously allocated. */
1191 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1192 if (s->const_table[i]) {
1193 g_hash_table_remove_all(s->const_table[i]);
1197 s->nb_ops = 0;
1198 s->nb_labels = 0;
1199 s->current_frame_offset = s->frame_start;
1201 #ifdef CONFIG_DEBUG_TCG
1202 s->goto_tb_issue_mask = 0;
1203 #endif
1205 QTAILQ_INIT(&s->ops);
1206 QTAILQ_INIT(&s->free_ops);
1207 QSIMPLEQ_INIT(&s->labels);
1210 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1212 int n = s->nb_temps++;
1214 if (n >= TCG_MAX_TEMPS) {
1215 /* Signal overflow, starting over with fewer guest insns. */
1216 siglongjmp(s->jmp_trans, -2);
1218 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1221 static TCGTemp *tcg_global_alloc(TCGContext *s)
1223 TCGTemp *ts;
1225 tcg_debug_assert(s->nb_globals == s->nb_temps);
1226 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1227 s->nb_globals++;
1228 ts = tcg_temp_alloc(s);
1229 ts->kind = TEMP_GLOBAL;
1231 return ts;
1234 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1235 TCGReg reg, const char *name)
1237 TCGTemp *ts;
1239 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1240 tcg_abort();
1243 ts = tcg_global_alloc(s);
1244 ts->base_type = type;
1245 ts->type = type;
1246 ts->kind = TEMP_FIXED;
1247 ts->reg = reg;
1248 ts->name = name;
1249 tcg_regset_set_reg(s->reserved_regs, reg);
1251 return ts;
1254 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1256 s->frame_start = start;
1257 s->frame_end = start + size;
1258 s->frame_temp
1259 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1262 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1263 intptr_t offset, const char *name)
1265 TCGContext *s = tcg_ctx;
1266 TCGTemp *base_ts = tcgv_ptr_temp(base);
1267 TCGTemp *ts = tcg_global_alloc(s);
1268 int indirect_reg = 0, bigendian = 0;
1269 #ifdef HOST_WORDS_BIGENDIAN
1270 bigendian = 1;
1271 #endif
1273 switch (base_ts->kind) {
1274 case TEMP_FIXED:
1275 break;
1276 case TEMP_GLOBAL:
1277 /* We do not support double-indirect registers. */
1278 tcg_debug_assert(!base_ts->indirect_reg);
1279 base_ts->indirect_base = 1;
1280 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1281 ? 2 : 1);
1282 indirect_reg = 1;
1283 break;
1284 default:
1285 g_assert_not_reached();
1288 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1289 TCGTemp *ts2 = tcg_global_alloc(s);
1290 char buf[64];
1292 ts->base_type = TCG_TYPE_I64;
1293 ts->type = TCG_TYPE_I32;
1294 ts->indirect_reg = indirect_reg;
1295 ts->mem_allocated = 1;
1296 ts->mem_base = base_ts;
1297 ts->mem_offset = offset + bigendian * 4;
1298 pstrcpy(buf, sizeof(buf), name);
1299 pstrcat(buf, sizeof(buf), "_0");
1300 ts->name = strdup(buf);
1302 tcg_debug_assert(ts2 == ts + 1);
1303 ts2->base_type = TCG_TYPE_I64;
1304 ts2->type = TCG_TYPE_I32;
1305 ts2->indirect_reg = indirect_reg;
1306 ts2->mem_allocated = 1;
1307 ts2->mem_base = base_ts;
1308 ts2->mem_offset = offset + (1 - bigendian) * 4;
1309 pstrcpy(buf, sizeof(buf), name);
1310 pstrcat(buf, sizeof(buf), "_1");
1311 ts2->name = strdup(buf);
1312 } else {
1313 ts->base_type = type;
1314 ts->type = type;
1315 ts->indirect_reg = indirect_reg;
1316 ts->mem_allocated = 1;
1317 ts->mem_base = base_ts;
1318 ts->mem_offset = offset;
1319 ts->name = name;
1321 return ts;
1324 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1326 TCGContext *s = tcg_ctx;
1327 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1328 TCGTemp *ts;
1329 int idx, k;
1331 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1332 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1333 if (idx < TCG_MAX_TEMPS) {
1334 /* There is already an available temp with the right type. */
1335 clear_bit(idx, s->free_temps[k].l);
1337 ts = &s->temps[idx];
1338 ts->temp_allocated = 1;
1339 tcg_debug_assert(ts->base_type == type);
1340 tcg_debug_assert(ts->kind == kind);
1341 } else {
1342 ts = tcg_temp_alloc(s);
1343 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1344 TCGTemp *ts2 = tcg_temp_alloc(s);
1346 ts->base_type = type;
1347 ts->type = TCG_TYPE_I32;
1348 ts->temp_allocated = 1;
1349 ts->kind = kind;
1351 tcg_debug_assert(ts2 == ts + 1);
1352 ts2->base_type = TCG_TYPE_I64;
1353 ts2->type = TCG_TYPE_I32;
1354 ts2->temp_allocated = 1;
1355 ts2->kind = kind;
1356 } else {
1357 ts->base_type = type;
1358 ts->type = type;
1359 ts->temp_allocated = 1;
1360 ts->kind = kind;
1364 #if defined(CONFIG_DEBUG_TCG)
1365 s->temps_in_use++;
1366 #endif
1367 return ts;
1370 TCGv_vec tcg_temp_new_vec(TCGType type)
1372 TCGTemp *t;
1374 #ifdef CONFIG_DEBUG_TCG
1375 switch (type) {
1376 case TCG_TYPE_V64:
1377 assert(TCG_TARGET_HAS_v64);
1378 break;
1379 case TCG_TYPE_V128:
1380 assert(TCG_TARGET_HAS_v128);
1381 break;
1382 case TCG_TYPE_V256:
1383 assert(TCG_TARGET_HAS_v256);
1384 break;
1385 default:
1386 g_assert_not_reached();
1388 #endif
1390 t = tcg_temp_new_internal(type, 0);
1391 return temp_tcgv_vec(t);
1394 /* Create a new temp of the same type as an existing temp. */
1395 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1397 TCGTemp *t = tcgv_vec_temp(match);
1399 tcg_debug_assert(t->temp_allocated != 0);
1401 t = tcg_temp_new_internal(t->base_type, 0);
1402 return temp_tcgv_vec(t);
1405 void tcg_temp_free_internal(TCGTemp *ts)
1407 TCGContext *s = tcg_ctx;
1408 int k, idx;
1410 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1411 if (ts->kind == TEMP_CONST) {
1412 return;
1415 #if defined(CONFIG_DEBUG_TCG)
1416 s->temps_in_use--;
1417 if (s->temps_in_use < 0) {
1418 fprintf(stderr, "More temporaries freed than allocated!\n");
1420 #endif
1422 tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1423 tcg_debug_assert(ts->temp_allocated != 0);
1424 ts->temp_allocated = 0;
1426 idx = temp_idx(ts);
1427 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1428 set_bit(idx, s->free_temps[k].l);
1431 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1433 TCGContext *s = tcg_ctx;
1434 GHashTable *h = s->const_table[type];
1435 TCGTemp *ts;
1437 if (h == NULL) {
1438 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1439 s->const_table[type] = h;
1442 ts = g_hash_table_lookup(h, &val);
1443 if (ts == NULL) {
1444 ts = tcg_temp_alloc(s);
1446 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1447 TCGTemp *ts2 = tcg_temp_alloc(s);
1449 ts->base_type = TCG_TYPE_I64;
1450 ts->type = TCG_TYPE_I32;
1451 ts->kind = TEMP_CONST;
1452 ts->temp_allocated = 1;
1454 * Retain the full value of the 64-bit constant in the low
1455 * part, so that the hash table works. Actual uses will
1456 * truncate the value to the low part.
1458 ts->val = val;
1460 tcg_debug_assert(ts2 == ts + 1);
1461 ts2->base_type = TCG_TYPE_I64;
1462 ts2->type = TCG_TYPE_I32;
1463 ts2->kind = TEMP_CONST;
1464 ts2->temp_allocated = 1;
1465 ts2->val = val >> 32;
1466 } else {
1467 ts->base_type = type;
1468 ts->type = type;
1469 ts->kind = TEMP_CONST;
1470 ts->temp_allocated = 1;
1471 ts->val = val;
1473 g_hash_table_insert(h, &ts->val, ts);
1476 return ts;
1479 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1481 val = dup_const(vece, val);
1482 return temp_tcgv_vec(tcg_constant_internal(type, val));
1485 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1487 TCGTemp *t = tcgv_vec_temp(match);
1489 tcg_debug_assert(t->temp_allocated != 0);
1490 return tcg_constant_vec(t->base_type, vece, val);
1493 TCGv_i32 tcg_const_i32(int32_t val)
1495 TCGv_i32 t0;
1496 t0 = tcg_temp_new_i32();
1497 tcg_gen_movi_i32(t0, val);
1498 return t0;
1501 TCGv_i64 tcg_const_i64(int64_t val)
1503 TCGv_i64 t0;
1504 t0 = tcg_temp_new_i64();
1505 tcg_gen_movi_i64(t0, val);
1506 return t0;
1509 TCGv_i32 tcg_const_local_i32(int32_t val)
1511 TCGv_i32 t0;
1512 t0 = tcg_temp_local_new_i32();
1513 tcg_gen_movi_i32(t0, val);
1514 return t0;
1517 TCGv_i64 tcg_const_local_i64(int64_t val)
1519 TCGv_i64 t0;
1520 t0 = tcg_temp_local_new_i64();
1521 tcg_gen_movi_i64(t0, val);
1522 return t0;
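/*
 * Usage note, not part of the original file: tcg_const_i32() and friends
 * above emit a mov into a fresh temporary that the caller must free, while
 * tcg_constant_internal() (exposed as tcg_constant_i32() etc.) returns an
 * interned, read-only TEMP_CONST temp whose tcg_temp_free_*() is silently
 * ignored, as noted in tcg_temp_free_internal() above.
 */
#if 0
static void example_const_usage(TCGv_i32 dst)
{
    TCGv_i32 a = tcg_const_i32(42);    /* mov-based, must be freed */
    TCGv_i32 b = tcg_constant_i32(42); /* interned, freeing is a no-op */

    tcg_gen_add_i32(dst, a, b);
    tcg_temp_free_i32(a);
}
#endif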
1525 #if defined(CONFIG_DEBUG_TCG)
1526 void tcg_clear_temp_count(void)
1528 TCGContext *s = tcg_ctx;
1529 s->temps_in_use = 0;
1532 int tcg_check_temp_count(void)
1534 TCGContext *s = tcg_ctx;
1535 if (s->temps_in_use) {
1536 /* Clear the count so that we don't give another
1537 * warning immediately next time around.
1539 s->temps_in_use = 0;
1540 return 1;
1542 return 0;
1544 #endif
1546 /* Return true if OP may appear in the opcode stream.
1547 Test the runtime variable that controls each opcode. */
1548 bool tcg_op_supported(TCGOpcode op)
1550 const bool have_vec
1551 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1553 switch (op) {
1554 case INDEX_op_discard:
1555 case INDEX_op_set_label:
1556 case INDEX_op_call:
1557 case INDEX_op_br:
1558 case INDEX_op_mb:
1559 case INDEX_op_insn_start:
1560 case INDEX_op_exit_tb:
1561 case INDEX_op_goto_tb:
1562 case INDEX_op_qemu_ld_i32:
1563 case INDEX_op_qemu_st_i32:
1564 case INDEX_op_qemu_ld_i64:
1565 case INDEX_op_qemu_st_i64:
1566 return true;
1568 case INDEX_op_qemu_st8_i32:
1569 return TCG_TARGET_HAS_qemu_st8_i32;
1571 case INDEX_op_goto_ptr:
1572 return TCG_TARGET_HAS_goto_ptr;
1574 case INDEX_op_mov_i32:
1575 case INDEX_op_setcond_i32:
1576 case INDEX_op_brcond_i32:
1577 case INDEX_op_ld8u_i32:
1578 case INDEX_op_ld8s_i32:
1579 case INDEX_op_ld16u_i32:
1580 case INDEX_op_ld16s_i32:
1581 case INDEX_op_ld_i32:
1582 case INDEX_op_st8_i32:
1583 case INDEX_op_st16_i32:
1584 case INDEX_op_st_i32:
1585 case INDEX_op_add_i32:
1586 case INDEX_op_sub_i32:
1587 case INDEX_op_mul_i32:
1588 case INDEX_op_and_i32:
1589 case INDEX_op_or_i32:
1590 case INDEX_op_xor_i32:
1591 case INDEX_op_shl_i32:
1592 case INDEX_op_shr_i32:
1593 case INDEX_op_sar_i32:
1594 return true;
1596 case INDEX_op_movcond_i32:
1597 return TCG_TARGET_HAS_movcond_i32;
1598 case INDEX_op_div_i32:
1599 case INDEX_op_divu_i32:
1600 return TCG_TARGET_HAS_div_i32;
1601 case INDEX_op_rem_i32:
1602 case INDEX_op_remu_i32:
1603 return TCG_TARGET_HAS_rem_i32;
1604 case INDEX_op_div2_i32:
1605 case INDEX_op_divu2_i32:
1606 return TCG_TARGET_HAS_div2_i32;
1607 case INDEX_op_rotl_i32:
1608 case INDEX_op_rotr_i32:
1609 return TCG_TARGET_HAS_rot_i32;
1610 case INDEX_op_deposit_i32:
1611 return TCG_TARGET_HAS_deposit_i32;
1612 case INDEX_op_extract_i32:
1613 return TCG_TARGET_HAS_extract_i32;
1614 case INDEX_op_sextract_i32:
1615 return TCG_TARGET_HAS_sextract_i32;
1616 case INDEX_op_extract2_i32:
1617 return TCG_TARGET_HAS_extract2_i32;
1618 case INDEX_op_add2_i32:
1619 return TCG_TARGET_HAS_add2_i32;
1620 case INDEX_op_sub2_i32:
1621 return TCG_TARGET_HAS_sub2_i32;
1622 case INDEX_op_mulu2_i32:
1623 return TCG_TARGET_HAS_mulu2_i32;
1624 case INDEX_op_muls2_i32:
1625 return TCG_TARGET_HAS_muls2_i32;
1626 case INDEX_op_muluh_i32:
1627 return TCG_TARGET_HAS_muluh_i32;
1628 case INDEX_op_mulsh_i32:
1629 return TCG_TARGET_HAS_mulsh_i32;
1630 case INDEX_op_ext8s_i32:
1631 return TCG_TARGET_HAS_ext8s_i32;
1632 case INDEX_op_ext16s_i32:
1633 return TCG_TARGET_HAS_ext16s_i32;
1634 case INDEX_op_ext8u_i32:
1635 return TCG_TARGET_HAS_ext8u_i32;
1636 case INDEX_op_ext16u_i32:
1637 return TCG_TARGET_HAS_ext16u_i32;
1638 case INDEX_op_bswap16_i32:
1639 return TCG_TARGET_HAS_bswap16_i32;
1640 case INDEX_op_bswap32_i32:
1641 return TCG_TARGET_HAS_bswap32_i32;
1642 case INDEX_op_not_i32:
1643 return TCG_TARGET_HAS_not_i32;
1644 case INDEX_op_neg_i32:
1645 return TCG_TARGET_HAS_neg_i32;
1646 case INDEX_op_andc_i32:
1647 return TCG_TARGET_HAS_andc_i32;
1648 case INDEX_op_orc_i32:
1649 return TCG_TARGET_HAS_orc_i32;
1650 case INDEX_op_eqv_i32:
1651 return TCG_TARGET_HAS_eqv_i32;
1652 case INDEX_op_nand_i32:
1653 return TCG_TARGET_HAS_nand_i32;
1654 case INDEX_op_nor_i32:
1655 return TCG_TARGET_HAS_nor_i32;
1656 case INDEX_op_clz_i32:
1657 return TCG_TARGET_HAS_clz_i32;
1658 case INDEX_op_ctz_i32:
1659 return TCG_TARGET_HAS_ctz_i32;
1660 case INDEX_op_ctpop_i32:
1661 return TCG_TARGET_HAS_ctpop_i32;
1663 case INDEX_op_brcond2_i32:
1664 case INDEX_op_setcond2_i32:
1665 return TCG_TARGET_REG_BITS == 32;
1667 case INDEX_op_mov_i64:
1668 case INDEX_op_setcond_i64:
1669 case INDEX_op_brcond_i64:
1670 case INDEX_op_ld8u_i64:
1671 case INDEX_op_ld8s_i64:
1672 case INDEX_op_ld16u_i64:
1673 case INDEX_op_ld16s_i64:
1674 case INDEX_op_ld32u_i64:
1675 case INDEX_op_ld32s_i64:
1676 case INDEX_op_ld_i64:
1677 case INDEX_op_st8_i64:
1678 case INDEX_op_st16_i64:
1679 case INDEX_op_st32_i64:
1680 case INDEX_op_st_i64:
1681 case INDEX_op_add_i64:
1682 case INDEX_op_sub_i64:
1683 case INDEX_op_mul_i64:
1684 case INDEX_op_and_i64:
1685 case INDEX_op_or_i64:
1686 case INDEX_op_xor_i64:
1687 case INDEX_op_shl_i64:
1688 case INDEX_op_shr_i64:
1689 case INDEX_op_sar_i64:
1690 case INDEX_op_ext_i32_i64:
1691 case INDEX_op_extu_i32_i64:
1692 return TCG_TARGET_REG_BITS == 64;
1694 case INDEX_op_movcond_i64:
1695 return TCG_TARGET_HAS_movcond_i64;
1696 case INDEX_op_div_i64:
1697 case INDEX_op_divu_i64:
1698 return TCG_TARGET_HAS_div_i64;
1699 case INDEX_op_rem_i64:
1700 case INDEX_op_remu_i64:
1701 return TCG_TARGET_HAS_rem_i64;
1702 case INDEX_op_div2_i64:
1703 case INDEX_op_divu2_i64:
1704 return TCG_TARGET_HAS_div2_i64;
1705 case INDEX_op_rotl_i64:
1706 case INDEX_op_rotr_i64:
1707 return TCG_TARGET_HAS_rot_i64;
1708 case INDEX_op_deposit_i64:
1709 return TCG_TARGET_HAS_deposit_i64;
1710 case INDEX_op_extract_i64:
1711 return TCG_TARGET_HAS_extract_i64;
1712 case INDEX_op_sextract_i64:
1713 return TCG_TARGET_HAS_sextract_i64;
1714 case INDEX_op_extract2_i64:
1715 return TCG_TARGET_HAS_extract2_i64;
1716 case INDEX_op_extrl_i64_i32:
1717 return TCG_TARGET_HAS_extrl_i64_i32;
1718 case INDEX_op_extrh_i64_i32:
1719 return TCG_TARGET_HAS_extrh_i64_i32;
1720 case INDEX_op_ext8s_i64:
1721 return TCG_TARGET_HAS_ext8s_i64;
1722 case INDEX_op_ext16s_i64:
1723 return TCG_TARGET_HAS_ext16s_i64;
1724 case INDEX_op_ext32s_i64:
1725 return TCG_TARGET_HAS_ext32s_i64;
1726 case INDEX_op_ext8u_i64:
1727 return TCG_TARGET_HAS_ext8u_i64;
1728 case INDEX_op_ext16u_i64:
1729 return TCG_TARGET_HAS_ext16u_i64;
1730 case INDEX_op_ext32u_i64:
1731 return TCG_TARGET_HAS_ext32u_i64;
1732 case INDEX_op_bswap16_i64:
1733 return TCG_TARGET_HAS_bswap16_i64;
1734 case INDEX_op_bswap32_i64:
1735 return TCG_TARGET_HAS_bswap32_i64;
1736 case INDEX_op_bswap64_i64:
1737 return TCG_TARGET_HAS_bswap64_i64;
1738 case INDEX_op_not_i64:
1739 return TCG_TARGET_HAS_not_i64;
1740 case INDEX_op_neg_i64:
1741 return TCG_TARGET_HAS_neg_i64;
1742 case INDEX_op_andc_i64:
1743 return TCG_TARGET_HAS_andc_i64;
1744 case INDEX_op_orc_i64:
1745 return TCG_TARGET_HAS_orc_i64;
1746 case INDEX_op_eqv_i64:
1747 return TCG_TARGET_HAS_eqv_i64;
1748 case INDEX_op_nand_i64:
1749 return TCG_TARGET_HAS_nand_i64;
1750 case INDEX_op_nor_i64:
1751 return TCG_TARGET_HAS_nor_i64;
1752 case INDEX_op_clz_i64:
1753 return TCG_TARGET_HAS_clz_i64;
1754 case INDEX_op_ctz_i64:
1755 return TCG_TARGET_HAS_ctz_i64;
1756 case INDEX_op_ctpop_i64:
1757 return TCG_TARGET_HAS_ctpop_i64;
1758 case INDEX_op_add2_i64:
1759 return TCG_TARGET_HAS_add2_i64;
1760 case INDEX_op_sub2_i64:
1761 return TCG_TARGET_HAS_sub2_i64;
1762 case INDEX_op_mulu2_i64:
1763 return TCG_TARGET_HAS_mulu2_i64;
1764 case INDEX_op_muls2_i64:
1765 return TCG_TARGET_HAS_muls2_i64;
1766 case INDEX_op_muluh_i64:
1767 return TCG_TARGET_HAS_muluh_i64;
1768 case INDEX_op_mulsh_i64:
1769 return TCG_TARGET_HAS_mulsh_i64;
1771 case INDEX_op_mov_vec:
1772 case INDEX_op_dup_vec:
1773 case INDEX_op_dupm_vec:
1774 case INDEX_op_ld_vec:
1775 case INDEX_op_st_vec:
1776 case INDEX_op_add_vec:
1777 case INDEX_op_sub_vec:
1778 case INDEX_op_and_vec:
1779 case INDEX_op_or_vec:
1780 case INDEX_op_xor_vec:
1781 case INDEX_op_cmp_vec:
1782 return have_vec;
1783 case INDEX_op_dup2_vec:
1784 return have_vec && TCG_TARGET_REG_BITS == 32;
1785 case INDEX_op_not_vec:
1786 return have_vec && TCG_TARGET_HAS_not_vec;
1787 case INDEX_op_neg_vec:
1788 return have_vec && TCG_TARGET_HAS_neg_vec;
1789 case INDEX_op_abs_vec:
1790 return have_vec && TCG_TARGET_HAS_abs_vec;
1791 case INDEX_op_andc_vec:
1792 return have_vec && TCG_TARGET_HAS_andc_vec;
1793 case INDEX_op_orc_vec:
1794 return have_vec && TCG_TARGET_HAS_orc_vec;
1795 case INDEX_op_mul_vec:
1796 return have_vec && TCG_TARGET_HAS_mul_vec;
1797 case INDEX_op_shli_vec:
1798 case INDEX_op_shri_vec:
1799 case INDEX_op_sari_vec:
1800 return have_vec && TCG_TARGET_HAS_shi_vec;
1801 case INDEX_op_shls_vec:
1802 case INDEX_op_shrs_vec:
1803 case INDEX_op_sars_vec:
1804 return have_vec && TCG_TARGET_HAS_shs_vec;
1805 case INDEX_op_shlv_vec:
1806 case INDEX_op_shrv_vec:
1807 case INDEX_op_sarv_vec:
1808 return have_vec && TCG_TARGET_HAS_shv_vec;
1809 case INDEX_op_rotli_vec:
1810 return have_vec && TCG_TARGET_HAS_roti_vec;
1811 case INDEX_op_rotls_vec:
1812 return have_vec && TCG_TARGET_HAS_rots_vec;
1813 case INDEX_op_rotlv_vec:
1814 case INDEX_op_rotrv_vec:
1815 return have_vec && TCG_TARGET_HAS_rotv_vec;
1816 case INDEX_op_ssadd_vec:
1817 case INDEX_op_usadd_vec:
1818 case INDEX_op_sssub_vec:
1819 case INDEX_op_ussub_vec:
1820 return have_vec && TCG_TARGET_HAS_sat_vec;
1821 case INDEX_op_smin_vec:
1822 case INDEX_op_umin_vec:
1823 case INDEX_op_smax_vec:
1824 case INDEX_op_umax_vec:
1825 return have_vec && TCG_TARGET_HAS_minmax_vec;
1826 case INDEX_op_bitsel_vec:
1827 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1828 case INDEX_op_cmpsel_vec:
1829 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1831 default:
1832 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1833 return true;
1837 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1838 and endian swap. Maybe it would be better to do the alignment
1839 and endian swap in tcg_reg_alloc_call(). */
1840 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1842 int i, real_args, nb_rets, pi;
1843 unsigned sizemask, flags;
1844 TCGHelperInfo *info;
1845 TCGOp *op;
1847 info = g_hash_table_lookup(helper_table, (gpointer)func);
1848 flags = info->flags;
1849 sizemask = info->sizemask;
1851 #ifdef CONFIG_PLUGIN
1852 /* detect non-plugin helpers */
1853 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1854 tcg_ctx->plugin_insn->calls_helpers = true;
1856 #endif
1858 #if defined(__sparc__) && !defined(__arch64__) \
1859 && !defined(CONFIG_TCG_INTERPRETER)
1860 /* We have 64-bit values in one register, but need to pass as two
1861 separate parameters. Split them. */
1862 int orig_sizemask = sizemask;
1863 int orig_nargs = nargs;
1864 TCGv_i64 retl, reth;
1865 TCGTemp *split_args[MAX_OPC_PARAM];
1867 retl = NULL;
1868 reth = NULL;
1869 if (sizemask != 0) {
1870 for (i = real_args = 0; i < nargs; ++i) {
1871 int is_64bit = sizemask & (1 << (i+1)*2);
1872 if (is_64bit) {
1873 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1874 TCGv_i32 h = tcg_temp_new_i32();
1875 TCGv_i32 l = tcg_temp_new_i32();
1876 tcg_gen_extr_i64_i32(l, h, orig);
1877 split_args[real_args++] = tcgv_i32_temp(h);
1878 split_args[real_args++] = tcgv_i32_temp(l);
1879 } else {
1880 split_args[real_args++] = args[i];
1883 nargs = real_args;
1884 args = split_args;
1885 sizemask = 0;
1887 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1888 for (i = 0; i < nargs; ++i) {
1889 int is_64bit = sizemask & (1 << (i+1)*2);
1890 int is_signed = sizemask & (2 << (i+1)*2);
1891 if (!is_64bit) {
1892 TCGv_i64 temp = tcg_temp_new_i64();
1893 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1894 if (is_signed) {
1895 tcg_gen_ext32s_i64(temp, orig);
1896 } else {
1897 tcg_gen_ext32u_i64(temp, orig);
1899 args[i] = tcgv_i64_temp(temp);
1902 #endif /* TCG_TARGET_EXTEND_ARGS */
1904 op = tcg_emit_op(INDEX_op_call);
1906 pi = 0;
1907 if (ret != NULL) {
1908 #if defined(__sparc__) && !defined(__arch64__) \
1909 && !defined(CONFIG_TCG_INTERPRETER)
1910 if (orig_sizemask & 1) {
1911 /* The 32-bit ABI is going to return the 64-bit value in
1912 the %o0/%o1 register pair. Prepare for this by using
1913 two return temporaries, and reassemble below. */
1914 retl = tcg_temp_new_i64();
1915 reth = tcg_temp_new_i64();
1916 op->args[pi++] = tcgv_i64_arg(reth);
1917 op->args[pi++] = tcgv_i64_arg(retl);
1918 nb_rets = 2;
1919 } else {
1920 op->args[pi++] = temp_arg(ret);
1921 nb_rets = 1;
1923 #else
1924 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1925 #ifdef HOST_WORDS_BIGENDIAN
1926 op->args[pi++] = temp_arg(ret + 1);
1927 op->args[pi++] = temp_arg(ret);
1928 #else
1929 op->args[pi++] = temp_arg(ret);
1930 op->args[pi++] = temp_arg(ret + 1);
1931 #endif
1932 nb_rets = 2;
1933 } else {
1934 op->args[pi++] = temp_arg(ret);
1935 nb_rets = 1;
1937 #endif
1938 } else {
1939 nb_rets = 0;
1941 TCGOP_CALLO(op) = nb_rets;
1943 real_args = 0;
1944 for (i = 0; i < nargs; i++) {
1945 int is_64bit = sizemask & (1 << (i+1)*2);
1946 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1947 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1948 /* some targets want aligned 64 bit args */
1949 if (real_args & 1) {
1950 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1951 real_args++;
1953 #endif
1954 /* If stack grows up, then we will be placing successive
1955 arguments at lower addresses, which means we need to
1956 reverse the order compared to how we would normally
1957 treat either big or little-endian. For those arguments
1958 that will wind up in registers, this still works for
1959 HPPA (the only current STACK_GROWSUP target) since the
1960 argument registers are *also* allocated in decreasing
1961 order. If another such target is added, this logic may
1962 have to get more complicated to differentiate between
1963 stack arguments and register arguments. */
1964 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1965 op->args[pi++] = temp_arg(args[i] + 1);
1966 op->args[pi++] = temp_arg(args[i]);
1967 #else
1968 op->args[pi++] = temp_arg(args[i]);
1969 op->args[pi++] = temp_arg(args[i] + 1);
1970 #endif
1971 real_args += 2;
1972 continue;
1975 op->args[pi++] = temp_arg(args[i]);
1976 real_args++;
1978 op->args[pi++] = (uintptr_t)func;
1979 op->args[pi++] = flags;
1980 TCGOP_CALLI(op) = real_args;
1982 /* Make sure the fields didn't overflow. */
1983 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1984 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1986 #if defined(__sparc__) && !defined(__arch64__) \
1987 && !defined(CONFIG_TCG_INTERPRETER)
1988 /* Free all of the parts we allocated above. */
1989 for (i = real_args = 0; i < orig_nargs; ++i) {
1990 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1991 if (is_64bit) {
1992 tcg_temp_free_internal(args[real_args++]);
1993 tcg_temp_free_internal(args[real_args++]);
1994 } else {
1995 real_args++;
1998 if (orig_sizemask & 1) {
1999 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
2000 Note that describing these as TCGv_i64 eliminates an unnecessary
2001 zero-extension that tcg_gen_concat_i32_i64 would create. */
2002 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2003 tcg_temp_free_i64(retl);
2004 tcg_temp_free_i64(reth);
2006 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2007 for (i = 0; i < nargs; ++i) {
2008 int is_64bit = sizemask & (1 << (i+1)*2);
2009 if (!is_64bit) {
2010 tcg_temp_free_internal(args[i]);
2013 #endif /* TCG_TARGET_EXTEND_ARGS */
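/*
 * Illustrative sketch, not part of the original file: how the sizemask tests
 * above decode a helper's type information.  Two bits describe each value:
 * slot 0 is the return value and slot i + 1 is argument i; within a slot the
 * low bit means "64-bit" and the high bit means "signed".
 */
#if 0
static bool example_arg_is_64bit(unsigned sizemask, int i)
{
    return sizemask & (1 << (i + 1) * 2);     /* bit 2 * (i + 1) */
}

static bool example_arg_is_signed(unsigned sizemask, int i)
{
    return sizemask & (2 << (i + 1) * 2);     /* bit 2 * (i + 1) + 1 */
}
#endif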
2016 static void tcg_reg_alloc_start(TCGContext *s)
2018 int i, n;
2020 for (i = 0, n = s->nb_temps; i < n; i++) {
2021 TCGTemp *ts = &s->temps[i];
2022 TCGTempVal val = TEMP_VAL_MEM;
2024 switch (ts->kind) {
2025 case TEMP_CONST:
2026 val = TEMP_VAL_CONST;
2027 break;
2028 case TEMP_FIXED:
2029 val = TEMP_VAL_REG;
2030 break;
2031 case TEMP_GLOBAL:
2032 break;
2033 case TEMP_NORMAL:
2034 val = TEMP_VAL_DEAD;
2035 /* fall through */
2036 case TEMP_LOCAL:
2037 ts->mem_allocated = 0;
2038 break;
2039 default:
2040 g_assert_not_reached();
2042 ts->val_type = val;
2045 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2048 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2049 TCGTemp *ts)
2051 int idx = temp_idx(ts);
2053 switch (ts->kind) {
2054 case TEMP_FIXED:
2055 case TEMP_GLOBAL:
2056 pstrcpy(buf, buf_size, ts->name);
2057 break;
2058 case TEMP_LOCAL:
2059 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2060 break;
2061 case TEMP_NORMAL:
2062 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2063 break;
2064 case TEMP_CONST:
2065 switch (ts->type) {
2066 case TCG_TYPE_I32:
2067 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2068 break;
2069 #if TCG_TARGET_REG_BITS > 32
2070 case TCG_TYPE_I64:
2071 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2072 break;
2073 #endif
2074 case TCG_TYPE_V64:
2075 case TCG_TYPE_V128:
2076 case TCG_TYPE_V256:
2077 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2078 64 << (ts->type - TCG_TYPE_V64), ts->val);
2079 break;
2080 default:
2081 g_assert_not_reached();
2083 break;
2085 return buf;
2088 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2089 int buf_size, TCGArg arg)
2091 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2094 /* Find helper name. */
2095 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2097 const char *ret = NULL;
2098 if (helper_table) {
2099 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2100 if (info) {
2101 ret = info->name;
2104 return ret;
2107 static const char * const cond_name[] =
2109 [TCG_COND_NEVER] = "never",
2110 [TCG_COND_ALWAYS] = "always",
2111 [TCG_COND_EQ] = "eq",
2112 [TCG_COND_NE] = "ne",
2113 [TCG_COND_LT] = "lt",
2114 [TCG_COND_GE] = "ge",
2115 [TCG_COND_LE] = "le",
2116 [TCG_COND_GT] = "gt",
2117 [TCG_COND_LTU] = "ltu",
2118 [TCG_COND_GEU] = "geu",
2119 [TCG_COND_LEU] = "leu",
2120 [TCG_COND_GTU] = "gtu"
2123 static const char * const ldst_name[] =
2125 [MO_UB] = "ub",
2126 [MO_SB] = "sb",
2127 [MO_LEUW] = "leuw",
2128 [MO_LESW] = "lesw",
2129 [MO_LEUL] = "leul",
2130 [MO_LESL] = "lesl",
2131 [MO_LEQ] = "leq",
2132 [MO_BEUW] = "beuw",
2133 [MO_BESW] = "besw",
2134 [MO_BEUL] = "beul",
2135 [MO_BESL] = "besl",
2136 [MO_BEQ] = "beq",
2139 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2140 #ifdef TARGET_ALIGNED_ONLY
2141 [MO_UNALN >> MO_ASHIFT] = "un+",
2142 [MO_ALIGN >> MO_ASHIFT] = "",
2143 #else
2144 [MO_UNALN >> MO_ASHIFT] = "",
2145 [MO_ALIGN >> MO_ASHIFT] = "al+",
2146 #endif
2147 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2148 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2149 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2150 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2151 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2152 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
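/*
 * The two helpers below treat a TCGRegSet as a plain bitmask.
 * tcg_regset_single() uses the power-of-two test: clearing the lowest
 * set bit with d & (d - 1) yields zero exactly when only one bit was
 * set (for instance 0b01000 & 0b00111 == 0).  tcg_regset_first() then
 * returns the index of that lowest set bit via count-trailing-zeros.
 */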
2155 static inline bool tcg_regset_single(TCGRegSet d)
2157 return (d & (d - 1)) == 0;
2160 static inline TCGReg tcg_regset_first(TCGRegSet d)
2162 if (TCG_TARGET_NB_REGS <= 32) {
2163 return ctz32(d);
2164 } else {
2165 return ctz64(d);
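/*
 * tcg_dump_ops() below prints one line per op.  As a rough, illustrative
 * example of the format produced by the qemu_log() calls (temp and label
 * numbers are made up):
 *
 *      brcond_i32 tmp0,tmp1,lt,$L1
 *      add_i32 tmp2,tmp0,tmp1
 *
 * followed, when requested, by the " sync:", " dead:" and " pref=" columns
 * derived from op->life and op->output_pref.
 */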
2169 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2171 char buf[128];
2172 TCGOp *op;
2174 QTAILQ_FOREACH(op, &s->ops, link) {
2175 int i, k, nb_oargs, nb_iargs, nb_cargs;
2176 const TCGOpDef *def;
2177 TCGOpcode c;
2178 int col = 0;
2180 c = op->opc;
2181 def = &tcg_op_defs[c];
2183 if (c == INDEX_op_insn_start) {
2184 nb_oargs = 0;
2185 col += qemu_log("\n ----");
2187 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2188 target_ulong a;
2189 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2190 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2191 #else
2192 a = op->args[i];
2193 #endif
2194 col += qemu_log(" " TARGET_FMT_lx, a);
2196 } else if (c == INDEX_op_call) {
2197 /* variable number of arguments */
2198 nb_oargs = TCGOP_CALLO(op);
2199 nb_iargs = TCGOP_CALLI(op);
2200 nb_cargs = def->nb_cargs;
2202 /* function name, flags, out args */
2203 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2204 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2205 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2206 for (i = 0; i < nb_oargs; i++) {
2207 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2208 op->args[i]));
2210 for (i = 0; i < nb_iargs; i++) {
2211 TCGArg arg = op->args[nb_oargs + i];
2212 const char *t = "<dummy>";
2213 if (arg != TCG_CALL_DUMMY_ARG) {
2214 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2216 col += qemu_log(",%s", t);
2218 } else {
2219 col += qemu_log(" %s ", def->name);
2221 nb_oargs = def->nb_oargs;
2222 nb_iargs = def->nb_iargs;
2223 nb_cargs = def->nb_cargs;
2225 if (def->flags & TCG_OPF_VECTOR) {
2226 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2227 8 << TCGOP_VECE(op));
2230 k = 0;
2231 for (i = 0; i < nb_oargs; i++) {
2232 if (k != 0) {
2233 col += qemu_log(",");
2235 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2236 op->args[k++]));
2238 for (i = 0; i < nb_iargs; i++) {
2239 if (k != 0) {
2240 col += qemu_log(",");
2242 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2243 op->args[k++]));
2245 switch (c) {
2246 case INDEX_op_brcond_i32:
2247 case INDEX_op_setcond_i32:
2248 case INDEX_op_movcond_i32:
2249 case INDEX_op_brcond2_i32:
2250 case INDEX_op_setcond2_i32:
2251 case INDEX_op_brcond_i64:
2252 case INDEX_op_setcond_i64:
2253 case INDEX_op_movcond_i64:
2254 case INDEX_op_cmp_vec:
2255 case INDEX_op_cmpsel_vec:
2256 if (op->args[k] < ARRAY_SIZE(cond_name)
2257 && cond_name[op->args[k]]) {
2258 col += qemu_log(",%s", cond_name[op->args[k++]]);
2259 } else {
2260 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2262 i = 1;
2263 break;
2264 case INDEX_op_qemu_ld_i32:
2265 case INDEX_op_qemu_st_i32:
2266 case INDEX_op_qemu_st8_i32:
2267 case INDEX_op_qemu_ld_i64:
2268 case INDEX_op_qemu_st_i64:
2270 TCGMemOpIdx oi = op->args[k++];
2271 MemOp op = get_memop(oi);
2272 unsigned ix = get_mmuidx(oi);
2274 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2275 col += qemu_log(",$0x%x,%u", op, ix);
2276 } else {
2277 const char *s_al, *s_op;
2278 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2279 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2280 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2282 i = 1;
2284 break;
2285 default:
2286 i = 0;
2287 break;
2289 switch (c) {
2290 case INDEX_op_set_label:
2291 case INDEX_op_br:
2292 case INDEX_op_brcond_i32:
2293 case INDEX_op_brcond_i64:
2294 case INDEX_op_brcond2_i32:
2295 col += qemu_log("%s$L%d", k ? "," : "",
2296 arg_label(op->args[k])->id);
2297 i++, k++;
2298 break;
2299 default:
2300 break;
2302 for (; i < nb_cargs; i++, k++) {
2303 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2307 if (have_prefs || op->life) {
2309 QemuLogFile *logfile;
2311 rcu_read_lock();
2312 logfile = qatomic_rcu_read(&qemu_logfile);
2313 if (logfile) {
2314 for (; col < 40; ++col) {
2315 putc(' ', logfile->fd);
2318 rcu_read_unlock();
2321 if (op->life) {
2322 unsigned life = op->life;
2324 if (life & (SYNC_ARG * 3)) {
2325 qemu_log(" sync:");
2326 for (i = 0; i < 2; ++i) {
2327 if (life & (SYNC_ARG << i)) {
2328 qemu_log(" %d", i);
2332 life /= DEAD_ARG;
2333 if (life) {
2334 qemu_log(" dead:");
2335 for (i = 0; life; ++i, life >>= 1) {
2336 if (life & 1) {
2337 qemu_log(" %d", i);
2343 if (have_prefs) {
2344 for (i = 0; i < nb_oargs; ++i) {
2345 TCGRegSet set = op->output_pref[i];
2347 if (i == 0) {
2348 qemu_log(" pref=");
2349 } else {
2350 qemu_log(",");
2352 if (set == 0) {
2353 qemu_log("none");
2354 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2355 qemu_log("all");
2356 #ifdef CONFIG_DEBUG_TCG
2357 } else if (tcg_regset_single(set)) {
2358 TCGReg reg = tcg_regset_first(set);
2359 qemu_log("%s", tcg_target_reg_names[reg]);
2360 #endif
2361 } else if (TCG_TARGET_NB_REGS <= 32) {
2362 qemu_log("%#x", (uint32_t)set);
2363 } else {
2364 qemu_log("%#" PRIx64, (uint64_t)set);
2369 qemu_log("\n");
2373 /* we give more priority to constraints with fewer registers */
2374 static int get_constraint_priority(const TCGOpDef *def, int k)
2376 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2377 int n;
2379 if (arg_ct->oalias) {
2380 /* an alias is equivalent to a single register */
2381 n = 1;
2382 } else {
2383 n = ctpop64(arg_ct->regs);
2385 return TCG_TARGET_NB_REGS - n + 1;
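/*
 * For example, an operand constrained to a single register gets priority
 * TCG_TARGET_NB_REGS, while one that accepts every register gets priority 1,
 * so after sort_constraints() the most constrained operands are allocated
 * first.
 */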
2388 /* sort from highest priority to lowest */
2389 static void sort_constraints(TCGOpDef *def, int start, int n)
2391 int i, j;
2392 TCGArgConstraint *a = def->args_ct;
2394 for (i = 0; i < n; i++) {
2395 a[start + i].sort_index = start + i;
2397 if (n <= 1) {
2398 return;
2400 for (i = 0; i < n - 1; i++) {
2401 for (j = i + 1; j < n; j++) {
2402 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2403 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2404 if (p1 < p2) {
2405 int tmp = a[start + i].sort_index;
2406 a[start + i].sort_index = a[start + j].sort_index;
2407 a[start + j].sort_index = tmp;
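/*
 * process_op_defs() below turns each backend constraint string into the
 * args_ct[] data used by the register allocator.  Within a string, a digit
 * aliases the operand to the numbered output, '&' requests a fresh ("new")
 * register, 'i' admits an immediate, and any remaining letters are
 * register-class or constant constraints supplied by the backend, either
 * through the CONST()/REGS() entries of tcg-target-con-str.h or through
 * target_parse_constraint().  As an illustrative (made-up) example, assuming
 * the backend defines an 'r' class for general registers, an operand string
 * of "ri" would accept a general register or an immediate, and "0" would
 * force the input into the same register as output 0.
 */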
2413 static void process_op_defs(TCGContext *s)
2415 TCGOpcode op;
2417 for (op = 0; op < NB_OPS; op++) {
2418 TCGOpDef *def = &tcg_op_defs[op];
2419 const TCGTargetOpDef *tdefs;
2420 int i, nb_args;
2422 if (def->flags & TCG_OPF_NOT_PRESENT) {
2423 continue;
2426 nb_args = def->nb_iargs + def->nb_oargs;
2427 if (nb_args == 0) {
2428 continue;
2431 tdefs = tcg_target_op_def(op);
2432 /* Missing TCGTargetOpDef entry. */
2433 tcg_debug_assert(tdefs != NULL);
2435 for (i = 0; i < nb_args; i++) {
2436 const char *ct_str = tdefs->args_ct_str[i];
2437 /* Incomplete TCGTargetOpDef entry. */
2438 tcg_debug_assert(ct_str != NULL);
2440 while (*ct_str != '\0') {
2441 switch(*ct_str) {
2442 case '0' ... '9':
2444 int oarg = *ct_str - '0';
2445 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2446 tcg_debug_assert(oarg < def->nb_oargs);
2447 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2448 def->args_ct[i] = def->args_ct[oarg];
2449 /* The output sets oalias. */
2450 def->args_ct[oarg].oalias = true;
2451 def->args_ct[oarg].alias_index = i;
2452 /* The input sets ialias. */
2453 def->args_ct[i].ialias = true;
2454 def->args_ct[i].alias_index = oarg;
2456 ct_str++;
2457 break;
2458 case '&':
2459 def->args_ct[i].newreg = true;
2460 ct_str++;
2461 break;
2462 case 'i':
2463 def->args_ct[i].ct |= TCG_CT_CONST;
2464 ct_str++;
2465 break;
2467 #ifdef TCG_TARGET_CON_STR_H
2468 /* Include all of the target-specific constraints. */
2470 #undef CONST
2471 #define CONST(CASE, MASK) \
2472 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2473 #define REGS(CASE, MASK) \
2474 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2476 #include "tcg-target-con-str.h"
2478 #undef REGS
2479 #undef CONST
2480 default:
2481 /* Typo in TCGTargetOpDef constraint. */
2482 g_assert_not_reached();
2483 #else
2484 default:
2486 TCGType type = (def->flags & TCG_OPF_64BIT
2487 ? TCG_TYPE_I64 : TCG_TYPE_I32);
2488 ct_str = target_parse_constraint(&def->args_ct[i],
2489 ct_str, type);
2490 /* Typo in TCGTargetOpDef constraint. */
2491 tcg_debug_assert(ct_str != NULL);
2493 #endif
2498 /* TCGTargetOpDef entry with too much information? */
2499 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2501 /* sort the constraints (XXX: this is just a heuristic) */
2502 sort_constraints(def, 0, def->nb_oargs);
2503 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2507 void tcg_op_remove(TCGContext *s, TCGOp *op)
2509 TCGLabel *label;
2511 switch (op->opc) {
2512 case INDEX_op_br:
2513 label = arg_label(op->args[0]);
2514 label->refs--;
2515 break;
2516 case INDEX_op_brcond_i32:
2517 case INDEX_op_brcond_i64:
2518 label = arg_label(op->args[3]);
2519 label->refs--;
2520 break;
2521 case INDEX_op_brcond2_i32:
2522 label = arg_label(op->args[5]);
2523 label->refs--;
2524 break;
2525 default:
2526 break;
2529 QTAILQ_REMOVE(&s->ops, op, link);
2530 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2531 s->nb_ops--;
2533 #ifdef CONFIG_PROFILER
2534 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2535 #endif
2538 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2540 TCGContext *s = tcg_ctx;
2541 TCGOp *op;
2543 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2544 op = tcg_malloc(sizeof(TCGOp));
2545 } else {
2546 op = QTAILQ_FIRST(&s->free_ops);
2547 QTAILQ_REMOVE(&s->free_ops, op, link);
2549 memset(op, 0, offsetof(TCGOp, link));
2550 op->opc = opc;
2551 s->nb_ops++;
2553 return op;
2556 TCGOp *tcg_emit_op(TCGOpcode opc)
2558 TCGOp *op = tcg_op_alloc(opc);
2559 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2560 return op;
2563 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2565 TCGOp *new_op = tcg_op_alloc(opc);
2566 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2567 return new_op;
2570 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2572 TCGOp *new_op = tcg_op_alloc(opc);
2573 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2574 return new_op;
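/*
 * As an illustrative example (label and op names made up), given
 *
 *         br $L1
 *         add_i32 tmp0,tmp1,tmp2     <-- unreachable
 *     set_label $L1
 *
 * the pass below removes the unreachable add, and since $L1 is then a label
 * with a single reference preceded by an unconditional branch to it, both
 * the br and the set_label are removed as a pair.
 */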
2577 /* Reachability analysis: remove unreachable code. */
2578 static void reachable_code_pass(TCGContext *s)
2580 TCGOp *op, *op_next;
2581 bool dead = false;
2583 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2584 bool remove = dead;
2585 TCGLabel *label;
2586 int call_flags;
2588 switch (op->opc) {
2589 case INDEX_op_set_label:
2590 label = arg_label(op->args[0]);
2591 if (label->refs == 0) {
2593 * While there is an occasional backward branch, virtually
2594 * all branches generated by the translators are forward.
2595 * Which means that generally we will have already removed
2596 * all references to the label by the time we see it, and there is
2597 * little to be gained by iterating.
2599 remove = true;
2600 } else {
2601 /* Once we see a label, insns become live again. */
2602 dead = false;
2603 remove = false;
2606 * Optimization can fold conditional branches to unconditional.
2607 * If we find a label with one reference which is preceded by
2608 * an unconditional branch to it, remove both. This needed to
2609 * wait until the dead code in between them was removed.
2611 if (label->refs == 1) {
2612 TCGOp *op_prev = QTAILQ_PREV(op, link);
2613 if (op_prev->opc == INDEX_op_br &&
2614 label == arg_label(op_prev->args[0])) {
2615 tcg_op_remove(s, op_prev);
2616 remove = true;
2620 break;
2622 case INDEX_op_br:
2623 case INDEX_op_exit_tb:
2624 case INDEX_op_goto_ptr:
2625 /* Unconditional branches; everything following is dead. */
2626 dead = true;
2627 break;
2629 case INDEX_op_call:
2630 /* Notice noreturn helper calls, raising exceptions. */
2631 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2632 if (call_flags & TCG_CALL_NO_RETURN) {
2633 dead = true;
2635 break;
2637 case INDEX_op_insn_start:
2638 /* Never remove -- we need to keep these for unwind. */
2639 remove = false;
2640 break;
2642 default:
2643 break;
2646 if (remove) {
2647 tcg_op_remove(s, op);
2652 #define TS_DEAD 1
2653 #define TS_MEM 2
2655 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2656 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
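/*
 * During the backward walk in liveness_pass_1, each temp's state combines
 * TS_DEAD (no later use of the value) and TS_MEM (the in-memory copy is
 * required to be up to date).  The per-op result is packed into op->life:
 * DEAD_ARG << n marks argument n as dying at this op, and SYNC_ARG << n
 * marks output n as needing to be stored back to memory, which is what the
 * IS_DEAD_ARG()/NEED_SYNC_ARG() tests above read back during register
 * allocation.
 */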
2658 /* For liveness_pass_1, the register preferences for a given temp. */
2659 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2661 return ts->state_ptr;
2664 /* For liveness_pass_1, reset the preferences for a given temp to the
2665 * maximal regset for its type.
2667 static inline void la_reset_pref(TCGTemp *ts)
2669 *la_temp_pref(ts)
2670 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2673 /* liveness analysis: end of function: all temps are dead, and globals
2674 should be in memory. */
2675 static void la_func_end(TCGContext *s, int ng, int nt)
2677 int i;
2679 for (i = 0; i < ng; ++i) {
2680 s->temps[i].state = TS_DEAD | TS_MEM;
2681 la_reset_pref(&s->temps[i]);
2683 for (i = ng; i < nt; ++i) {
2684 s->temps[i].state = TS_DEAD;
2685 la_reset_pref(&s->temps[i]);
2689 /* liveness analysis: end of basic block: all temps are dead, globals
2690 and local temps should be in memory. */
2691 static void la_bb_end(TCGContext *s, int ng, int nt)
2693 int i;
2695 for (i = 0; i < nt; ++i) {
2696 TCGTemp *ts = &s->temps[i];
2697 int state;
2699 switch (ts->kind) {
2700 case TEMP_FIXED:
2701 case TEMP_GLOBAL:
2702 case TEMP_LOCAL:
2703 state = TS_DEAD | TS_MEM;
2704 break;
2705 case TEMP_NORMAL:
2706 case TEMP_CONST:
2707 state = TS_DEAD;
2708 break;
2709 default:
2710 g_assert_not_reached();
2712 ts->state = state;
2713 la_reset_pref(ts);
2717 /* liveness analysis: sync globals back to memory. */
2718 static void la_global_sync(TCGContext *s, int ng)
2720 int i;
2722 for (i = 0; i < ng; ++i) {
2723 int state = s->temps[i].state;
2724 s->temps[i].state = state | TS_MEM;
2725 if (state == TS_DEAD) {
2726 /* If the global was previously dead, reset prefs. */
2727 la_reset_pref(&s->temps[i]);
2733 * liveness analysis: conditional branch: all temps are dead,
2734 * globals and local temps should be synced.
2736 static void la_bb_sync(TCGContext *s, int ng, int nt)
2738 la_global_sync(s, ng);
2740 for (int i = ng; i < nt; ++i) {
2741 TCGTemp *ts = &s->temps[i];
2742 int state;
2744 switch (ts->kind) {
2745 case TEMP_LOCAL:
2746 state = ts->state;
2747 ts->state = state | TS_MEM;
2748 if (state != TS_DEAD) {
2749 continue;
2751 break;
2752 case TEMP_NORMAL:
2753 s->temps[i].state = TS_DEAD;
2754 break;
2755 case TEMP_CONST:
2756 continue;
2757 default:
2758 g_assert_not_reached();
2760 la_reset_pref(&s->temps[i]);
2764 /* liveness analysis: sync globals back to memory and kill. */
2765 static void la_global_kill(TCGContext *s, int ng)
2767 int i;
2769 for (i = 0; i < ng; i++) {
2770 s->temps[i].state = TS_DEAD | TS_MEM;
2771 la_reset_pref(&s->temps[i]);
2775 /* liveness analysis: note live globals crossing calls. */
2776 static void la_cross_call(TCGContext *s, int nt)
2778 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2779 int i;
2781 for (i = 0; i < nt; i++) {
2782 TCGTemp *ts = &s->temps[i];
2783 if (!(ts->state & TS_DEAD)) {
2784 TCGRegSet *pset = la_temp_pref(ts);
2785 TCGRegSet set = *pset;
2787 set &= mask;
2788 /* If the combination is not possible, restart. */
2789 if (set == 0) {
2790 set = tcg_target_available_regs[ts->type] & mask;
2792 *pset = set;
2797 /* Liveness analysis: update the opc_arg_life array to tell if a
2798 given input argument is dead. Instructions updating dead
2799 temporaries are removed. */
2800 static void liveness_pass_1(TCGContext *s)
2802 int nb_globals = s->nb_globals;
2803 int nb_temps = s->nb_temps;
2804 TCGOp *op, *op_prev;
2805 TCGRegSet *prefs;
2806 int i;
2808 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2809 for (i = 0; i < nb_temps; ++i) {
2810 s->temps[i].state_ptr = prefs + i;
2813 /* ??? Should be redundant with the exit_tb that ends the TB. */
2814 la_func_end(s, nb_globals, nb_temps);
2816 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2817 int nb_iargs, nb_oargs;
2818 TCGOpcode opc_new, opc_new2;
2819 bool have_opc_new2;
2820 TCGLifeData arg_life = 0;
2821 TCGTemp *ts;
2822 TCGOpcode opc = op->opc;
2823 const TCGOpDef *def = &tcg_op_defs[opc];
2825 switch (opc) {
2826 case INDEX_op_call:
2828 int call_flags;
2829 int nb_call_regs;
2831 nb_oargs = TCGOP_CALLO(op);
2832 nb_iargs = TCGOP_CALLI(op);
2833 call_flags = op->args[nb_oargs + nb_iargs + 1];
2835 /* pure functions can be removed if their result is unused */
2836 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2837 for (i = 0; i < nb_oargs; i++) {
2838 ts = arg_temp(op->args[i]);
2839 if (ts->state != TS_DEAD) {
2840 goto do_not_remove_call;
2843 goto do_remove;
2845 do_not_remove_call:
2847 /* Output args are dead. */
2848 for (i = 0; i < nb_oargs; i++) {
2849 ts = arg_temp(op->args[i]);
2850 if (ts->state & TS_DEAD) {
2851 arg_life |= DEAD_ARG << i;
2853 if (ts->state & TS_MEM) {
2854 arg_life |= SYNC_ARG << i;
2856 ts->state = TS_DEAD;
2857 la_reset_pref(ts);
2859 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2860 op->output_pref[i] = 0;
2863 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2864 TCG_CALL_NO_READ_GLOBALS))) {
2865 la_global_kill(s, nb_globals);
2866 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2867 la_global_sync(s, nb_globals);
2870 /* Record arguments that die in this helper. */
2871 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2872 ts = arg_temp(op->args[i]);
2873 if (ts && ts->state & TS_DEAD) {
2874 arg_life |= DEAD_ARG << i;
2878 /* For all live registers, remove call-clobbered prefs. */
2879 la_cross_call(s, nb_temps);
2881 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2883 /* Input arguments are live for preceding opcodes. */
2884 for (i = 0; i < nb_iargs; i++) {
2885 ts = arg_temp(op->args[i + nb_oargs]);
2886 if (ts && ts->state & TS_DEAD) {
2887 /* For those arguments that die, and will be allocated
2888 * in registers, clear the register set for that arg,
2889 * to be filled in below. For args that will be on
2890 * the stack, reset to any available reg.
2892 *la_temp_pref(ts)
2893 = (i < nb_call_regs ? 0 :
2894 tcg_target_available_regs[ts->type]);
2895 ts->state &= ~TS_DEAD;
2899 /* For each input argument, add its input register to prefs.
2900 If a temp is used once, this produces a single set bit. */
2901 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2902 ts = arg_temp(op->args[i + nb_oargs]);
2903 if (ts) {
2904 tcg_regset_set_reg(*la_temp_pref(ts),
2905 tcg_target_call_iarg_regs[i]);
2909 break;
2910 case INDEX_op_insn_start:
2911 break;
2912 case INDEX_op_discard:
2913 /* mark the temporary as dead */
2914 ts = arg_temp(op->args[0]);
2915 ts->state = TS_DEAD;
2916 la_reset_pref(ts);
2917 break;
2919 case INDEX_op_add2_i32:
2920 opc_new = INDEX_op_add_i32;
2921 goto do_addsub2;
2922 case INDEX_op_sub2_i32:
2923 opc_new = INDEX_op_sub_i32;
2924 goto do_addsub2;
2925 case INDEX_op_add2_i64:
2926 opc_new = INDEX_op_add_i64;
2927 goto do_addsub2;
2928 case INDEX_op_sub2_i64:
2929 opc_new = INDEX_op_sub_i64;
2930 do_addsub2:
2931 nb_iargs = 4;
2932 nb_oargs = 2;
2933 /* Test if the high part of the operation is dead, but not
2934 the low part. The result can be optimized to a simple
2935 add or sub. This happens often for an x86_64 guest when the
2936 CPU mode is set to 32 bit. */
2937 if (arg_temp(op->args[1])->state == TS_DEAD) {
2938 if (arg_temp(op->args[0])->state == TS_DEAD) {
2939 goto do_remove;
2941 /* Replace the opcode and adjust the args in place,
2942 leaving 3 unused args at the end. */
2943 op->opc = opc = opc_new;
2944 op->args[1] = op->args[2];
2945 op->args[2] = op->args[4];
2946 /* Fall through and mark the single-word operation live. */
2947 nb_iargs = 2;
2948 nb_oargs = 1;
2950 goto do_not_remove;
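/*
 * Illustrative example (operand names made up): a double-word addition
 *     add2_i32 rl,rh,al,ah,bl,bh
 * whose high result rh is dead but whose low result rl is live is rewritten
 * in place by the code above into
 *     add_i32 rl,al,bl
 * i.e. args[1] and args[2] take the old args[2] and args[4], and only the
 * single-word result is marked live.
 */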
2952 case INDEX_op_mulu2_i32:
2953 opc_new = INDEX_op_mul_i32;
2954 opc_new2 = INDEX_op_muluh_i32;
2955 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2956 goto do_mul2;
2957 case INDEX_op_muls2_i32:
2958 opc_new = INDEX_op_mul_i32;
2959 opc_new2 = INDEX_op_mulsh_i32;
2960 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2961 goto do_mul2;
2962 case INDEX_op_mulu2_i64:
2963 opc_new = INDEX_op_mul_i64;
2964 opc_new2 = INDEX_op_muluh_i64;
2965 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2966 goto do_mul2;
2967 case INDEX_op_muls2_i64:
2968 opc_new = INDEX_op_mul_i64;
2969 opc_new2 = INDEX_op_mulsh_i64;
2970 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2971 goto do_mul2;
2972 do_mul2:
2973 nb_iargs = 2;
2974 nb_oargs = 2;
2975 if (arg_temp(op->args[1])->state == TS_DEAD) {
2976 if (arg_temp(op->args[0])->state == TS_DEAD) {
2977 /* Both parts of the operation are dead. */
2978 goto do_remove;
2980 /* The high part of the operation is dead; generate the low. */
2981 op->opc = opc = opc_new;
2982 op->args[1] = op->args[2];
2983 op->args[2] = op->args[3];
2984 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2985 /* The low part of the operation is dead; generate the high. */
2986 op->opc = opc = opc_new2;
2987 op->args[0] = op->args[1];
2988 op->args[1] = op->args[2];
2989 op->args[2] = op->args[3];
2990 } else {
2991 goto do_not_remove;
2993 /* Mark the single-word operation live. */
2994 nb_oargs = 1;
2995 goto do_not_remove;
2997 default:
2998 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2999 nb_iargs = def->nb_iargs;
3000 nb_oargs = def->nb_oargs;
3002 /* Test if the operation can be removed because all
3003 its outputs are dead. We assume that nb_oargs == 0
3004 implies side effects */
3005 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3006 for (i = 0; i < nb_oargs; i++) {
3007 if (arg_temp(op->args[i])->state != TS_DEAD) {
3008 goto do_not_remove;
3011 goto do_remove;
3013 goto do_not_remove;
3015 do_remove:
3016 tcg_op_remove(s, op);
3017 break;
3019 do_not_remove:
3020 for (i = 0; i < nb_oargs; i++) {
3021 ts = arg_temp(op->args[i]);
3023 /* Remember the preference of the uses that followed. */
3024 op->output_pref[i] = *la_temp_pref(ts);
3026 /* Output args are dead. */
3027 if (ts->state & TS_DEAD) {
3028 arg_life |= DEAD_ARG << i;
3030 if (ts->state & TS_MEM) {
3031 arg_life |= SYNC_ARG << i;
3033 ts->state = TS_DEAD;
3034 la_reset_pref(ts);
3037 /* If end of basic block, update. */
3038 if (def->flags & TCG_OPF_BB_EXIT) {
3039 la_func_end(s, nb_globals, nb_temps);
3040 } else if (def->flags & TCG_OPF_COND_BRANCH) {
3041 la_bb_sync(s, nb_globals, nb_temps);
3042 } else if (def->flags & TCG_OPF_BB_END) {
3043 la_bb_end(s, nb_globals, nb_temps);
3044 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3045 la_global_sync(s, nb_globals);
3046 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3047 la_cross_call(s, nb_temps);
3051 /* Record arguments that die in this opcode. */
3052 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3053 ts = arg_temp(op->args[i]);
3054 if (ts->state & TS_DEAD) {
3055 arg_life |= DEAD_ARG << i;
3059 /* Input arguments are live for preceding opcodes. */
3060 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3061 ts = arg_temp(op->args[i]);
3062 if (ts->state & TS_DEAD) {
3063 /* For operands that were dead, initially allow
3064 all regs for the type. */
3065 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3066 ts->state &= ~TS_DEAD;
3070 /* Incorporate constraints for this operand. */
3071 switch (opc) {
3072 case INDEX_op_mov_i32:
3073 case INDEX_op_mov_i64:
3074 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3075 have proper constraints. That said, special case
3076 moves to propagate preferences backward. */
3077 if (IS_DEAD_ARG(1)) {
3078 *la_temp_pref(arg_temp(op->args[0]))
3079 = *la_temp_pref(arg_temp(op->args[1]));
3081 break;
3083 default:
3084 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3085 const TCGArgConstraint *ct = &def->args_ct[i];
3086 TCGRegSet set, *pset;
3088 ts = arg_temp(op->args[i]);
3089 pset = la_temp_pref(ts);
3090 set = *pset;
3092 set &= ct->regs;
3093 if (ct->ialias) {
3094 set &= op->output_pref[ct->alias_index];
3096 /* If the combination is not possible, restart. */
3097 if (set == 0) {
3098 set = ct->regs;
3100 *pset = set;
3102 break;
3104 break;
3106 op->life = arg_life;
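/*
 * liveness_pass_2() below handles globals whose canonical location is
 * reached through another register (indirect_reg).  Each such global gets a
 * shadow temporary: a plain load is inserted before the first use after the
 * shadow went dead, op arguments are rewritten to name the shadow, and a
 * store is inserted (or a now-useless mov dropped) after writes that must
 * reach memory, using the same TS_DEAD/TS_MEM bookkeeping as pass 1.
 */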
3110 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3111 static bool liveness_pass_2(TCGContext *s)
3113 int nb_globals = s->nb_globals;
3114 int nb_temps, i;
3115 bool changes = false;
3116 TCGOp *op, *op_next;
3118 /* Create a temporary for each indirect global. */
3119 for (i = 0; i < nb_globals; ++i) {
3120 TCGTemp *its = &s->temps[i];
3121 if (its->indirect_reg) {
3122 TCGTemp *dts = tcg_temp_alloc(s);
3123 dts->type = its->type;
3124 dts->base_type = its->base_type;
3125 its->state_ptr = dts;
3126 } else {
3127 its->state_ptr = NULL;
3129 /* All globals begin dead. */
3130 its->state = TS_DEAD;
3132 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3133 TCGTemp *its = &s->temps[i];
3134 its->state_ptr = NULL;
3135 its->state = TS_DEAD;
3138 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3139 TCGOpcode opc = op->opc;
3140 const TCGOpDef *def = &tcg_op_defs[opc];
3141 TCGLifeData arg_life = op->life;
3142 int nb_iargs, nb_oargs, call_flags;
3143 TCGTemp *arg_ts, *dir_ts;
3145 if (opc == INDEX_op_call) {
3146 nb_oargs = TCGOP_CALLO(op);
3147 nb_iargs = TCGOP_CALLI(op);
3148 call_flags = op->args[nb_oargs + nb_iargs + 1];
3149 } else {
3150 nb_iargs = def->nb_iargs;
3151 nb_oargs = def->nb_oargs;
3153 /* Set flags similar to how calls require. */
3154 if (def->flags & TCG_OPF_COND_BRANCH) {
3155 /* Like reading globals: sync_globals */
3156 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3157 } else if (def->flags & TCG_OPF_BB_END) {
3158 /* Like writing globals: save_globals */
3159 call_flags = 0;
3160 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3161 /* Like reading globals: sync_globals */
3162 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3163 } else {
3164 /* No effect on globals. */
3165 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3166 TCG_CALL_NO_WRITE_GLOBALS);
3170 /* Make sure that input arguments are available. */
3171 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3172 arg_ts = arg_temp(op->args[i]);
3173 if (arg_ts) {
3174 dir_ts = arg_ts->state_ptr;
3175 if (dir_ts && arg_ts->state == TS_DEAD) {
3176 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3177 ? INDEX_op_ld_i32
3178 : INDEX_op_ld_i64);
3179 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3181 lop->args[0] = temp_arg(dir_ts);
3182 lop->args[1] = temp_arg(arg_ts->mem_base);
3183 lop->args[2] = arg_ts->mem_offset;
3185 /* Loaded, but synced with memory. */
3186 arg_ts->state = TS_MEM;
3191 /* Perform input replacement, and mark inputs that became dead.
3192 No action is required except keeping temp_state up to date
3193 so that we reload when needed. */
3194 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3195 arg_ts = arg_temp(op->args[i]);
3196 if (arg_ts) {
3197 dir_ts = arg_ts->state_ptr;
3198 if (dir_ts) {
3199 op->args[i] = temp_arg(dir_ts);
3200 changes = true;
3201 if (IS_DEAD_ARG(i)) {
3202 arg_ts->state = TS_DEAD;
3208 /* Liveness analysis should ensure that the following are
3209 all correct, for call sites and basic block end points. */
3210 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3211 /* Nothing to do */
3212 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3213 for (i = 0; i < nb_globals; ++i) {
3214 /* Liveness should see that globals are synced back,
3215 that is, either TS_DEAD or TS_MEM. */
3216 arg_ts = &s->temps[i];
3217 tcg_debug_assert(arg_ts->state_ptr == 0
3218 || arg_ts->state != 0);
3220 } else {
3221 for (i = 0; i < nb_globals; ++i) {
3222 /* Liveness should see that globals are saved back,
3223 that is, TS_DEAD, waiting to be reloaded. */
3224 arg_ts = &s->temps[i];
3225 tcg_debug_assert(arg_ts->state_ptr == 0
3226 || arg_ts->state == TS_DEAD);
3230 /* Outputs become available. */
3231 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3232 arg_ts = arg_temp(op->args[0]);
3233 dir_ts = arg_ts->state_ptr;
3234 if (dir_ts) {
3235 op->args[0] = temp_arg(dir_ts);
3236 changes = true;
3238 /* The output is now live and modified. */
3239 arg_ts->state = 0;
3241 if (NEED_SYNC_ARG(0)) {
3242 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3243 ? INDEX_op_st_i32
3244 : INDEX_op_st_i64);
3245 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3246 TCGTemp *out_ts = dir_ts;
3248 if (IS_DEAD_ARG(0)) {
3249 out_ts = arg_temp(op->args[1]);
3250 arg_ts->state = TS_DEAD;
3251 tcg_op_remove(s, op);
3252 } else {
3253 arg_ts->state = TS_MEM;
3256 sop->args[0] = temp_arg(out_ts);
3257 sop->args[1] = temp_arg(arg_ts->mem_base);
3258 sop->args[2] = arg_ts->mem_offset;
3259 } else {
3260 tcg_debug_assert(!IS_DEAD_ARG(0));
3263 } else {
3264 for (i = 0; i < nb_oargs; i++) {
3265 arg_ts = arg_temp(op->args[i]);
3266 dir_ts = arg_ts->state_ptr;
3267 if (!dir_ts) {
3268 continue;
3270 op->args[i] = temp_arg(dir_ts);
3271 changes = true;
3273 /* The output is now live and modified. */
3274 arg_ts->state = 0;
3276 /* Sync outputs upon their last write. */
3277 if (NEED_SYNC_ARG(i)) {
3278 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3279 ? INDEX_op_st_i32
3280 : INDEX_op_st_i64);
3281 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3283 sop->args[0] = temp_arg(dir_ts);
3284 sop->args[1] = temp_arg(arg_ts->mem_base);
3285 sop->args[2] = arg_ts->mem_offset;
3287 arg_ts->state = TS_MEM;
3289 /* Drop outputs that are dead. */
3290 if (IS_DEAD_ARG(i)) {
3291 arg_ts->state = TS_DEAD;
3297 return changes;
3300 #ifdef CONFIG_DEBUG_TCG
3301 static void dump_regs(TCGContext *s)
3303 TCGTemp *ts;
3304 int i;
3305 char buf[64];
3307 for(i = 0; i < s->nb_temps; i++) {
3308 ts = &s->temps[i];
3309 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3310 switch(ts->val_type) {
3311 case TEMP_VAL_REG:
3312 printf("%s", tcg_target_reg_names[ts->reg]);
3313 break;
3314 case TEMP_VAL_MEM:
3315 printf("%d(%s)", (int)ts->mem_offset,
3316 tcg_target_reg_names[ts->mem_base->reg]);
3317 break;
3318 case TEMP_VAL_CONST:
3319 printf("$0x%" PRIx64, ts->val);
3320 break;
3321 case TEMP_VAL_DEAD:
3322 printf("D");
3323 break;
3324 default:
3325 printf("???");
3326 break;
3328 printf("\n");
3331 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3332 if (s->reg_to_temp[i] != NULL) {
3333 printf("%s: %s\n",
3334 tcg_target_reg_names[i],
3335 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3340 static void check_regs(TCGContext *s)
3342 int reg;
3343 int k;
3344 TCGTemp *ts;
3345 char buf[64];
3347 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3348 ts = s->reg_to_temp[reg];
3349 if (ts != NULL) {
3350 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3351 printf("Inconsistency for register %s:\n",
3352 tcg_target_reg_names[reg]);
3353 goto fail;
3357 for (k = 0; k < s->nb_temps; k++) {
3358 ts = &s->temps[k];
3359 if (ts->val_type == TEMP_VAL_REG
3360 && ts->kind != TEMP_FIXED
3361 && s->reg_to_temp[ts->reg] != ts) {
3362 printf("Inconsistency for temp %s:\n",
3363 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3364 fail:
3365 printf("reg state:\n");
3366 dump_regs(s);
3367 tcg_abort();
3371 #endif
3373 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3375 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3376 /* Sparc64 stack is accessed with an offset of 2047 */
3377 s->current_frame_offset = (s->current_frame_offset +
3378 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3379 ~(sizeof(tcg_target_long) - 1);
3380 #endif
3381 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3382 s->frame_end) {
3383 tcg_abort();
3385 ts->mem_offset = s->current_frame_offset;
3386 ts->mem_base = s->frame_temp;
3387 ts->mem_allocated = 1;
3388 s->current_frame_offset += sizeof(tcg_target_long);
3391 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3393 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3394 mark it free; otherwise mark it dead. */
3395 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3397 TCGTempVal new_type;
3399 switch (ts->kind) {
3400 case TEMP_FIXED:
3401 return;
3402 case TEMP_GLOBAL:
3403 case TEMP_LOCAL:
3404 new_type = TEMP_VAL_MEM;
3405 break;
3406 case TEMP_NORMAL:
3407 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3408 break;
3409 case TEMP_CONST:
3410 new_type = TEMP_VAL_CONST;
3411 break;
3412 default:
3413 g_assert_not_reached();
3415 if (ts->val_type == TEMP_VAL_REG) {
3416 s->reg_to_temp[ts->reg] = NULL;
3418 ts->val_type = new_type;
3421 /* Mark a temporary as dead. */
3422 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3424 temp_free_or_dead(s, ts, 1);
3427 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3428 register needs to be allocated to store a constant. If 'free_or_dead'
3429 is non-zero, subsequently release the temporary; if it is positive, the
3430 temp is dead; if it is negative, the temp is free. */
3431 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3432 TCGRegSet preferred_regs, int free_or_dead)
3434 if (!temp_readonly(ts) && !ts->mem_coherent) {
3435 if (!ts->mem_allocated) {
3436 temp_allocate_frame(s, ts);
3438 switch (ts->val_type) {
3439 case TEMP_VAL_CONST:
3440 /* If we're going to free the temp immediately, then we won't
3441 require it later in a register, so attempt to store the
3442 constant to memory directly. */
3443 if (free_or_dead
3444 && tcg_out_sti(s, ts->type, ts->val,
3445 ts->mem_base->reg, ts->mem_offset)) {
3446 break;
3448 temp_load(s, ts, tcg_target_available_regs[ts->type],
3449 allocated_regs, preferred_regs);
3450 /* fallthrough */
3452 case TEMP_VAL_REG:
3453 tcg_out_st(s, ts->type, ts->reg,
3454 ts->mem_base->reg, ts->mem_offset);
3455 break;
3457 case TEMP_VAL_MEM:
3458 break;
3460 case TEMP_VAL_DEAD:
3461 default:
3462 tcg_abort();
3464 ts->mem_coherent = 1;
3466 if (free_or_dead) {
3467 temp_free_or_dead(s, ts, free_or_dead);
3471 /* free register 'reg' by spilling the corresponding temporary if necessary */
3472 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3474 TCGTemp *ts = s->reg_to_temp[reg];
3475 if (ts != NULL) {
3476 temp_sync(s, ts, allocated_regs, 0, -1);
3481 * tcg_reg_alloc:
3482 * @required_regs: Set of registers in which we must allocate.
3483 * @allocated_regs: Set of registers which must be avoided.
3484 * @preferred_regs: Set of registers we should prefer.
3485 * @rev: True if we search the registers in "indirect" order.
3487 * The allocated register must be in @required_regs & ~@allocated_regs,
3488 * but if we can put it in @preferred_regs we may save a move later.
3490 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3491 TCGRegSet allocated_regs,
3492 TCGRegSet preferred_regs, bool rev)
3494 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3495 TCGRegSet reg_ct[2];
3496 const int *order;
3498 reg_ct[1] = required_regs & ~allocated_regs;
3499 tcg_debug_assert(reg_ct[1] != 0);
3500 reg_ct[0] = reg_ct[1] & preferred_regs;
3502 /* Skip the preferred_regs option if it cannot be satisfied,
3503 or if the preference made no difference. */
3504 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3506 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3508 /* Try free registers, preferences first. */
3509 for (j = f; j < 2; j++) {
3510 TCGRegSet set = reg_ct[j];
3512 if (tcg_regset_single(set)) {
3513 /* One register in the set. */
3514 TCGReg reg = tcg_regset_first(set);
3515 if (s->reg_to_temp[reg] == NULL) {
3516 return reg;
3518 } else {
3519 for (i = 0; i < n; i++) {
3520 TCGReg reg = order[i];
3521 if (s->reg_to_temp[reg] == NULL &&
3522 tcg_regset_test_reg(set, reg)) {
3523 return reg;
3529 /* We must spill something. */
3530 for (j = f; j < 2; j++) {
3531 TCGRegSet set = reg_ct[j];
3533 if (tcg_regset_single(set)) {
3534 /* One register in the set. */
3535 TCGReg reg = tcg_regset_first(set);
3536 tcg_reg_free(s, reg, allocated_regs);
3537 return reg;
3538 } else {
3539 for (i = 0; i < n; i++) {
3540 TCGReg reg = order[i];
3541 if (tcg_regset_test_reg(set, reg)) {
3542 tcg_reg_free(s, reg, allocated_regs);
3543 return reg;
3549 tcg_abort();
3552 /* Make sure the temporary is in a register. If needed, allocate the register
3553 from DESIRED while avoiding ALLOCATED. */
3554 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3555 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3557 TCGReg reg;
3559 switch (ts->val_type) {
3560 case TEMP_VAL_REG:
3561 return;
3562 case TEMP_VAL_CONST:
3563 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3564 preferred_regs, ts->indirect_base);
3565 if (ts->type <= TCG_TYPE_I64) {
3566 tcg_out_movi(s, ts->type, reg, ts->val);
3567 } else {
3568 uint64_t val = ts->val;
3569 MemOp vece = MO_64;
3572 * Find the minimal vector element that matches the constant.
3573 * The targets will, in general, have to do this search anyway,
3574 * so do it generically here.
3576 if (val == dup_const(MO_8, val)) {
3577 vece = MO_8;
3578 } else if (val == dup_const(MO_16, val)) {
3579 vece = MO_16;
3580 } else if (val == dup_const(MO_32, val)) {
3581 vece = MO_32;
3584 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
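/*
 * For instance, 0xaaaaaaaaaaaaaaaa replicates a single byte and is emitted
 * with vece = MO_8, 0x0000000100000001 only replicates at 32-bit granularity
 * and uses MO_32, and a value matching none of the patterns keeps the MO_64
 * default chosen above.
 */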
3586 ts->mem_coherent = 0;
3587 break;
3588 case TEMP_VAL_MEM:
3589 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3590 preferred_regs, ts->indirect_base);
3591 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3592 ts->mem_coherent = 1;
3593 break;
3594 case TEMP_VAL_DEAD:
3595 default:
3596 tcg_abort();
3598 ts->reg = reg;
3599 ts->val_type = TEMP_VAL_REG;
3600 s->reg_to_temp[reg] = ts;
3603 /* Save a temporary to memory. 'allocated_regs' is used in case a
3604 temporary register needs to be allocated to store a constant. */
3605 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3607 /* The liveness analysis already ensures that globals are back
3608 in memory. Keep a tcg_debug_assert for safety. */
3609 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3612 /* save globals to their canonical location and assume they can be
3613 modified by the following code. 'allocated_regs' is used in case a
3614 temporary register needs to be allocated to store a constant. */
3615 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3617 int i, n;
3619 for (i = 0, n = s->nb_globals; i < n; i++) {
3620 temp_save(s, &s->temps[i], allocated_regs);
3624 /* sync globals to their canonical location and assume they can be
3625 read by the following code. 'allocated_regs' is used in case a
3626 temporary register needs to be allocated to store a constant. */
3627 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3629 int i, n;
3631 for (i = 0, n = s->nb_globals; i < n; i++) {
3632 TCGTemp *ts = &s->temps[i];
3633 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3634 || ts->kind == TEMP_FIXED
3635 || ts->mem_coherent);
3639 /* at the end of a basic block, we assume all temporaries are dead and
3640 all globals are stored at their canonical location. */
3641 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3643 int i;
3645 for (i = s->nb_globals; i < s->nb_temps; i++) {
3646 TCGTemp *ts = &s->temps[i];
3648 switch (ts->kind) {
3649 case TEMP_LOCAL:
3650 temp_save(s, ts, allocated_regs);
3651 break;
3652 case TEMP_NORMAL:
3653 /* The liveness analysis already ensures that temps are dead.
3654 Keep a tcg_debug_assert for safety. */
3655 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3656 break;
3657 case TEMP_CONST:
3658 /* Similarly, we should have freed any allocated register. */
3659 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3660 break;
3661 default:
3662 g_assert_not_reached();
3666 save_globals(s, allocated_regs);
3670 * At a conditional branch, we assume all temporaries are dead and
3671 * all globals and local temps are synced to their location.
3673 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3675 sync_globals(s, allocated_regs);
3677 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3678 TCGTemp *ts = &s->temps[i];
3680 * The liveness analysis already ensures that temps are dead.
3681 * Keep tcg_debug_asserts for safety.
3683 switch (ts->kind) {
3684 case TEMP_LOCAL:
3685 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3686 break;
3687 case TEMP_NORMAL:
3688 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3689 break;
3690 case TEMP_CONST:
3691 break;
3692 default:
3693 g_assert_not_reached();
3699 * Specialized code generation for INDEX_op_mov_* with a constant.
3701 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3702 tcg_target_ulong val, TCGLifeData arg_life,
3703 TCGRegSet preferred_regs)
3705 /* ENV should not be modified. */
3706 tcg_debug_assert(!temp_readonly(ots));
3708 /* The movi is not explicitly generated here. */
3709 if (ots->val_type == TEMP_VAL_REG) {
3710 s->reg_to_temp[ots->reg] = NULL;
3712 ots->val_type = TEMP_VAL_CONST;
3713 ots->val = val;
3714 ots->mem_coherent = 0;
3715 if (NEED_SYNC_ARG(0)) {
3716 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3717 } else if (IS_DEAD_ARG(0)) {
3718 temp_dead(s, ots);
3723 * Specialized code generation for INDEX_op_mov_*.
3725 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3727 const TCGLifeData arg_life = op->life;
3728 TCGRegSet allocated_regs, preferred_regs;
3729 TCGTemp *ts, *ots;
3730 TCGType otype, itype;
3732 allocated_regs = s->reserved_regs;
3733 preferred_regs = op->output_pref[0];
3734 ots = arg_temp(op->args[0]);
3735 ts = arg_temp(op->args[1]);
3737 /* ENV should not be modified. */
3738 tcg_debug_assert(!temp_readonly(ots));
3740 /* Note that otype != itype for no-op truncation. */
3741 otype = ots->type;
3742 itype = ts->type;
3744 if (ts->val_type == TEMP_VAL_CONST) {
3745 /* propagate constant or generate sti */
3746 tcg_target_ulong val = ts->val;
3747 if (IS_DEAD_ARG(1)) {
3748 temp_dead(s, ts);
3750 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3751 return;
3754 /* If the source value is in memory we're going to be forced
3755 to have it in a register in order to perform the copy. Copy
3756 the SOURCE value into its own register first, that way we
3757 don't have to reload SOURCE the next time it is used. */
3758 if (ts->val_type == TEMP_VAL_MEM) {
3759 temp_load(s, ts, tcg_target_available_regs[itype],
3760 allocated_regs, preferred_regs);
3763 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3764 if (IS_DEAD_ARG(0)) {
3765 /* mov to a non-saved dead register makes no sense (even with
3766 liveness analysis disabled). */
3767 tcg_debug_assert(NEED_SYNC_ARG(0));
3768 if (!ots->mem_allocated) {
3769 temp_allocate_frame(s, ots);
3771 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3772 if (IS_DEAD_ARG(1)) {
3773 temp_dead(s, ts);
3775 temp_dead(s, ots);
3776 } else {
3777 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3778 /* the mov can be suppressed */
3779 if (ots->val_type == TEMP_VAL_REG) {
3780 s->reg_to_temp[ots->reg] = NULL;
3782 ots->reg = ts->reg;
3783 temp_dead(s, ts);
3784 } else {
3785 if (ots->val_type != TEMP_VAL_REG) {
3786 /* When allocating a new register, make sure to not spill the
3787 input one. */
3788 tcg_regset_set_reg(allocated_regs, ts->reg);
3789 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3790 allocated_regs, preferred_regs,
3791 ots->indirect_base);
3793 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3795 * Cross register class move not supported.
3796 * Store the source register into the destination slot
3797 * and leave the destination temp as TEMP_VAL_MEM.
3799 assert(!temp_readonly(ots));
3800 if (!ts->mem_allocated) {
3801 temp_allocate_frame(s, ots);
3803 tcg_out_st(s, ts->type, ts->reg,
3804 ots->mem_base->reg, ots->mem_offset);
3805 ots->mem_coherent = 1;
3806 temp_free_or_dead(s, ots, -1);
3807 return;
3810 ots->val_type = TEMP_VAL_REG;
3811 ots->mem_coherent = 0;
3812 s->reg_to_temp[ots->reg] = ots;
3813 if (NEED_SYNC_ARG(0)) {
3814 temp_sync(s, ots, allocated_regs, 0, 0);
3820 * Specialized code generation for INDEX_op_dup_vec.
3822 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3824 const TCGLifeData arg_life = op->life;
3825 TCGRegSet dup_out_regs, dup_in_regs;
3826 TCGTemp *its, *ots;
3827 TCGType itype, vtype;
3828 intptr_t endian_fixup;
3829 unsigned vece;
3830 bool ok;
3832 ots = arg_temp(op->args[0]);
3833 its = arg_temp(op->args[1]);
3835 /* ENV should not be modified. */
3836 tcg_debug_assert(!temp_readonly(ots));
3838 itype = its->type;
3839 vece = TCGOP_VECE(op);
3840 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3842 if (its->val_type == TEMP_VAL_CONST) {
3843 /* Propagate constant via movi -> dupi. */
3844 tcg_target_ulong val = its->val;
3845 if (IS_DEAD_ARG(1)) {
3846 temp_dead(s, its);
3848 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3849 return;
3852 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3853 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3855 /* Allocate the output register now. */
3856 if (ots->val_type != TEMP_VAL_REG) {
3857 TCGRegSet allocated_regs = s->reserved_regs;
3859 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3860 /* Make sure to not spill the input register. */
3861 tcg_regset_set_reg(allocated_regs, its->reg);
3863 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3864 op->output_pref[0], ots->indirect_base);
3865 ots->val_type = TEMP_VAL_REG;
3866 ots->mem_coherent = 0;
3867 s->reg_to_temp[ots->reg] = ots;
3870 switch (its->val_type) {
3871 case TEMP_VAL_REG:
3873 * The dup constraints must be broad, covering all possible VECE.
3874 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3875 * to fail, indicating that extra moves are required for that case.
3877 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3878 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3879 goto done;
3881 /* Try again from memory or a vector input register. */
3883 if (!its->mem_coherent) {
3885 * The input register is not synced, and so an extra store
3886 * would be required to use memory. Attempt an integer-vector
3887 * register move first. We do not have a TCGRegSet for this.
3889 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3890 break;
3892 /* Sync the temp back to its slot and load from there. */
3893 temp_sync(s, its, s->reserved_regs, 0, 0);
3895 /* fall through */
3897 case TEMP_VAL_MEM:
3898 #ifdef HOST_WORDS_BIGENDIAN
3899 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3900 endian_fixup -= 1 << vece;
3901 #else
3902 endian_fixup = 0;
3903 #endif
3904 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3905 its->mem_offset + endian_fixup)) {
3906 goto done;
3908 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3909 break;
3911 default:
3912 g_assert_not_reached();
3915 /* We now have a vector input register, so dup must succeed. */
3916 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3917 tcg_debug_assert(ok);
3919 done:
3920 if (IS_DEAD_ARG(1)) {
3921 temp_dead(s, its);
3923 if (NEED_SYNC_ARG(0)) {
3924 temp_sync(s, ots, s->reserved_regs, 0, 0);
3926 if (IS_DEAD_ARG(0)) {
3927 temp_dead(s, ots);
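/*
 * tcg_reg_alloc_op() below is the generic per-op allocator: it satisfies
 * the input constraints in sort_index order (matching constants are passed
 * through directly, everything else is loaded into an acceptable register),
 * frees temporaries that die here, handles call-clobbered registers and
 * global syncing when the op demands it, allocates the outputs, emits the
 * instruction via tcg_out_op()/tcg_out_vec_op(), and finally syncs or kills
 * the outputs according to op->life.
 */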
3931 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3933 const TCGLifeData arg_life = op->life;
3934 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3935 TCGRegSet i_allocated_regs;
3936 TCGRegSet o_allocated_regs;
3937 int i, k, nb_iargs, nb_oargs;
3938 TCGReg reg;
3939 TCGArg arg;
3940 const TCGArgConstraint *arg_ct;
3941 TCGTemp *ts;
3942 TCGArg new_args[TCG_MAX_OP_ARGS];
3943 int const_args[TCG_MAX_OP_ARGS];
3945 nb_oargs = def->nb_oargs;
3946 nb_iargs = def->nb_iargs;
3948 /* copy constants */
3949 memcpy(new_args + nb_oargs + nb_iargs,
3950 op->args + nb_oargs + nb_iargs,
3951 sizeof(TCGArg) * def->nb_cargs);
3953 i_allocated_regs = s->reserved_regs;
3954 o_allocated_regs = s->reserved_regs;
3956 /* satisfy input constraints */
3957 for (k = 0; k < nb_iargs; k++) {
3958 TCGRegSet i_preferred_regs, o_preferred_regs;
3960 i = def->args_ct[nb_oargs + k].sort_index;
3961 arg = op->args[i];
3962 arg_ct = &def->args_ct[i];
3963 ts = arg_temp(arg);
3965 if (ts->val_type == TEMP_VAL_CONST
3966 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3967 /* constant is OK for instruction */
3968 const_args[i] = 1;
3969 new_args[i] = ts->val;
3970 continue;
3973 i_preferred_regs = o_preferred_regs = 0;
3974 if (arg_ct->ialias) {
3975 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3978 * If the input is readonly, then it cannot also be an
3979 * output and aliased to itself. If the input is not
3980 * dead after the instruction, we must allocate a new
3981 * register and move it.
3983 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3984 goto allocate_in_reg;
3988 * Check if the current register has already been allocated
3989 * for another input aliased to an output.
3991 if (ts->val_type == TEMP_VAL_REG) {
3992 reg = ts->reg;
3993 for (int k2 = 0; k2 < k; k2++) {
3994 int i2 = def->args_ct[nb_oargs + k2].sort_index;
3995 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3996 goto allocate_in_reg;
4000 i_preferred_regs = o_preferred_regs;
4003 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4004 reg = ts->reg;
4006 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4007 allocate_in_reg:
4009 * Allocate a new register matching the constraint
4010 * and move the temporary register into it.
4012 temp_load(s, ts, tcg_target_available_regs[ts->type],
4013 i_allocated_regs, 0);
4014 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4015 o_preferred_regs, ts->indirect_base);
4016 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4018 * Cross register class move not supported. Sync the
4019 * temp back to its slot and load from there.
4021 temp_sync(s, ts, i_allocated_regs, 0, 0);
4022 tcg_out_ld(s, ts->type, reg,
4023 ts->mem_base->reg, ts->mem_offset);
4026 new_args[i] = reg;
4027 const_args[i] = 0;
4028 tcg_regset_set_reg(i_allocated_regs, reg);
4031 /* mark dead temporaries and free the associated registers */
4032 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4033 if (IS_DEAD_ARG(i)) {
4034 temp_dead(s, arg_temp(op->args[i]));
4038 if (def->flags & TCG_OPF_COND_BRANCH) {
4039 tcg_reg_alloc_cbranch(s, i_allocated_regs);
4040 } else if (def->flags & TCG_OPF_BB_END) {
4041 tcg_reg_alloc_bb_end(s, i_allocated_regs);
4042 } else {
4043 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4044 /* XXX: permit generic clobber register list? */
4045 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4046 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4047 tcg_reg_free(s, i, i_allocated_regs);
4051 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4052 /* sync globals if the op has side effects and might trigger
4053 an exception. */
4054 sync_globals(s, i_allocated_regs);
4057 /* satisfy the output constraints */
4058 for(k = 0; k < nb_oargs; k++) {
4059 i = def->args_ct[k].sort_index;
4060 arg = op->args[i];
4061 arg_ct = &def->args_ct[i];
4062 ts = arg_temp(arg);
4064 /* ENV should not be modified. */
4065 tcg_debug_assert(!temp_readonly(ts));
4067 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4068 reg = new_args[arg_ct->alias_index];
4069 } else if (arg_ct->newreg) {
4070 reg = tcg_reg_alloc(s, arg_ct->regs,
4071 i_allocated_regs | o_allocated_regs,
4072 op->output_pref[k], ts->indirect_base);
4073 } else {
4074 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4075 op->output_pref[k], ts->indirect_base);
4077 tcg_regset_set_reg(o_allocated_regs, reg);
4078 if (ts->val_type == TEMP_VAL_REG) {
4079 s->reg_to_temp[ts->reg] = NULL;
4081 ts->val_type = TEMP_VAL_REG;
4082 ts->reg = reg;
4084 * Temp value is modified, so the value kept in memory is
4085 * potentially not the same.
4087 ts->mem_coherent = 0;
4088 s->reg_to_temp[reg] = ts;
4089 new_args[i] = reg;
4093 /* emit instruction */
4094 if (def->flags & TCG_OPF_VECTOR) {
4095 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4096 new_args, const_args);
4097 } else {
4098 tcg_out_op(s, op->opc, new_args, const_args);
4101 /* move the outputs in the correct register if needed */
4102 for(i = 0; i < nb_oargs; i++) {
4103 ts = arg_temp(op->args[i]);
4105 /* ENV should not be modified. */
4106 tcg_debug_assert(!temp_readonly(ts));
4108 if (NEED_SYNC_ARG(i)) {
4109 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4110 } else if (IS_DEAD_ARG(i)) {
4111 temp_dead(s, ts);
4116 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4118 const TCGLifeData arg_life = op->life;
4119 TCGTemp *ots, *itsl, *itsh;
4120 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4122 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4123 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4124 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4126 ots = arg_temp(op->args[0]);
4127 itsl = arg_temp(op->args[1]);
4128 itsh = arg_temp(op->args[2]);
4130 /* ENV should not be modified. */
4131 tcg_debug_assert(!temp_readonly(ots));
4133 /* Allocate the output register now. */
4134 if (ots->val_type != TEMP_VAL_REG) {
4135 TCGRegSet allocated_regs = s->reserved_regs;
4136 TCGRegSet dup_out_regs =
4137 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4139 /* Make sure to not spill the input registers. */
4140 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4141 tcg_regset_set_reg(allocated_regs, itsl->reg);
4143 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4144 tcg_regset_set_reg(allocated_regs, itsh->reg);
4147 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4148 op->output_pref[0], ots->indirect_base);
4149 ots->val_type = TEMP_VAL_REG;
4150 ots->mem_coherent = 0;
4151 s->reg_to_temp[ots->reg] = ots;
4154 /* Promote dup2 of immediates to dupi_vec. */
4155 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4156 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4157 MemOp vece = MO_64;
4159 if (val == dup_const(MO_8, val)) {
4160 vece = MO_8;
4161 } else if (val == dup_const(MO_16, val)) {
4162 vece = MO_16;
4163 } else if (val == dup_const(MO_32, val)) {
4164 vece = MO_32;
4167 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4168 goto done;
4171 /* If the two inputs form one 64-bit value, try dupm_vec. */
4172 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4173 if (!itsl->mem_coherent) {
4174 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4176 if (!itsh->mem_coherent) {
4177 temp_sync(s, itsh, s->reserved_regs, 0, 0);
4179 #ifdef HOST_WORDS_BIGENDIAN
4180 TCGTemp *its = itsh;
4181 #else
4182 TCGTemp *its = itsl;
4183 #endif
4184 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4185 its->mem_base->reg, its->mem_offset)) {
4186 goto done;
4190 /* Fall back to generic expansion. */
4191 return false;
4193 done:
4194 if (IS_DEAD_ARG(1)) {
4195 temp_dead(s, itsl);
4197 if (IS_DEAD_ARG(2)) {
4198 temp_dead(s, itsh);
4200 if (NEED_SYNC_ARG(0)) {
4201 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4202 } else if (IS_DEAD_ARG(0)) {
4203 temp_dead(s, ots);
4205 return true;
4208 #ifdef TCG_TARGET_STACK_GROWSUP
4209 #define STACK_DIR(x) (-(x))
4210 #else
4211 #define STACK_DIR(x) (x)
4212 #endif
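/*
 * STACK_DIR() negates a stack displacement on hosts whose call stack
 * grows upward, so offset arithmetic can be written once for both
 * stack directions.
 */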
4214 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4216 const int nb_oargs = TCGOP_CALLO(op);
4217 const int nb_iargs = TCGOP_CALLI(op);
4218 const TCGLifeData arg_life = op->life;
4219 int flags, nb_regs, i;
4220 TCGReg reg;
4221 TCGArg arg;
4222 TCGTemp *ts;
4223 intptr_t stack_offset;
4224 size_t call_stack_size;
4225 tcg_insn_unit *func_addr;
4226 int allocate_args;
4227 TCGRegSet allocated_regs;
4229 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4230 flags = op->args[nb_oargs + nb_iargs + 1];
4232 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4233 if (nb_regs > nb_iargs) {
4234 nb_regs = nb_iargs;
4237 /* assign stack slots first */
4238 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4239 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4240 ~(TCG_TARGET_STACK_ALIGN - 1);
4241 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4242 if (allocate_args) {
4243 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4244 preallocate call stack */
4245 tcg_abort();
4248 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4249 for (i = nb_regs; i < nb_iargs; i++) {
4250 arg = op->args[nb_oargs + i];
4251 #ifdef TCG_TARGET_STACK_GROWSUP
4252 stack_offset -= sizeof(tcg_target_long);
4253 #endif
4254 if (arg != TCG_CALL_DUMMY_ARG) {
4255 ts = arg_temp(arg);
4256 temp_load(s, ts, tcg_target_available_regs[ts->type],
4257 s->reserved_regs, 0);
4258 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4260 #ifndef TCG_TARGET_STACK_GROWSUP
4261 stack_offset += sizeof(tcg_target_long);
4262 #endif
4265 /* assign input registers */
4266 allocated_regs = s->reserved_regs;
4267 for (i = 0; i < nb_regs; i++) {
4268 arg = op->args[nb_oargs + i];
4269 if (arg != TCG_CALL_DUMMY_ARG) {
4270 ts = arg_temp(arg);
4271 reg = tcg_target_call_iarg_regs[i];
4273 if (ts->val_type == TEMP_VAL_REG) {
4274 if (ts->reg != reg) {
4275 tcg_reg_free(s, reg, allocated_regs);
4276 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4277 /*
4278 * Cross register class move not supported. Sync the
4279 * temp back to its slot and load from there.
4280 */
4281 temp_sync(s, ts, allocated_regs, 0, 0);
4282 tcg_out_ld(s, ts->type, reg,
4283 ts->mem_base->reg, ts->mem_offset);
4286 } else {
4287 TCGRegSet arg_set = 0;
4289 tcg_reg_free(s, reg, allocated_regs);
4290 tcg_regset_set_reg(arg_set, reg);
4291 temp_load(s, ts, arg_set, allocated_regs, 0);
4294 tcg_regset_set_reg(allocated_regs, reg);
4298 /* mark dead temporaries and free the associated registers */
4299 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4300 if (IS_DEAD_ARG(i)) {
4301 temp_dead(s, arg_temp(op->args[i]));
4305 /* clobber call registers */
4306 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4307 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4308 tcg_reg_free(s, i, allocated_regs);
4312 /* Save globals if they might be written by the helper, sync them if
4313 they might be read. */
4314 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4315 /* Nothing to do */
4316 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4317 sync_globals(s, allocated_regs);
4318 } else {
4319 save_globals(s, allocated_regs);
4322 tcg_out_call(s, func_addr);
4324 /* assign output registers and emit moves if needed */
4325 for (i = 0; i < nb_oargs; i++) {
4326 arg = op->args[i];
4327 ts = arg_temp(arg);
4329 /* ENV should not be modified. */
4330 tcg_debug_assert(!temp_readonly(ts));
4332 reg = tcg_target_call_oarg_regs[i];
4333 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4334 if (ts->val_type == TEMP_VAL_REG) {
4335 s->reg_to_temp[ts->reg] = NULL;
4337 ts->val_type = TEMP_VAL_REG;
4338 ts->reg = reg;
4339 ts->mem_coherent = 0;
4340 s->reg_to_temp[reg] = ts;
4341 if (NEED_SYNC_ARG(i)) {
4342 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4343 } else if (IS_DEAD_ARG(i)) {
4344 temp_dead(s, ts);
4349 #ifdef CONFIG_PROFILER
4351 /* avoid copy/paste errors */
4352 #define PROF_ADD(to, from, field) \
4353 do { \
4354 (to)->field += qatomic_read(&((from)->field)); \
4355 } while (0)
4357 #define PROF_MAX(to, from, field) \
4358 do { \
4359 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4360 if (val__ > (to)->field) { \
4361 (to)->field = val__; \
4363 } while (0)
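/*
 * Each translating thread updates the TCGProfile embedded in its own
 * TCGContext; the snapshot below folds those per-context counters into
 * a single struct, reading every field with qatomic_read so the sums
 * and maxima are taken without locking.
 */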
4365 /* Pass in a zero'ed @prof */
4366 static inline
4367 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4369 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4370 unsigned int i;
4372 for (i = 0; i < n_ctxs; i++) {
4373 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4374 const TCGProfile *orig = &s->prof;
4376 if (counters) {
4377 PROF_ADD(prof, orig, cpu_exec_time);
4378 PROF_ADD(prof, orig, tb_count1);
4379 PROF_ADD(prof, orig, tb_count);
4380 PROF_ADD(prof, orig, op_count);
4381 PROF_MAX(prof, orig, op_count_max);
4382 PROF_ADD(prof, orig, temp_count);
4383 PROF_MAX(prof, orig, temp_count_max);
4384 PROF_ADD(prof, orig, del_op_count);
4385 PROF_ADD(prof, orig, code_in_len);
4386 PROF_ADD(prof, orig, code_out_len);
4387 PROF_ADD(prof, orig, search_out_len);
4388 PROF_ADD(prof, orig, interm_time);
4389 PROF_ADD(prof, orig, code_time);
4390 PROF_ADD(prof, orig, la_time);
4391 PROF_ADD(prof, orig, opt_time);
4392 PROF_ADD(prof, orig, restore_count);
4393 PROF_ADD(prof, orig, restore_time);
4395 if (table) {
4396 int i;
4398 for (i = 0; i < NB_OPS; i++) {
4399 PROF_ADD(prof, orig, table_op_count[i]);
4405 #undef PROF_ADD
4406 #undef PROF_MAX
4408 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4410 tcg_profile_snapshot(prof, true, false);
4413 static void tcg_profile_snapshot_table(TCGProfile *prof)
4415 tcg_profile_snapshot(prof, false, true);
4418 void tcg_dump_op_count(void)
4420 TCGProfile prof = {};
4421 int i;
4423 tcg_profile_snapshot_table(&prof);
4424 for (i = 0; i < NB_OPS; i++) {
4425 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4426 prof.table_op_count[i]);
4430 int64_t tcg_cpu_exec_time(void)
4432 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4433 unsigned int i;
4434 int64_t ret = 0;
4436 for (i = 0; i < n_ctxs; i++) {
4437 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4438 const TCGProfile *prof = &s->prof;
4440 ret += qatomic_read(&prof->cpu_exec_time);
4442 return ret;
4444 #else
4445 void tcg_dump_op_count(void)
4447 qemu_printf("[TCG profiler not compiled]\n");
4450 int64_t tcg_cpu_exec_time(void)
4452 error_report("%s: TCG profiler not compiled", __func__);
4453 exit(EXIT_FAILURE);
4455 #endif
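/*
 * Generate host code for one TB.  Returns the size in bytes of the
 * generated code, or a negative value when the caller must retry:
 * -1 if the code buffer high-water mark was reached, -2 if the TB's
 * gen_insn_end_off offsets would overflow 16 bits or relocations could
 * not be resolved, or a negative backend finalization result.
 */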
4458 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4460 #ifdef CONFIG_PROFILER
4461 TCGProfile *prof = &s->prof;
4462 #endif
4463 int i, num_insns;
4464 TCGOp *op;
4466 #ifdef CONFIG_PROFILER
4468 int n = 0;
4470 QTAILQ_FOREACH(op, &s->ops, link) {
4471 n++;
4473 qatomic_set(&prof->op_count, prof->op_count + n);
4474 if (n > prof->op_count_max) {
4475 qatomic_set(&prof->op_count_max, n);
4478 n = s->nb_temps;
4479 qatomic_set(&prof->temp_count, prof->temp_count + n);
4480 if (n > prof->temp_count_max) {
4481 qatomic_set(&prof->temp_count_max, n);
4484 #endif
4486 #ifdef DEBUG_DISAS
4487 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4488 && qemu_log_in_addr_range(tb->pc))) {
4489 FILE *logfile = qemu_log_lock();
4490 qemu_log("OP:\n");
4491 tcg_dump_ops(s, false);
4492 qemu_log("\n");
4493 qemu_log_unlock(logfile);
4495 #endif
4497 #ifdef CONFIG_DEBUG_TCG
4498 /* Ensure all labels referenced have been emitted. */
4500 TCGLabel *l;
4501 bool error = false;
4503 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4504 if (unlikely(!l->present) && l->refs) {
4505 qemu_log_mask(CPU_LOG_TB_OP,
4506 "$L%d referenced but not present.\n", l->id);
4507 error = true;
4510 assert(!error);
4512 #endif
4514 #ifdef CONFIG_PROFILER
4515 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4516 #endif
4518 #ifdef USE_TCG_OPTIMIZATIONS
4519 tcg_optimize(s);
4520 #endif
4522 #ifdef CONFIG_PROFILER
4523 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4524 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4525 #endif
4527 reachable_code_pass(s);
4528 liveness_pass_1(s);
4530 if (s->nb_indirects > 0) {
4531 #ifdef DEBUG_DISAS
4532 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4533 && qemu_log_in_addr_range(tb->pc))) {
4534 FILE *logfile = qemu_log_lock();
4535 qemu_log("OP before indirect lowering:\n");
4536 tcg_dump_ops(s, false);
4537 qemu_log("\n");
4538 qemu_log_unlock(logfile);
4540 #endif
4541 /* Replace indirect temps with direct temps. */
4542 if (liveness_pass_2(s)) {
4543 /* If changes were made, re-run liveness. */
4544 liveness_pass_1(s);
4548 #ifdef CONFIG_PROFILER
4549 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4550 #endif
4552 #ifdef DEBUG_DISAS
4553 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4554 && qemu_log_in_addr_range(tb->pc))) {
4555 FILE *logfile = qemu_log_lock();
4556 qemu_log("OP after optimization and liveness analysis:\n");
4557 tcg_dump_ops(s, true);
4558 qemu_log("\n");
4559 qemu_log_unlock(logfile);
4561 #endif
4563 tcg_reg_alloc_start(s);
4565 /*
4566 * Reset the buffer pointers when restarting after overflow.
4567 * TODO: Move this into translate-all.c with the rest of the
4568 * buffer management. Having only this done here is confusing.
4569 */
4570 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4571 s->code_ptr = s->code_buf;
4573 #ifdef TCG_TARGET_NEED_LDST_LABELS
4574 QSIMPLEQ_INIT(&s->ldst_labels);
4575 #endif
4576 #ifdef TCG_TARGET_NEED_POOL_LABELS
4577 s->pool_labels = NULL;
4578 #endif
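/*
 * The per-TB lists of slow-path qemu_ld/st labels and constant-pool
 * entries are reset here; the matching finalize calls at the end of
 * this function emit the deferred code and data.
 */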
4580 num_insns = -1;
4581 QTAILQ_FOREACH(op, &s->ops, link) {
4582 TCGOpcode opc = op->opc;
4584 #ifdef CONFIG_PROFILER
4585 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4586 #endif
4588 switch (opc) {
4589 case INDEX_op_mov_i32:
4590 case INDEX_op_mov_i64:
4591 case INDEX_op_mov_vec:
4592 tcg_reg_alloc_mov(s, op);
4593 break;
4594 case INDEX_op_dup_vec:
4595 tcg_reg_alloc_dup(s, op);
4596 break;
4597 case INDEX_op_insn_start:
4598 if (num_insns >= 0) {
4599 size_t off = tcg_current_code_size(s);
4600 s->gen_insn_end_off[num_insns] = off;
4601 /* Assert that we do not overflow our stored offset. */
4602 assert(s->gen_insn_end_off[num_insns] == off);
4604 num_insns++;
4605 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4606 target_ulong a;
4607 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4608 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4609 #else
4610 a = op->args[i];
4611 #endif
4612 s->gen_insn_data[num_insns][i] = a;
4614 break;
4615 case INDEX_op_discard:
4616 temp_dead(s, arg_temp(op->args[0]));
4617 break;
4618 case INDEX_op_set_label:
4619 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4620 tcg_out_label(s, arg_label(op->args[0]));
4621 break;
4622 case INDEX_op_call:
4623 tcg_reg_alloc_call(s, op);
4624 break;
4625 case INDEX_op_dup2_vec:
4626 if (tcg_reg_alloc_dup2(s, op)) {
4627 break;
4629 /* fall through */
4630 default:
4631 /* Sanity check that we've not introduced any unhandled opcodes. */
4632 tcg_debug_assert(tcg_op_supported(opc));
4633 /* Note: in order to speed up the code, it would be much
4634 faster to have specialized register allocator functions for
4635 some common argument patterns */
4636 tcg_reg_alloc_op(s, op);
4637 break;
4639 #ifdef CONFIG_DEBUG_TCG
4640 check_regs(s);
4641 #endif
4642 /* Test for (pending) buffer overflow. The assumption is that any
4643 one operation beginning below the high water mark cannot overrun
4644 the buffer completely. Thus we can test for overflow after
4645 generating code without having to check during generation. */
4646 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4647 return -1;
4649 /* Test for TB overflow, as seen by gen_insn_end_off. */
4650 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4651 return -2;
4654 tcg_debug_assert(num_insns >= 0);
4655 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4657 /* Generate TB finalization at the end of block */
4658 #ifdef TCG_TARGET_NEED_LDST_LABELS
4659 i = tcg_out_ldst_finalize(s);
4660 if (i < 0) {
4661 return i;
4663 #endif
4664 #ifdef TCG_TARGET_NEED_POOL_LABELS
4665 i = tcg_out_pool_finalize(s);
4666 if (i < 0) {
4667 return i;
4669 #endif
4670 if (!tcg_resolve_relocs(s)) {
4671 return -2;
4674 #ifndef CONFIG_TCG_INTERPRETER
4675 /* flush instruction cache */
4676 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4677 (uintptr_t)s->code_buf,
4678 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4679 #endif
4681 return tcg_current_code_size(s);
4684 #ifdef CONFIG_PROFILER
4685 void tcg_dump_info(void)
4687 TCGProfile prof = {};
4688 const TCGProfile *s;
4689 int64_t tb_count;
4690 int64_t tb_div_count;
4691 int64_t tot;
4693 tcg_profile_snapshot_counters(&prof);
4694 s = &prof;
4695 tb_count = s->tb_count;
4696 tb_div_count = tb_count ? tb_count : 1;
4697 tot = s->interm_time + s->code_time;
4699 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4700 tot, tot / 2.4e9);
4701 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4702 " %0.1f%%)\n",
4703 tb_count, s->tb_count1 - tb_count,
4704 (double)(s->tb_count1 - s->tb_count)
4705 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4706 qemu_printf("avg ops/TB %0.1f max=%d\n",
4707 (double)s->op_count / tb_div_count, s->op_count_max);
4708 qemu_printf("deleted ops/TB %0.2f\n",
4709 (double)s->del_op_count / tb_div_count);
4710 qemu_printf("avg temps/TB %0.2f max=%d\n",
4711 (double)s->temp_count / tb_div_count, s->temp_count_max);
4712 qemu_printf("avg host code/TB %0.1f\n",
4713 (double)s->code_out_len / tb_div_count);
4714 qemu_printf("avg search data/TB %0.1f\n",
4715 (double)s->search_out_len / tb_div_count);
4717 qemu_printf("cycles/op %0.1f\n",
4718 s->op_count ? (double)tot / s->op_count : 0);
4719 qemu_printf("cycles/in byte %0.1f\n",
4720 s->code_in_len ? (double)tot / s->code_in_len : 0);
4721 qemu_printf("cycles/out byte %0.1f\n",
4722 s->code_out_len ? (double)tot / s->code_out_len : 0);
4723 qemu_printf("cycles/search byte %0.1f\n",
4724 s->search_out_len ? (double)tot / s->search_out_len : 0);
4725 if (tot == 0) {
4726 tot = 1;
4728 qemu_printf(" gen_interm time %0.1f%%\n",
4729 (double)s->interm_time / tot * 100.0);
4730 qemu_printf(" gen_code time %0.1f%%\n",
4731 (double)s->code_time / tot * 100.0);
4732 qemu_printf("optim./code time %0.1f%%\n",
4733 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4734 * 100.0);
4735 qemu_printf("liveness/code time %0.1f%%\n",
4736 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4737 qemu_printf("cpu_restore count %" PRId64 "\n",
4738 s->restore_count);
4739 qemu_printf(" avg cycles %0.1f\n",
4740 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4742 #else
4743 void tcg_dump_info(void)
4745 qemu_printf("[TCG profiler not compiled]\n");
4747 #endif
4749 #ifdef ELF_HOST_MACHINE
4750 /* In order to use this feature, the backend needs to do three things:
4752 (1) Define ELF_HOST_MACHINE to indicate both what value to
4753 put into the ELF image and to indicate support for the feature.
4755 (2) Define tcg_register_jit. This should create a buffer containing
4756 the contents of a .debug_frame section that describes the post-
4757 prologue unwind info for the tcg machine.
4759 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4760 */
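/*
 * For illustration only (not part of this file): a backend's
 * tcg_register_jit typically builds a static DebugFrame structure
 * describing the unwind rules of its prologue and forwards it here,
 * along the lines of:
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 *
 * where debug_frame is a backend-defined object holding the CIE and
 * FDE contents for the generated code region.
 */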
4762 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4763 typedef enum {
4764 JIT_NOACTION = 0,
4765 JIT_REGISTER_FN,
4766 JIT_UNREGISTER_FN
4767 } jit_actions_t;
4769 struct jit_code_entry {
4770 struct jit_code_entry *next_entry;
4771 struct jit_code_entry *prev_entry;
4772 const void *symfile_addr;
4773 uint64_t symfile_size;
4774 };
4776 struct jit_descriptor {
4777 uint32_t version;
4778 uint32_t action_flag;
4779 struct jit_code_entry *relevant_entry;
4780 struct jit_code_entry *first_entry;
4781 };
4783 void __jit_debug_register_code(void) __attribute__((noinline));
4784 void __jit_debug_register_code(void)
4785 {
4786 asm("");
4787 }
4789 /* Must statically initialize the version, because GDB may check
4790 the version before we can set it. */
4791 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4793 /* End GDB interface. */
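/*
 * find_string returns the byte offset of @str within the NUL-separated
 * string table; it assumes the string is present, which holds for the
 * fixed img->str table used below.
 */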
4795 static int find_string(const char *strtab, const char *str)
4796 {
4797 const char *p = strtab + 1;
4799 while (1) {
4800 if (strcmp(p, str) == 0) {
4801 return p - strtab;
4802 }
4803 p += strlen(p) + 1;
4804 }
4805 }
4807 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4808 const void *debug_frame,
4809 size_t debug_frame_size)
4811 struct __attribute__((packed)) DebugInfo {
4812 uint32_t len;
4813 uint16_t version;
4814 uint32_t abbrev;
4815 uint8_t ptr_size;
4816 uint8_t cu_die;
4817 uint16_t cu_lang;
4818 uintptr_t cu_low_pc;
4819 uintptr_t cu_high_pc;
4820 uint8_t fn_die;
4821 char fn_name[16];
4822 uintptr_t fn_low_pc;
4823 uintptr_t fn_high_pc;
4824 uint8_t cu_eoc;
4825 };
4827 struct ElfImage {
4828 ElfW(Ehdr) ehdr;
4829 ElfW(Phdr) phdr;
4830 ElfW(Shdr) shdr[7];
4831 ElfW(Sym) sym[2];
4832 struct DebugInfo di;
4833 uint8_t da[24];
4834 char str[80];
4835 };
4837 struct ElfImage *img;
4839 static const struct ElfImage img_template = {
4840 .ehdr = {
4841 .e_ident[EI_MAG0] = ELFMAG0,
4842 .e_ident[EI_MAG1] = ELFMAG1,
4843 .e_ident[EI_MAG2] = ELFMAG2,
4844 .e_ident[EI_MAG3] = ELFMAG3,
4845 .e_ident[EI_CLASS] = ELF_CLASS,
4846 .e_ident[EI_DATA] = ELF_DATA,
4847 .e_ident[EI_VERSION] = EV_CURRENT,
4848 .e_type = ET_EXEC,
4849 .e_machine = ELF_HOST_MACHINE,
4850 .e_version = EV_CURRENT,
4851 .e_phoff = offsetof(struct ElfImage, phdr),
4852 .e_shoff = offsetof(struct ElfImage, shdr),
4853 .e_ehsize = sizeof(ElfW(Shdr)),
4854 .e_phentsize = sizeof(ElfW(Phdr)),
4855 .e_phnum = 1,
4856 .e_shentsize = sizeof(ElfW(Shdr)),
4857 .e_shnum = ARRAY_SIZE(img->shdr),
4858 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4859 #ifdef ELF_HOST_FLAGS
4860 .e_flags = ELF_HOST_FLAGS,
4861 #endif
4862 #ifdef ELF_OSABI
4863 .e_ident[EI_OSABI] = ELF_OSABI,
4864 #endif
4866 .phdr = {
4867 .p_type = PT_LOAD,
4868 .p_flags = PF_X,
4870 .shdr = {
4871 [0] = { .sh_type = SHT_NULL },
4872 /* Trick: The contents of code_gen_buffer are not present in
4873 this fake ELF file; that got allocated elsewhere. Therefore
4874 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4875 will not look for contents. We can record any address. */
4876 [1] = { /* .text */
4877 .sh_type = SHT_NOBITS,
4878 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4880 [2] = { /* .debug_info */
4881 .sh_type = SHT_PROGBITS,
4882 .sh_offset = offsetof(struct ElfImage, di),
4883 .sh_size = sizeof(struct DebugInfo),
4885 [3] = { /* .debug_abbrev */
4886 .sh_type = SHT_PROGBITS,
4887 .sh_offset = offsetof(struct ElfImage, da),
4888 .sh_size = sizeof(img->da),
4890 [4] = { /* .debug_frame */
4891 .sh_type = SHT_PROGBITS,
4892 .sh_offset = sizeof(struct ElfImage),
4894 [5] = { /* .symtab */
4895 .sh_type = SHT_SYMTAB,
4896 .sh_offset = offsetof(struct ElfImage, sym),
4897 .sh_size = sizeof(img->sym),
4898 .sh_info = 1,
4899 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4900 .sh_entsize = sizeof(ElfW(Sym)),
4902 [6] = { /* .strtab */
4903 .sh_type = SHT_STRTAB,
4904 .sh_offset = offsetof(struct ElfImage, str),
4905 .sh_size = sizeof(img->str),
4908 .sym = {
4909 [1] = { /* code_gen_buffer */
4910 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4911 .st_shndx = 1,
4914 .di = {
4915 .len = sizeof(struct DebugInfo) - 4,
4916 .version = 2,
4917 .ptr_size = sizeof(void *),
4918 .cu_die = 1,
4919 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4920 .fn_die = 2,
4921 .fn_name = "code_gen_buffer"
4923 .da = {
4924 1, /* abbrev number (the cu) */
4925 0x11, 1, /* DW_TAG_compile_unit, has children */
4926 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4927 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4928 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4929 0, 0, /* end of abbrev */
4930 2, /* abbrev number (the fn) */
4931 0x2e, 0, /* DW_TAG_subprogram, no children */
4932 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4933 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4934 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4935 0, 0, /* end of abbrev */
4936 0 /* no more abbrev */
4938 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4939 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4942 /* We only need a single jit entry; statically allocate it. */
4943 static struct jit_code_entry one_entry;
4945 uintptr_t buf = (uintptr_t)buf_ptr;
4946 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4947 DebugFrameHeader *dfh;
4949 img = g_malloc(img_size);
4950 *img = img_template;
4952 img->phdr.p_vaddr = buf;
4953 img->phdr.p_paddr = buf;
4954 img->phdr.p_memsz = buf_size;
4956 img->shdr[1].sh_name = find_string(img->str, ".text");
4957 img->shdr[1].sh_addr = buf;
4958 img->shdr[1].sh_size = buf_size;
4960 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4961 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4963 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4964 img->shdr[4].sh_size = debug_frame_size;
4966 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4967 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4969 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4970 img->sym[1].st_value = buf;
4971 img->sym[1].st_size = buf_size;
4973 img->di.cu_low_pc = buf;
4974 img->di.cu_high_pc = buf + buf_size;
4975 img->di.fn_low_pc = buf;
4976 img->di.fn_high_pc = buf + buf_size;
4978 dfh = (DebugFrameHeader *)(img + 1);
4979 memcpy(dfh, debug_frame, debug_frame_size);
4980 dfh->fde.func_start = buf;
4981 dfh->fde.func_len = buf_size;
4983 #ifdef DEBUG_JIT
4984 /* Enable this block to be able to debug the ELF image file creation.
4985 One can use readelf, objdump, or other inspection utilities. */
4987 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4988 if (f) {
4989 if (fwrite(img, img_size, 1, f) != img_size) {
4990 /* Avoid stupid unused return value warning for fwrite. */
4992 fclose(f);
4995 #endif
4997 one_entry.symfile_addr = img;
4998 one_entry.symfile_size = img_size;
5000 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5001 __jit_debug_descriptor.relevant_entry = &one_entry;
5002 __jit_debug_descriptor.first_entry = &one_entry;
5003 __jit_debug_register_code();
5005 #else
5006 /* No support for the feature. Provide the entry point expected by exec.c,
5007 and implement the internal function we declared earlier. */
5009 static void tcg_register_jit_int(const void *buf, size_t size,
5010 const void *debug_frame,
5011 size_t debug_frame_size)
5012 {
5013 }
5015 void tcg_register_jit(const void *buf, size_t buf_size)
5016 {
5017 }
5018 #endif /* ELF_HOST_MACHINE */
5020 #if !TCG_TARGET_MAYBE_vec
5021 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5022 {
5023 g_assert_not_reached();
5024 }
5025 #endif