tcg: Remove TCG_TARGET_CON_STR_H
tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75 intptr_t value, intptr_t addend);
77 /* The CIE and FDE header definitions will be common to all hosts. */
78 typedef struct {
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
80 uint32_t id;
81 uint8_t version;
82 char augmentation[1];
83 uint8_t code_align;
84 uint8_t data_align;
85 uint8_t return_column;
86 } DebugFrameCIE;
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 uint32_t cie_offset;
91 uintptr_t func_start;
92 uintptr_t func_len;
93 } DebugFrameFDEHeader;
95 typedef struct QEMU_PACKED {
96 DebugFrameCIE cie;
97 DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
100 static void tcg_register_jit_int(const void *buf, size_t size,
101 const void *debug_frame,
102 size_t debug_frame_size)
103 __attribute__((unused));
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
107 intptr_t arg2);
108 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
109 static void tcg_out_movi(TCGContext *s, TCGType type,
110 TCGReg ret, tcg_target_long arg);
111 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
112 const int *const_args);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115 TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
121 unsigned vece, const TCGArg *args,
122 const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
146 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
147 intptr_t arg2);
148 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
149 TCGReg base, intptr_t ofs);
150 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
151 static int tcg_target_const_match(tcg_target_long val, TCGType type,
152 const TCGArgConstraint *arg_ct);
153 #ifdef TCG_TARGET_NEED_LDST_LABELS
154 static int tcg_out_ldst_finalize(TCGContext *s);
155 #endif
157 #define TCG_HIGHWATER 1024
159 static TCGContext **tcg_ctxs;
160 static unsigned int n_tcg_ctxs;
161 TCGv_env cpu_env = 0;
162 const void *tcg_code_gen_epilogue;
163 uintptr_t tcg_splitwx_diff;
165 #ifndef CONFIG_TCG_INTERPRETER
166 tcg_prologue_fn *tcg_qemu_tb_exec;
167 #endif
169 struct tcg_region_tree {
170 QemuMutex lock;
171 GTree *tree;
172 /* padding to avoid false sharing is computed at run-time */
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
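/*
 * Illustration (hypothetical numbers, not taken from the code): with a
 * 512 MiB code_gen_buffer split into 64 regions of 8 MiB, a thread that
 * fills its region simply claims the next free one, so one prolific vCPU
 * cannot force a full tb_flush() while other regions remain unused.
 */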
181 struct tcg_region_state {
182 QemuMutex lock;
184 /* fields set at init time */
185 void *start;
186 void *start_aligned;
187 void *end;
188 size_t n;
189 size_t size; /* size of one region */
190 size_t stride; /* .size + guard size */
192 /* fields protected by the lock */
193 size_t current; /* current region index */
194 size_t agg_size_full; /* aggregate size of full regions */
197 static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
203 static void *region_trees;
204 static size_t tree_size;
205 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
206 static TCGRegSet tcg_target_call_clobber_regs;
208 #if TCG_TARGET_INSN_UNIT_SIZE == 1
209 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
211 *s->code_ptr++ = v;
214 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
215 uint8_t v)
217 *p = v;
219 #endif
221 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
222 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
224 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
225 *s->code_ptr++ = v;
226 } else {
227 tcg_insn_unit *p = s->code_ptr;
228 memcpy(p, &v, sizeof(v));
229 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
233 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
234 uint16_t v)
236 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
237 *p = v;
238 } else {
239 memcpy(p, &v, sizeof(v));
242 #endif
244 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
245 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
247 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
248 *s->code_ptr++ = v;
249 } else {
250 tcg_insn_unit *p = s->code_ptr;
251 memcpy(p, &v, sizeof(v));
252 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
256 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
257 uint32_t v)
259 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
260 *p = v;
261 } else {
262 memcpy(p, &v, sizeof(v));
265 #endif
267 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
268 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
270 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
271 *s->code_ptr++ = v;
272 } else {
273 tcg_insn_unit *p = s->code_ptr;
274 memcpy(p, &v, sizeof(v));
275 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
279 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
280 uint64_t v)
282 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
283 *p = v;
284 } else {
285 memcpy(p, &v, sizeof(v));
288 #endif
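/*
 * Sketch of how the emit helpers above behave for two host configurations
 * (the host examples are assumptions based on typical unit sizes):
 *
 *   TCG_TARGET_INSN_UNIT_SIZE == 1 (byte-stream hosts such as x86):
 *       tcg_out32(s, insn);   // memcpy of 4 bytes, code_ptr advances 4 units
 *
 *   TCG_TARGET_INSN_UNIT_SIZE == 4 (fixed 32-bit instruction hosts):
 *       tcg_out32(s, insn);   // single unit store, code_ptr advances 1 unit
 */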
290 /* label relocation processing */
292 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
293 TCGLabel *l, intptr_t addend)
295 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
297 r->type = type;
298 r->ptr = code_ptr;
299 r->addend = addend;
300 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
303 static void tcg_out_label(TCGContext *s, TCGLabel *l)
305 tcg_debug_assert(!l->has_value);
306 l->has_value = 1;
307 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
310 TCGLabel *gen_new_label(void)
312 TCGContext *s = tcg_ctx;
313 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
315 memset(l, 0, sizeof(TCGLabel));
316 l->id = s->nb_labels++;
317 QSIMPLEQ_INIT(&l->relocs);
319 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
321 return l;
324 static bool tcg_resolve_relocs(TCGContext *s)
326 TCGLabel *l;
328 QSIMPLEQ_FOREACH(l, &s->labels, next) {
329 TCGRelocation *r;
330 uintptr_t value = l->u.value;
332 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
333 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
334 return false;
338 return true;
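/*
 * Typical flow, as a sketch inferred from the functions above: a forward
 * branch emitted before its target calls tcg_out_reloc() to queue a
 * TCGRelocation on the label; tcg_out_label() later records the label's
 * final address; tcg_resolve_relocs() then walks every label and applies
 * patch_reloc() to each queued site, returning false if a relocation
 * cannot be encoded.
 */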
341 static void set_jmp_reset_offset(TCGContext *s, int which)
344 * We will check for overflow at the end of the opcode loop in
345 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
347 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
350 #include "tcg-target.c.inc"
352 /* compare a pointer @ptr and a tb_tc @s */
353 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
355 if (ptr >= s->ptr + s->size) {
356 return 1;
357 } else if (ptr < s->ptr) {
358 return -1;
360 return 0;
363 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
365 const struct tb_tc *a = ap;
366 const struct tb_tc *b = bp;
369 * When both sizes are set, we know this isn't a lookup.
370 * This is the most likely case: every TB must be inserted; lookups
371 * are a lot less frequent.
373 if (likely(a->size && b->size)) {
374 if (a->ptr > b->ptr) {
375 return 1;
376 } else if (a->ptr < b->ptr) {
377 return -1;
379 /* a->ptr == b->ptr should happen only on deletions */
380 g_assert(a->size == b->size);
381 return 0;
    /*
     * For lookups, either a->size or b->size is 0: the lookup key has no
     * size set.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
388 if (likely(a->size == 0)) {
389 return ptr_cmp_tb_tc(a->ptr, b);
391 return ptr_cmp_tb_tc(b->ptr, a);
394 static void tcg_region_trees_init(void)
396 size_t i;
398 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
399 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
400 for (i = 0; i < region.n; i++) {
401 struct tcg_region_tree *rt = region_trees + i * tree_size;
403 qemu_mutex_init(&rt->lock);
404 rt->tree = g_tree_new(tb_tc_cmp);
408 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
410 void *p = tcg_splitwx_to_rw(cp);
411 size_t region_idx;
413 if (p < region.start_aligned) {
414 region_idx = 0;
415 } else {
416 ptrdiff_t offset = p - region.start_aligned;
418 if (offset > region.stride * (region.n - 1)) {
419 region_idx = region.n - 1;
420 } else {
421 region_idx = offset / region.stride;
424 return region_trees + region_idx * tree_size;
427 void tcg_tb_insert(TranslationBlock *tb)
429 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
431 qemu_mutex_lock(&rt->lock);
432 g_tree_insert(rt->tree, &tb->tc, tb);
433 qemu_mutex_unlock(&rt->lock);
436 void tcg_tb_remove(TranslationBlock *tb)
438 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
440 qemu_mutex_lock(&rt->lock);
441 g_tree_remove(rt->tree, &tb->tc);
442 qemu_mutex_unlock(&rt->lock);
446 * Find the TB 'tb' such that
447 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
448 * Return NULL if not found.
450 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
452 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
453 TranslationBlock *tb;
454 struct tb_tc s = { .ptr = (void *)tc_ptr };
456 qemu_mutex_lock(&rt->lock);
457 tb = g_tree_lookup(rt->tree, &s);
458 qemu_mutex_unlock(&rt->lock);
459 return tb;
462 static void tcg_region_tree_lock_all(void)
464 size_t i;
466 for (i = 0; i < region.n; i++) {
467 struct tcg_region_tree *rt = region_trees + i * tree_size;
469 qemu_mutex_lock(&rt->lock);
473 static void tcg_region_tree_unlock_all(void)
475 size_t i;
477 for (i = 0; i < region.n; i++) {
478 struct tcg_region_tree *rt = region_trees + i * tree_size;
480 qemu_mutex_unlock(&rt->lock);
484 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
486 size_t i;
488 tcg_region_tree_lock_all();
489 for (i = 0; i < region.n; i++) {
490 struct tcg_region_tree *rt = region_trees + i * tree_size;
492 g_tree_foreach(rt->tree, func, user_data);
494 tcg_region_tree_unlock_all();
497 size_t tcg_nb_tbs(void)
499 size_t nb_tbs = 0;
500 size_t i;
502 tcg_region_tree_lock_all();
503 for (i = 0; i < region.n; i++) {
504 struct tcg_region_tree *rt = region_trees + i * tree_size;
506 nb_tbs += g_tree_nnodes(rt->tree);
508 tcg_region_tree_unlock_all();
509 return nb_tbs;
512 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
514 TranslationBlock *tb = v;
516 tb_destroy(tb);
517 return FALSE;
520 static void tcg_region_tree_reset_all(void)
522 size_t i;
524 tcg_region_tree_lock_all();
525 for (i = 0; i < region.n; i++) {
526 struct tcg_region_tree *rt = region_trees + i * tree_size;
528 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
529 /* Increment the refcount first so that destroy acts as a reset */
530 g_tree_ref(rt->tree);
531 g_tree_destroy(rt->tree);
533 tcg_region_tree_unlock_all();
536 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
538 void *start, *end;
540 start = region.start_aligned + curr_region * region.stride;
541 end = start + region.size;
543 if (curr_region == 0) {
544 start = region.start;
546 if (curr_region == region.n - 1) {
547 end = region.end;
550 *pstart = start;
551 *pend = end;
554 static void tcg_region_assign(TCGContext *s, size_t curr_region)
556 void *start, *end;
558 tcg_region_bounds(curr_region, &start, &end);
560 s->code_gen_buffer = start;
561 s->code_gen_ptr = start;
562 s->code_gen_buffer_size = end - start;
563 s->code_gen_highwater = end - TCG_HIGHWATER;
566 static bool tcg_region_alloc__locked(TCGContext *s)
568 if (region.current == region.n) {
569 return true;
571 tcg_region_assign(s, region.current);
572 region.current++;
573 return false;
577 * Request a new region once the one in use has filled up.
578 * Returns true on error.
580 static bool tcg_region_alloc(TCGContext *s)
582 bool err;
583 /* read the region size now; alloc__locked will overwrite it on success */
584 size_t size_full = s->code_gen_buffer_size;
586 qemu_mutex_lock(&region.lock);
587 err = tcg_region_alloc__locked(s);
588 if (!err) {
589 region.agg_size_full += size_full - TCG_HIGHWATER;
591 qemu_mutex_unlock(&region.lock);
592 return err;
596 * Perform a context's first region allocation.
597 * This function does _not_ increment region.agg_size_full.
599 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
601 return tcg_region_alloc__locked(s);
604 /* Call from a safe-work context */
605 void tcg_region_reset_all(void)
607 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
608 unsigned int i;
610 qemu_mutex_lock(&region.lock);
611 region.current = 0;
612 region.agg_size_full = 0;
614 for (i = 0; i < n_ctxs; i++) {
615 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
616 bool err = tcg_region_initial_alloc__locked(s);
618 g_assert(!err);
620 qemu_mutex_unlock(&region.lock);
622 tcg_region_tree_reset_all();
625 #ifdef CONFIG_USER_ONLY
626 static size_t tcg_n_regions(void)
628 return 1;
630 #else
632 * It is likely that some vCPUs will translate more code than others, so we
633 * first try to set more regions than max_cpus, with those regions being of
634 * reasonable size. If that's not possible we make do by evenly dividing
635 * the code_gen_buffer among the vCPUs.
637 static size_t tcg_n_regions(void)
639 size_t i;
641 /* Use a single region if all we have is one vCPU thread */
642 #if !defined(CONFIG_USER_ONLY)
643 MachineState *ms = MACHINE(qdev_get_machine());
644 unsigned int max_cpus = ms->smp.max_cpus;
645 #endif
646 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
647 return 1;
650 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
651 for (i = 8; i > 0; i--) {
652 size_t regions_per_thread = i;
653 size_t region_size;
655 region_size = tcg_init_ctx.code_gen_buffer_size;
656 region_size /= max_cpus * regions_per_thread;
658 if (region_size >= 2 * 1024u * 1024) {
659 return max_cpus * regions_per_thread;
662 /* If we can't, then just allocate one region per vCPU thread */
663 return max_cpus;
665 #endif
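/*
 * Worked example (hypothetical numbers): with max_cpus = 8 and a 256 MiB
 * code_gen_buffer, the first iteration (i = 8) yields
 * 256 MiB / (8 * 8) = 4 MiB >= 2 MiB, so 64 regions are used.  With a
 * 64 MiB buffer the loop only succeeds at i = 4 (64 MiB / 32 = 2 MiB)
 * and 32 regions are used.
 */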
/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
695 void tcg_region_init(void)
697 void *buf = tcg_init_ctx.code_gen_buffer;
698 void *aligned;
699 size_t size = tcg_init_ctx.code_gen_buffer_size;
700 size_t page_size = qemu_real_host_page_size;
701 size_t region_size;
702 size_t n_regions;
703 size_t i;
704 uintptr_t splitwx_diff;
706 n_regions = tcg_n_regions();
708 /* The first region will be 'aligned - buf' bytes larger than the others */
709 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
710 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
712 * Make region_size a multiple of page_size, using aligned as the start.
713 * As a result of this we might end up with a few extra pages at the end of
714 * the buffer; we will assign those to the last region.
716 region_size = (size - (aligned - buf)) / n_regions;
717 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
719 /* A region must have at least 2 pages; one code, one guard */
720 g_assert(region_size >= 2 * page_size);
722 /* init the region struct */
723 qemu_mutex_init(&region.lock);
724 region.n = n_regions;
725 region.size = region_size - page_size;
726 region.stride = region_size;
727 region.start = buf;
728 region.start_aligned = aligned;
729 /* page-align the end, since its last page will be a guard page */
730 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
731 /* account for that last guard page */
732 region.end -= page_size;
734 /* set guard pages */
735 splitwx_diff = tcg_splitwx_diff;
736 for (i = 0; i < region.n; i++) {
737 void *start, *end;
738 int rc;
740 tcg_region_bounds(i, &start, &end);
741 rc = qemu_mprotect_none(end, page_size);
742 g_assert(!rc);
743 if (splitwx_diff) {
744 rc = qemu_mprotect_none(end + splitwx_diff, page_size);
745 g_assert(!rc);
749 tcg_region_trees_init();
751 /* In user-mode we support only one ctx, so do the initial allocation now */
752 #ifdef CONFIG_USER_ONLY
754 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
756 g_assert(!err);
758 #endif
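/*
 * Illustrative layout (hypothetical numbers): a 16 MiB buffer, 4 KiB pages
 * and n_regions = 4 give region.stride = 4 MiB and region.size =
 * 4 MiB - 4 KiB of usable code per region, with the last page of each
 * stride mprotect()ed as a guard page.  The first region additionally
 * absorbs the 'aligned - buf' slack and the last one any left-over tail
 * pages before the final guard page.
 */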
761 #ifdef CONFIG_DEBUG_TCG
762 const void *tcg_splitwx_to_rx(void *rw)
764 /* Pass NULL pointers unchanged. */
765 if (rw) {
766 g_assert(in_code_gen_buffer(rw));
767 rw += tcg_splitwx_diff;
769 return rw;
772 void *tcg_splitwx_to_rw(const void *rx)
774 /* Pass NULL pointers unchanged. */
775 if (rx) {
776 rx -= tcg_splitwx_diff;
777 /* Assert that we end with a pointer in the rw region. */
778 g_assert(in_code_gen_buffer(rx));
780 return (void *)rx;
782 #endif /* CONFIG_DEBUG_TCG */
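/*
 * Background, hedged summary: when TCG split-wx is in use, the generated
 * code is mapped twice: a writable (rw) view used while emitting and an
 * executable (rx) view used when running, separated by tcg_splitwx_diff.
 * The two helpers above convert between the views and, in debug builds,
 * assert that the pointer really lies inside code_gen_buffer.
 */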
784 static void alloc_tcg_plugin_context(TCGContext *s)
786 #ifdef CONFIG_PLUGIN
787 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
788 s->plugin_tb->insns =
789 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
790 #endif
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
808 #ifdef CONFIG_USER_ONLY
809 void tcg_register_thread(void)
811 tcg_ctx = &tcg_init_ctx;
813 #else
814 void tcg_register_thread(void)
816 MachineState *ms = MACHINE(qdev_get_machine());
817 TCGContext *s = g_malloc(sizeof(*s));
818 unsigned int i, n;
819 bool err;
821 *s = tcg_init_ctx;
823 /* Relink mem_base. */
824 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
825 if (tcg_init_ctx.temps[i].mem_base) {
826 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
827 tcg_debug_assert(b >= 0 && b < n);
828 s->temps[i].mem_base = &s->temps[b];
832 /* Claim an entry in tcg_ctxs */
833 n = qatomic_fetch_inc(&n_tcg_ctxs);
834 g_assert(n < ms->smp.max_cpus);
835 qatomic_set(&tcg_ctxs[n], s);
837 if (n > 0) {
838 alloc_tcg_plugin_context(s);
841 tcg_ctx = s;
842 qemu_mutex_lock(&region.lock);
843 err = tcg_region_initial_alloc__locked(tcg_ctx);
844 g_assert(!err);
845 qemu_mutex_unlock(&region.lock);
847 #endif /* !CONFIG_USER_ONLY */
850 * Returns the size (in bytes) of all translated code (i.e. from all regions)
851 * currently in the cache.
852 * See also: tcg_code_capacity()
853 * Do not confuse with tcg_current_code_size(); that one applies to a single
854 * TCG context.
856 size_t tcg_code_size(void)
858 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
859 unsigned int i;
860 size_t total;
862 qemu_mutex_lock(&region.lock);
863 total = region.agg_size_full;
864 for (i = 0; i < n_ctxs; i++) {
865 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
866 size_t size;
868 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
869 g_assert(size <= s->code_gen_buffer_size);
870 total += size;
872 qemu_mutex_unlock(&region.lock);
873 return total;
877 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
878 * regions.
879 * See also: tcg_code_size()
881 size_t tcg_code_capacity(void)
883 size_t guard_size, capacity;
885 /* no need for synchronization; these variables are set at init time */
886 guard_size = region.stride - region.size;
887 capacity = region.end + guard_size - region.start;
888 capacity -= region.n * (guard_size + TCG_HIGHWATER);
889 return capacity;
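/*
 * Worked example (hypothetical numbers): with 4 regions of 4 MiB stride,
 * 4 KiB guard pages and TCG_HIGHWATER = 1024, the reported capacity is
 * (region.end + 4 KiB - region.start) - 4 * (4 KiB + 1024) bytes, i.e.
 * only the bytes that can be used for code before a region counts as full.
 */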
892 size_t tcg_tb_phys_invalidate_count(void)
894 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
895 unsigned int i;
896 size_t total = 0;
898 for (i = 0; i < n_ctxs; i++) {
899 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
901 total += qatomic_read(&s->tb_phys_invalidate_count);
903 return total;
906 /* pool based memory allocation */
907 void *tcg_malloc_internal(TCGContext *s, int size)
909 TCGPool *p;
910 int pool_size;
912 if (size > TCG_POOL_CHUNK_SIZE) {
913 /* big malloc: insert a new pool (XXX: could optimize) */
914 p = g_malloc(sizeof(TCGPool) + size);
915 p->size = size;
916 p->next = s->pool_first_large;
917 s->pool_first_large = p;
918 return p->data;
919 } else {
920 p = s->pool_current;
921 if (!p) {
922 p = s->pool_first;
923 if (!p)
924 goto new_pool;
925 } else {
926 if (!p->next) {
927 new_pool:
928 pool_size = TCG_POOL_CHUNK_SIZE;
929 p = g_malloc(sizeof(TCGPool) + pool_size);
930 p->size = pool_size;
931 p->next = NULL;
932 if (s->pool_current)
933 s->pool_current->next = p;
934 else
935 s->pool_first = p;
936 } else {
937 p = p->next;
941 s->pool_current = p;
942 s->pool_cur = p->data + size;
943 s->pool_end = p->data + p->size;
944 return p->data;
947 void tcg_pool_reset(TCGContext *s)
949 TCGPool *p, *t;
950 for (p = s->pool_first_large; p; p = t) {
951 t = p->next;
952 g_free(p);
954 s->pool_first_large = NULL;
955 s->pool_cur = s->pool_end = NULL;
956 s->pool_current = NULL;
959 typedef struct TCGHelperInfo {
960 void *func;
961 const char *name;
962 unsigned flags;
963 unsigned sizemask;
964 } TCGHelperInfo;
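/*
 * A note on the sizemask encoding, derived from its use in tcg_gen_callN()
 * below: bits 0-1 describe the return value, and each argument i occupies
 * the two bits starting at (i + 1) * 2; within a pair, the low bit means
 * "64-bit value" and the high bit means "sign-extended".  For example, a
 * helper returning i64 and taking (i32, i64) has bit 0 set for the return
 * and bit 4 set for argument 1, while bit 3 reflects argument 0's
 * signedness.
 */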
966 #include "exec/helper-proto.h"
968 static const TCGHelperInfo all_helpers[] = {
969 #include "exec/helper-tcg.h"
971 static GHashTable *helper_table;
973 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
974 static void process_op_defs(TCGContext *s);
975 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
976 TCGReg reg, const char *name);
978 void tcg_context_init(TCGContext *s)
980 int op, total_args, n, i;
981 TCGOpDef *def;
982 TCGArgConstraint *args_ct;
983 TCGTemp *ts;
985 memset(s, 0, sizeof(*s));
986 s->nb_globals = 0;
988 /* Count total number of arguments and allocate the corresponding
989 space */
990 total_args = 0;
991 for(op = 0; op < NB_OPS; op++) {
992 def = &tcg_op_defs[op];
993 n = def->nb_iargs + def->nb_oargs;
994 total_args += n;
997 args_ct = g_new0(TCGArgConstraint, total_args);
999 for(op = 0; op < NB_OPS; op++) {
1000 def = &tcg_op_defs[op];
1001 def->args_ct = args_ct;
1002 n = def->nb_iargs + def->nb_oargs;
1003 args_ct += n;
1006 /* Register helpers. */
1007 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1008 helper_table = g_hash_table_new(NULL, NULL);
1010 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1011 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1012 (gpointer)&all_helpers[i]);
1015 tcg_target_init(s);
1016 process_op_defs(s);
1018 /* Reverse the order of the saved registers, assuming they're all at
1019 the start of tcg_target_reg_alloc_order. */
1020 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1021 int r = tcg_target_reg_alloc_order[n];
1022 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1023 break;
1026 for (i = 0; i < n; ++i) {
1027 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1029 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1030 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1033 alloc_tcg_plugin_context(s);
1035 tcg_ctx = s;
1037 * In user-mode we simply share the init context among threads, since we
1038 * use a single region. See the documentation tcg_region_init() for the
1039 * reasoning behind this.
1040 * In softmmu we will have at most max_cpus TCG threads.
1042 #ifdef CONFIG_USER_ONLY
1043 tcg_ctxs = &tcg_ctx;
1044 n_tcg_ctxs = 1;
1045 #else
1046 MachineState *ms = MACHINE(qdev_get_machine());
1047 unsigned int max_cpus = ms->smp.max_cpus;
1048 tcg_ctxs = g_new(TCGContext *, max_cpus);
1049 #endif
1051 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1052 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1053 cpu_env = temp_tcgv_ptr(ts);
1057 * Allocate TBs right before their corresponding translated code, making
1058 * sure that TBs and code are on different cache lines.
1060 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1062 uintptr_t align = qemu_icache_linesize;
1063 TranslationBlock *tb;
1064 void *next;
1066 retry:
1067 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1068 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1070 if (unlikely(next > s->code_gen_highwater)) {
1071 if (tcg_region_alloc(s)) {
1072 return NULL;
1074 goto retry;
1076 qatomic_set(&s->code_gen_ptr, next);
1077 s->data_gen_ptr = NULL;
1078 return tb;
1081 void tcg_prologue_init(TCGContext *s)
1083 size_t prologue_size, total_size;
1084 void *buf0, *buf1;
1086 /* Put the prologue at the beginning of code_gen_buffer. */
1087 buf0 = s->code_gen_buffer;
1088 total_size = s->code_gen_buffer_size;
1089 s->code_ptr = buf0;
1090 s->code_buf = buf0;
1091 s->data_gen_ptr = NULL;
1094 * The region trees are not yet configured, but tcg_splitwx_to_rx
1095 * needs the bounds for an assert.
1097 region.start = buf0;
1098 region.end = buf0 + total_size;
1100 #ifndef CONFIG_TCG_INTERPRETER
1101 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1102 #endif
1104 /* Compute a high-water mark, at which we voluntarily flush the buffer
1105 and start over. The size here is arbitrary, significantly larger
1106 than we expect the code generation for any one opcode to require. */
1107 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1109 #ifdef TCG_TARGET_NEED_POOL_LABELS
1110 s->pool_labels = NULL;
1111 #endif
1113 qemu_thread_jit_write();
1114 /* Generate the prologue. */
1115 tcg_target_qemu_prologue(s);
1117 #ifdef TCG_TARGET_NEED_POOL_LABELS
1118 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1120 int result = tcg_out_pool_finalize(s);
1121 tcg_debug_assert(result == 0);
1123 #endif
1125 buf1 = s->code_ptr;
1126 #ifndef CONFIG_TCG_INTERPRETER
1127 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1128 tcg_ptr_byte_diff(buf1, buf0));
1129 #endif
1131 /* Deduct the prologue from the buffer. */
1132 prologue_size = tcg_current_code_size(s);
1133 s->code_gen_ptr = buf1;
1134 s->code_gen_buffer = buf1;
1135 s->code_buf = buf1;
1136 total_size -= prologue_size;
1137 s->code_gen_buffer_size = total_size;
1139 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1141 #ifdef DEBUG_DISAS
1142 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1143 FILE *logfile = qemu_log_lock();
1144 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1145 if (s->data_gen_ptr) {
1146 size_t code_size = s->data_gen_ptr - buf0;
1147 size_t data_size = prologue_size - code_size;
1148 size_t i;
1150 log_disas(buf0, code_size);
1152 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1153 if (sizeof(tcg_target_ulong) == 8) {
1154 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1155 (uintptr_t)s->data_gen_ptr + i,
1156 *(uint64_t *)(s->data_gen_ptr + i));
1157 } else {
1158 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1159 (uintptr_t)s->data_gen_ptr + i,
1160 *(uint32_t *)(s->data_gen_ptr + i));
1163 } else {
1164 log_disas(buf0, prologue_size);
1166 qemu_log("\n");
1167 qemu_log_flush();
1168 qemu_log_unlock(logfile);
1170 #endif
1172 /* Assert that goto_ptr is implemented completely. */
1173 if (TCG_TARGET_HAS_goto_ptr) {
1174 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1178 void tcg_func_start(TCGContext *s)
1180 tcg_pool_reset(s);
1181 s->nb_temps = s->nb_globals;
1183 /* No temps have been previously allocated for size or locality. */
1184 memset(s->free_temps, 0, sizeof(s->free_temps));
1186 /* No constant temps have been previously allocated. */
1187 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1188 if (s->const_table[i]) {
1189 g_hash_table_remove_all(s->const_table[i]);
1193 s->nb_ops = 0;
1194 s->nb_labels = 0;
1195 s->current_frame_offset = s->frame_start;
1197 #ifdef CONFIG_DEBUG_TCG
1198 s->goto_tb_issue_mask = 0;
1199 #endif
1201 QTAILQ_INIT(&s->ops);
1202 QTAILQ_INIT(&s->free_ops);
1203 QSIMPLEQ_INIT(&s->labels);
1206 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1208 int n = s->nb_temps++;
1210 if (n >= TCG_MAX_TEMPS) {
1211 /* Signal overflow, starting over with fewer guest insns. */
1212 siglongjmp(s->jmp_trans, -2);
1214 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1217 static TCGTemp *tcg_global_alloc(TCGContext *s)
1219 TCGTemp *ts;
1221 tcg_debug_assert(s->nb_globals == s->nb_temps);
1222 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1223 s->nb_globals++;
1224 ts = tcg_temp_alloc(s);
1225 ts->kind = TEMP_GLOBAL;
1227 return ts;
1230 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1231 TCGReg reg, const char *name)
1233 TCGTemp *ts;
1235 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1236 tcg_abort();
1239 ts = tcg_global_alloc(s);
1240 ts->base_type = type;
1241 ts->type = type;
1242 ts->kind = TEMP_FIXED;
1243 ts->reg = reg;
1244 ts->name = name;
1245 tcg_regset_set_reg(s->reserved_regs, reg);
1247 return ts;
1250 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1252 s->frame_start = start;
1253 s->frame_end = start + size;
1254 s->frame_temp
1255 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1258 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1259 intptr_t offset, const char *name)
1261 TCGContext *s = tcg_ctx;
1262 TCGTemp *base_ts = tcgv_ptr_temp(base);
1263 TCGTemp *ts = tcg_global_alloc(s);
1264 int indirect_reg = 0, bigendian = 0;
1265 #ifdef HOST_WORDS_BIGENDIAN
1266 bigendian = 1;
1267 #endif
1269 switch (base_ts->kind) {
1270 case TEMP_FIXED:
1271 break;
1272 case TEMP_GLOBAL:
1273 /* We do not support double-indirect registers. */
1274 tcg_debug_assert(!base_ts->indirect_reg);
1275 base_ts->indirect_base = 1;
1276 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1277 ? 2 : 1);
1278 indirect_reg = 1;
1279 break;
1280 default:
1281 g_assert_not_reached();
1284 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1285 TCGTemp *ts2 = tcg_global_alloc(s);
1286 char buf[64];
1288 ts->base_type = TCG_TYPE_I64;
1289 ts->type = TCG_TYPE_I32;
1290 ts->indirect_reg = indirect_reg;
1291 ts->mem_allocated = 1;
1292 ts->mem_base = base_ts;
1293 ts->mem_offset = offset + bigendian * 4;
1294 pstrcpy(buf, sizeof(buf), name);
1295 pstrcat(buf, sizeof(buf), "_0");
1296 ts->name = strdup(buf);
1298 tcg_debug_assert(ts2 == ts + 1);
1299 ts2->base_type = TCG_TYPE_I64;
1300 ts2->type = TCG_TYPE_I32;
1301 ts2->indirect_reg = indirect_reg;
1302 ts2->mem_allocated = 1;
1303 ts2->mem_base = base_ts;
1304 ts2->mem_offset = offset + (1 - bigendian) * 4;
1305 pstrcpy(buf, sizeof(buf), name);
1306 pstrcat(buf, sizeof(buf), "_1");
1307 ts2->name = strdup(buf);
1308 } else {
1309 ts->base_type = type;
1310 ts->type = type;
1311 ts->indirect_reg = indirect_reg;
1312 ts->mem_allocated = 1;
1313 ts->mem_base = base_ts;
1314 ts->mem_offset = offset;
1315 ts->name = name;
1317 return ts;
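/*
 * Illustrative usage (the field name is hypothetical): targets normally
 * reach this through the tcg_global_mem_new_i32/_i64 wrappers, e.g.
 *
 *     TCGv_i32 cpu_foo = tcg_global_mem_new_i32(cpu_env,
 *                            offsetof(CPUArchState, foo), "foo");
 *
 * which creates a TEMP_GLOBAL that lives at env->foo and is reloaded or
 * written back whenever its register copy may be invalidated.
 */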
1320 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1322 TCGContext *s = tcg_ctx;
1323 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1324 TCGTemp *ts;
1325 int idx, k;
1327 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1328 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1329 if (idx < TCG_MAX_TEMPS) {
1330 /* There is already an available temp with the right type. */
1331 clear_bit(idx, s->free_temps[k].l);
1333 ts = &s->temps[idx];
1334 ts->temp_allocated = 1;
1335 tcg_debug_assert(ts->base_type == type);
1336 tcg_debug_assert(ts->kind == kind);
1337 } else {
1338 ts = tcg_temp_alloc(s);
1339 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1340 TCGTemp *ts2 = tcg_temp_alloc(s);
1342 ts->base_type = type;
1343 ts->type = TCG_TYPE_I32;
1344 ts->temp_allocated = 1;
1345 ts->kind = kind;
1347 tcg_debug_assert(ts2 == ts + 1);
1348 ts2->base_type = TCG_TYPE_I64;
1349 ts2->type = TCG_TYPE_I32;
1350 ts2->temp_allocated = 1;
1351 ts2->kind = kind;
1352 } else {
1353 ts->base_type = type;
1354 ts->type = type;
1355 ts->temp_allocated = 1;
1356 ts->kind = kind;
1360 #if defined(CONFIG_DEBUG_TCG)
1361 s->temps_in_use++;
1362 #endif
1363 return ts;
1366 TCGv_vec tcg_temp_new_vec(TCGType type)
1368 TCGTemp *t;
1370 #ifdef CONFIG_DEBUG_TCG
1371 switch (type) {
1372 case TCG_TYPE_V64:
1373 assert(TCG_TARGET_HAS_v64);
1374 break;
1375 case TCG_TYPE_V128:
1376 assert(TCG_TARGET_HAS_v128);
1377 break;
1378 case TCG_TYPE_V256:
1379 assert(TCG_TARGET_HAS_v256);
1380 break;
1381 default:
1382 g_assert_not_reached();
1384 #endif
1386 t = tcg_temp_new_internal(type, 0);
1387 return temp_tcgv_vec(t);
1390 /* Create a new temp of the same type as an existing temp. */
1391 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1393 TCGTemp *t = tcgv_vec_temp(match);
1395 tcg_debug_assert(t->temp_allocated != 0);
1397 t = tcg_temp_new_internal(t->base_type, 0);
1398 return temp_tcgv_vec(t);
1401 void tcg_temp_free_internal(TCGTemp *ts)
1403 TCGContext *s = tcg_ctx;
1404 int k, idx;
1406 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1407 if (ts->kind == TEMP_CONST) {
1408 return;
1411 #if defined(CONFIG_DEBUG_TCG)
1412 s->temps_in_use--;
1413 if (s->temps_in_use < 0) {
1414 fprintf(stderr, "More temporaries freed than allocated!\n");
1416 #endif
1418 tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1419 tcg_debug_assert(ts->temp_allocated != 0);
1420 ts->temp_allocated = 0;
1422 idx = temp_idx(ts);
1423 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1424 set_bit(idx, s->free_temps[k].l);
1427 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1429 TCGContext *s = tcg_ctx;
1430 GHashTable *h = s->const_table[type];
1431 TCGTemp *ts;
1433 if (h == NULL) {
1434 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1435 s->const_table[type] = h;
1438 ts = g_hash_table_lookup(h, &val);
1439 if (ts == NULL) {
1440 ts = tcg_temp_alloc(s);
1442 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1443 TCGTemp *ts2 = tcg_temp_alloc(s);
1445 ts->base_type = TCG_TYPE_I64;
1446 ts->type = TCG_TYPE_I32;
1447 ts->kind = TEMP_CONST;
1448 ts->temp_allocated = 1;
1450 * Retain the full value of the 64-bit constant in the low
1451 * part, so that the hash table works. Actual uses will
1452 * truncate the value to the low part.
1454 ts->val = val;
1456 tcg_debug_assert(ts2 == ts + 1);
1457 ts2->base_type = TCG_TYPE_I64;
1458 ts2->type = TCG_TYPE_I32;
1459 ts2->kind = TEMP_CONST;
1460 ts2->temp_allocated = 1;
1461 ts2->val = val >> 32;
1462 } else {
1463 ts->base_type = type;
1464 ts->type = type;
1465 ts->kind = TEMP_CONST;
1466 ts->temp_allocated = 1;
1467 ts->val = val;
1469 g_hash_table_insert(h, &ts->val, ts);
1472 return ts;
1475 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1477 val = dup_const(vece, val);
1478 return temp_tcgv_vec(tcg_constant_internal(type, val));
1481 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1483 TCGTemp *t = tcgv_vec_temp(match);
1485 tcg_debug_assert(t->temp_allocated != 0);
1486 return tcg_constant_vec(t->base_type, vece, val);
1489 TCGv_i32 tcg_const_i32(int32_t val)
1491 TCGv_i32 t0;
1492 t0 = tcg_temp_new_i32();
1493 tcg_gen_movi_i32(t0, val);
1494 return t0;
1497 TCGv_i64 tcg_const_i64(int64_t val)
1499 TCGv_i64 t0;
1500 t0 = tcg_temp_new_i64();
1501 tcg_gen_movi_i64(t0, val);
1502 return t0;
1505 TCGv_i32 tcg_const_local_i32(int32_t val)
1507 TCGv_i32 t0;
1508 t0 = tcg_temp_local_new_i32();
1509 tcg_gen_movi_i32(t0, val);
1510 return t0;
1513 TCGv_i64 tcg_const_local_i64(int64_t val)
1515 TCGv_i64 t0;
1516 t0 = tcg_temp_local_new_i64();
1517 tcg_gen_movi_i64(t0, val);
1518 return t0;
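/*
 * Note the difference between the two families: tcg_const_*() above
 * allocate a fresh temporary and emit a movi into it, so the result is
 * writable and should eventually be freed; tcg_constant_*() (built on
 * tcg_constant_internal() above) return a shared, read-only TEMP_CONST
 * temp that is deduplicated per value and for which tcg_temp_free() is
 * silently ignored.
 */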
1521 #if defined(CONFIG_DEBUG_TCG)
1522 void tcg_clear_temp_count(void)
1524 TCGContext *s = tcg_ctx;
1525 s->temps_in_use = 0;
1528 int tcg_check_temp_count(void)
1530 TCGContext *s = tcg_ctx;
1531 if (s->temps_in_use) {
1532 /* Clear the count so that we don't give another
1533 * warning immediately next time around.
1535 s->temps_in_use = 0;
1536 return 1;
1538 return 0;
1540 #endif
1542 /* Return true if OP may appear in the opcode stream.
1543 Test the runtime variable that controls each opcode. */
1544 bool tcg_op_supported(TCGOpcode op)
1546 const bool have_vec
1547 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1549 switch (op) {
1550 case INDEX_op_discard:
1551 case INDEX_op_set_label:
1552 case INDEX_op_call:
1553 case INDEX_op_br:
1554 case INDEX_op_mb:
1555 case INDEX_op_insn_start:
1556 case INDEX_op_exit_tb:
1557 case INDEX_op_goto_tb:
1558 case INDEX_op_qemu_ld_i32:
1559 case INDEX_op_qemu_st_i32:
1560 case INDEX_op_qemu_ld_i64:
1561 case INDEX_op_qemu_st_i64:
1562 return true;
1564 case INDEX_op_qemu_st8_i32:
1565 return TCG_TARGET_HAS_qemu_st8_i32;
1567 case INDEX_op_goto_ptr:
1568 return TCG_TARGET_HAS_goto_ptr;
1570 case INDEX_op_mov_i32:
1571 case INDEX_op_setcond_i32:
1572 case INDEX_op_brcond_i32:
1573 case INDEX_op_ld8u_i32:
1574 case INDEX_op_ld8s_i32:
1575 case INDEX_op_ld16u_i32:
1576 case INDEX_op_ld16s_i32:
1577 case INDEX_op_ld_i32:
1578 case INDEX_op_st8_i32:
1579 case INDEX_op_st16_i32:
1580 case INDEX_op_st_i32:
1581 case INDEX_op_add_i32:
1582 case INDEX_op_sub_i32:
1583 case INDEX_op_mul_i32:
1584 case INDEX_op_and_i32:
1585 case INDEX_op_or_i32:
1586 case INDEX_op_xor_i32:
1587 case INDEX_op_shl_i32:
1588 case INDEX_op_shr_i32:
1589 case INDEX_op_sar_i32:
1590 return true;
1592 case INDEX_op_movcond_i32:
1593 return TCG_TARGET_HAS_movcond_i32;
1594 case INDEX_op_div_i32:
1595 case INDEX_op_divu_i32:
1596 return TCG_TARGET_HAS_div_i32;
1597 case INDEX_op_rem_i32:
1598 case INDEX_op_remu_i32:
1599 return TCG_TARGET_HAS_rem_i32;
1600 case INDEX_op_div2_i32:
1601 case INDEX_op_divu2_i32:
1602 return TCG_TARGET_HAS_div2_i32;
1603 case INDEX_op_rotl_i32:
1604 case INDEX_op_rotr_i32:
1605 return TCG_TARGET_HAS_rot_i32;
1606 case INDEX_op_deposit_i32:
1607 return TCG_TARGET_HAS_deposit_i32;
1608 case INDEX_op_extract_i32:
1609 return TCG_TARGET_HAS_extract_i32;
1610 case INDEX_op_sextract_i32:
1611 return TCG_TARGET_HAS_sextract_i32;
1612 case INDEX_op_extract2_i32:
1613 return TCG_TARGET_HAS_extract2_i32;
1614 case INDEX_op_add2_i32:
1615 return TCG_TARGET_HAS_add2_i32;
1616 case INDEX_op_sub2_i32:
1617 return TCG_TARGET_HAS_sub2_i32;
1618 case INDEX_op_mulu2_i32:
1619 return TCG_TARGET_HAS_mulu2_i32;
1620 case INDEX_op_muls2_i32:
1621 return TCG_TARGET_HAS_muls2_i32;
1622 case INDEX_op_muluh_i32:
1623 return TCG_TARGET_HAS_muluh_i32;
1624 case INDEX_op_mulsh_i32:
1625 return TCG_TARGET_HAS_mulsh_i32;
1626 case INDEX_op_ext8s_i32:
1627 return TCG_TARGET_HAS_ext8s_i32;
1628 case INDEX_op_ext16s_i32:
1629 return TCG_TARGET_HAS_ext16s_i32;
1630 case INDEX_op_ext8u_i32:
1631 return TCG_TARGET_HAS_ext8u_i32;
1632 case INDEX_op_ext16u_i32:
1633 return TCG_TARGET_HAS_ext16u_i32;
1634 case INDEX_op_bswap16_i32:
1635 return TCG_TARGET_HAS_bswap16_i32;
1636 case INDEX_op_bswap32_i32:
1637 return TCG_TARGET_HAS_bswap32_i32;
1638 case INDEX_op_not_i32:
1639 return TCG_TARGET_HAS_not_i32;
1640 case INDEX_op_neg_i32:
1641 return TCG_TARGET_HAS_neg_i32;
1642 case INDEX_op_andc_i32:
1643 return TCG_TARGET_HAS_andc_i32;
1644 case INDEX_op_orc_i32:
1645 return TCG_TARGET_HAS_orc_i32;
1646 case INDEX_op_eqv_i32:
1647 return TCG_TARGET_HAS_eqv_i32;
1648 case INDEX_op_nand_i32:
1649 return TCG_TARGET_HAS_nand_i32;
1650 case INDEX_op_nor_i32:
1651 return TCG_TARGET_HAS_nor_i32;
1652 case INDEX_op_clz_i32:
1653 return TCG_TARGET_HAS_clz_i32;
1654 case INDEX_op_ctz_i32:
1655 return TCG_TARGET_HAS_ctz_i32;
1656 case INDEX_op_ctpop_i32:
1657 return TCG_TARGET_HAS_ctpop_i32;
1659 case INDEX_op_brcond2_i32:
1660 case INDEX_op_setcond2_i32:
1661 return TCG_TARGET_REG_BITS == 32;
1663 case INDEX_op_mov_i64:
1664 case INDEX_op_setcond_i64:
1665 case INDEX_op_brcond_i64:
1666 case INDEX_op_ld8u_i64:
1667 case INDEX_op_ld8s_i64:
1668 case INDEX_op_ld16u_i64:
1669 case INDEX_op_ld16s_i64:
1670 case INDEX_op_ld32u_i64:
1671 case INDEX_op_ld32s_i64:
1672 case INDEX_op_ld_i64:
1673 case INDEX_op_st8_i64:
1674 case INDEX_op_st16_i64:
1675 case INDEX_op_st32_i64:
1676 case INDEX_op_st_i64:
1677 case INDEX_op_add_i64:
1678 case INDEX_op_sub_i64:
1679 case INDEX_op_mul_i64:
1680 case INDEX_op_and_i64:
1681 case INDEX_op_or_i64:
1682 case INDEX_op_xor_i64:
1683 case INDEX_op_shl_i64:
1684 case INDEX_op_shr_i64:
1685 case INDEX_op_sar_i64:
1686 case INDEX_op_ext_i32_i64:
1687 case INDEX_op_extu_i32_i64:
1688 return TCG_TARGET_REG_BITS == 64;
1690 case INDEX_op_movcond_i64:
1691 return TCG_TARGET_HAS_movcond_i64;
1692 case INDEX_op_div_i64:
1693 case INDEX_op_divu_i64:
1694 return TCG_TARGET_HAS_div_i64;
1695 case INDEX_op_rem_i64:
1696 case INDEX_op_remu_i64:
1697 return TCG_TARGET_HAS_rem_i64;
1698 case INDEX_op_div2_i64:
1699 case INDEX_op_divu2_i64:
1700 return TCG_TARGET_HAS_div2_i64;
1701 case INDEX_op_rotl_i64:
1702 case INDEX_op_rotr_i64:
1703 return TCG_TARGET_HAS_rot_i64;
1704 case INDEX_op_deposit_i64:
1705 return TCG_TARGET_HAS_deposit_i64;
1706 case INDEX_op_extract_i64:
1707 return TCG_TARGET_HAS_extract_i64;
1708 case INDEX_op_sextract_i64:
1709 return TCG_TARGET_HAS_sextract_i64;
1710 case INDEX_op_extract2_i64:
1711 return TCG_TARGET_HAS_extract2_i64;
1712 case INDEX_op_extrl_i64_i32:
1713 return TCG_TARGET_HAS_extrl_i64_i32;
1714 case INDEX_op_extrh_i64_i32:
1715 return TCG_TARGET_HAS_extrh_i64_i32;
1716 case INDEX_op_ext8s_i64:
1717 return TCG_TARGET_HAS_ext8s_i64;
1718 case INDEX_op_ext16s_i64:
1719 return TCG_TARGET_HAS_ext16s_i64;
1720 case INDEX_op_ext32s_i64:
1721 return TCG_TARGET_HAS_ext32s_i64;
1722 case INDEX_op_ext8u_i64:
1723 return TCG_TARGET_HAS_ext8u_i64;
1724 case INDEX_op_ext16u_i64:
1725 return TCG_TARGET_HAS_ext16u_i64;
1726 case INDEX_op_ext32u_i64:
1727 return TCG_TARGET_HAS_ext32u_i64;
1728 case INDEX_op_bswap16_i64:
1729 return TCG_TARGET_HAS_bswap16_i64;
1730 case INDEX_op_bswap32_i64:
1731 return TCG_TARGET_HAS_bswap32_i64;
1732 case INDEX_op_bswap64_i64:
1733 return TCG_TARGET_HAS_bswap64_i64;
1734 case INDEX_op_not_i64:
1735 return TCG_TARGET_HAS_not_i64;
1736 case INDEX_op_neg_i64:
1737 return TCG_TARGET_HAS_neg_i64;
1738 case INDEX_op_andc_i64:
1739 return TCG_TARGET_HAS_andc_i64;
1740 case INDEX_op_orc_i64:
1741 return TCG_TARGET_HAS_orc_i64;
1742 case INDEX_op_eqv_i64:
1743 return TCG_TARGET_HAS_eqv_i64;
1744 case INDEX_op_nand_i64:
1745 return TCG_TARGET_HAS_nand_i64;
1746 case INDEX_op_nor_i64:
1747 return TCG_TARGET_HAS_nor_i64;
1748 case INDEX_op_clz_i64:
1749 return TCG_TARGET_HAS_clz_i64;
1750 case INDEX_op_ctz_i64:
1751 return TCG_TARGET_HAS_ctz_i64;
1752 case INDEX_op_ctpop_i64:
1753 return TCG_TARGET_HAS_ctpop_i64;
1754 case INDEX_op_add2_i64:
1755 return TCG_TARGET_HAS_add2_i64;
1756 case INDEX_op_sub2_i64:
1757 return TCG_TARGET_HAS_sub2_i64;
1758 case INDEX_op_mulu2_i64:
1759 return TCG_TARGET_HAS_mulu2_i64;
1760 case INDEX_op_muls2_i64:
1761 return TCG_TARGET_HAS_muls2_i64;
1762 case INDEX_op_muluh_i64:
1763 return TCG_TARGET_HAS_muluh_i64;
1764 case INDEX_op_mulsh_i64:
1765 return TCG_TARGET_HAS_mulsh_i64;
1767 case INDEX_op_mov_vec:
1768 case INDEX_op_dup_vec:
1769 case INDEX_op_dupm_vec:
1770 case INDEX_op_ld_vec:
1771 case INDEX_op_st_vec:
1772 case INDEX_op_add_vec:
1773 case INDEX_op_sub_vec:
1774 case INDEX_op_and_vec:
1775 case INDEX_op_or_vec:
1776 case INDEX_op_xor_vec:
1777 case INDEX_op_cmp_vec:
1778 return have_vec;
1779 case INDEX_op_dup2_vec:
1780 return have_vec && TCG_TARGET_REG_BITS == 32;
1781 case INDEX_op_not_vec:
1782 return have_vec && TCG_TARGET_HAS_not_vec;
1783 case INDEX_op_neg_vec:
1784 return have_vec && TCG_TARGET_HAS_neg_vec;
1785 case INDEX_op_abs_vec:
1786 return have_vec && TCG_TARGET_HAS_abs_vec;
1787 case INDEX_op_andc_vec:
1788 return have_vec && TCG_TARGET_HAS_andc_vec;
1789 case INDEX_op_orc_vec:
1790 return have_vec && TCG_TARGET_HAS_orc_vec;
1791 case INDEX_op_mul_vec:
1792 return have_vec && TCG_TARGET_HAS_mul_vec;
1793 case INDEX_op_shli_vec:
1794 case INDEX_op_shri_vec:
1795 case INDEX_op_sari_vec:
1796 return have_vec && TCG_TARGET_HAS_shi_vec;
1797 case INDEX_op_shls_vec:
1798 case INDEX_op_shrs_vec:
1799 case INDEX_op_sars_vec:
1800 return have_vec && TCG_TARGET_HAS_shs_vec;
1801 case INDEX_op_shlv_vec:
1802 case INDEX_op_shrv_vec:
1803 case INDEX_op_sarv_vec:
1804 return have_vec && TCG_TARGET_HAS_shv_vec;
1805 case INDEX_op_rotli_vec:
1806 return have_vec && TCG_TARGET_HAS_roti_vec;
1807 case INDEX_op_rotls_vec:
1808 return have_vec && TCG_TARGET_HAS_rots_vec;
1809 case INDEX_op_rotlv_vec:
1810 case INDEX_op_rotrv_vec:
1811 return have_vec && TCG_TARGET_HAS_rotv_vec;
1812 case INDEX_op_ssadd_vec:
1813 case INDEX_op_usadd_vec:
1814 case INDEX_op_sssub_vec:
1815 case INDEX_op_ussub_vec:
1816 return have_vec && TCG_TARGET_HAS_sat_vec;
1817 case INDEX_op_smin_vec:
1818 case INDEX_op_umin_vec:
1819 case INDEX_op_smax_vec:
1820 case INDEX_op_umax_vec:
1821 return have_vec && TCG_TARGET_HAS_minmax_vec;
1822 case INDEX_op_bitsel_vec:
1823 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1824 case INDEX_op_cmpsel_vec:
1825 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1827 default:
1828 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1829 return true;
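/*
 * Usage sketch (hedged, not a quote from the source): callers can gate the
 * emission of optional opcodes on this predicate, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop directly
 *     } else {
 *         // expand via a helper or bit tricks
 *     }
 */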
1833 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1834 and endian swap. Maybe it would be better to do the alignment
1835 and endian swap in tcg_reg_alloc_call(). */
1836 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1838 int i, real_args, nb_rets, pi;
1839 unsigned sizemask, flags;
1840 TCGHelperInfo *info;
1841 TCGOp *op;
1843 info = g_hash_table_lookup(helper_table, (gpointer)func);
1844 flags = info->flags;
1845 sizemask = info->sizemask;
1847 #ifdef CONFIG_PLUGIN
1848 /* detect non-plugin helpers */
1849 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1850 tcg_ctx->plugin_insn->calls_helpers = true;
1852 #endif
1854 #if defined(__sparc__) && !defined(__arch64__) \
1855 && !defined(CONFIG_TCG_INTERPRETER)
1856 /* We have 64-bit values in one register, but need to pass as two
1857 separate parameters. Split them. */
1858 int orig_sizemask = sizemask;
1859 int orig_nargs = nargs;
1860 TCGv_i64 retl, reth;
1861 TCGTemp *split_args[MAX_OPC_PARAM];
1863 retl = NULL;
1864 reth = NULL;
1865 if (sizemask != 0) {
1866 for (i = real_args = 0; i < nargs; ++i) {
1867 int is_64bit = sizemask & (1 << (i+1)*2);
1868 if (is_64bit) {
1869 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1870 TCGv_i32 h = tcg_temp_new_i32();
1871 TCGv_i32 l = tcg_temp_new_i32();
1872 tcg_gen_extr_i64_i32(l, h, orig);
1873 split_args[real_args++] = tcgv_i32_temp(h);
1874 split_args[real_args++] = tcgv_i32_temp(l);
1875 } else {
1876 split_args[real_args++] = args[i];
1879 nargs = real_args;
1880 args = split_args;
1881 sizemask = 0;
1883 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1884 for (i = 0; i < nargs; ++i) {
1885 int is_64bit = sizemask & (1 << (i+1)*2);
1886 int is_signed = sizemask & (2 << (i+1)*2);
1887 if (!is_64bit) {
1888 TCGv_i64 temp = tcg_temp_new_i64();
1889 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1890 if (is_signed) {
1891 tcg_gen_ext32s_i64(temp, orig);
1892 } else {
1893 tcg_gen_ext32u_i64(temp, orig);
1895 args[i] = tcgv_i64_temp(temp);
1898 #endif /* TCG_TARGET_EXTEND_ARGS */
1900 op = tcg_emit_op(INDEX_op_call);
1902 pi = 0;
1903 if (ret != NULL) {
1904 #if defined(__sparc__) && !defined(__arch64__) \
1905 && !defined(CONFIG_TCG_INTERPRETER)
1906 if (orig_sizemask & 1) {
1907 /* The 32-bit ABI is going to return the 64-bit value in
1908 the %o0/%o1 register pair. Prepare for this by using
1909 two return temporaries, and reassemble below. */
1910 retl = tcg_temp_new_i64();
1911 reth = tcg_temp_new_i64();
1912 op->args[pi++] = tcgv_i64_arg(reth);
1913 op->args[pi++] = tcgv_i64_arg(retl);
1914 nb_rets = 2;
1915 } else {
1916 op->args[pi++] = temp_arg(ret);
1917 nb_rets = 1;
1919 #else
1920 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1921 #ifdef HOST_WORDS_BIGENDIAN
1922 op->args[pi++] = temp_arg(ret + 1);
1923 op->args[pi++] = temp_arg(ret);
1924 #else
1925 op->args[pi++] = temp_arg(ret);
1926 op->args[pi++] = temp_arg(ret + 1);
1927 #endif
1928 nb_rets = 2;
1929 } else {
1930 op->args[pi++] = temp_arg(ret);
1931 nb_rets = 1;
1933 #endif
1934 } else {
1935 nb_rets = 0;
1937 TCGOP_CALLO(op) = nb_rets;
1939 real_args = 0;
1940 for (i = 0; i < nargs; i++) {
1941 int is_64bit = sizemask & (1 << (i+1)*2);
1942 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1943 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1944 /* some targets want aligned 64 bit args */
1945 if (real_args & 1) {
1946 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1947 real_args++;
1949 #endif
1950 /* If stack grows up, then we will be placing successive
1951 arguments at lower addresses, which means we need to
1952 reverse the order compared to how we would normally
1953 treat either big or little-endian. For those arguments
1954 that will wind up in registers, this still works for
1955 HPPA (the only current STACK_GROWSUP target) since the
1956 argument registers are *also* allocated in decreasing
1957 order. If another such target is added, this logic may
1958 have to get more complicated to differentiate between
1959 stack arguments and register arguments. */
1960 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1961 op->args[pi++] = temp_arg(args[i] + 1);
1962 op->args[pi++] = temp_arg(args[i]);
1963 #else
1964 op->args[pi++] = temp_arg(args[i]);
1965 op->args[pi++] = temp_arg(args[i] + 1);
1966 #endif
1967 real_args += 2;
1968 continue;
1971 op->args[pi++] = temp_arg(args[i]);
1972 real_args++;
1974 op->args[pi++] = (uintptr_t)func;
1975 op->args[pi++] = flags;
1976 TCGOP_CALLI(op) = real_args;
1978 /* Make sure the fields didn't overflow. */
1979 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1980 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1982 #if defined(__sparc__) && !defined(__arch64__) \
1983 && !defined(CONFIG_TCG_INTERPRETER)
1984 /* Free all of the parts we allocated above. */
1985 for (i = real_args = 0; i < orig_nargs; ++i) {
1986 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1987 if (is_64bit) {
1988 tcg_temp_free_internal(args[real_args++]);
1989 tcg_temp_free_internal(args[real_args++]);
1990 } else {
1991 real_args++;
1994 if (orig_sizemask & 1) {
1995 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1996 Note that describing these as TCGv_i64 eliminates an unnecessary
1997 zero-extension that tcg_gen_concat_i32_i64 would create. */
1998 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1999 tcg_temp_free_i64(retl);
2000 tcg_temp_free_i64(reth);
2002 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2003 for (i = 0; i < nargs; ++i) {
2004 int is_64bit = sizemask & (1 << (i+1)*2);
2005 if (!is_64bit) {
2006 tcg_temp_free_internal(args[i]);
2009 #endif /* TCG_TARGET_EXTEND_ARGS */
2012 static void tcg_reg_alloc_start(TCGContext *s)
2014 int i, n;
2016 for (i = 0, n = s->nb_temps; i < n; i++) {
2017 TCGTemp *ts = &s->temps[i];
2018 TCGTempVal val = TEMP_VAL_MEM;
2020 switch (ts->kind) {
2021 case TEMP_CONST:
2022 val = TEMP_VAL_CONST;
2023 break;
2024 case TEMP_FIXED:
2025 val = TEMP_VAL_REG;
2026 break;
2027 case TEMP_GLOBAL:
2028 break;
2029 case TEMP_NORMAL:
2030 val = TEMP_VAL_DEAD;
2031 /* fall through */
2032 case TEMP_LOCAL:
2033 ts->mem_allocated = 0;
2034 break;
2035 default:
2036 g_assert_not_reached();
2038 ts->val_type = val;
2041 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2044 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2045 TCGTemp *ts)
2047 int idx = temp_idx(ts);
2049 switch (ts->kind) {
2050 case TEMP_FIXED:
2051 case TEMP_GLOBAL:
2052 pstrcpy(buf, buf_size, ts->name);
2053 break;
2054 case TEMP_LOCAL:
2055 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2056 break;
2057 case TEMP_NORMAL:
2058 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2059 break;
2060 case TEMP_CONST:
2061 switch (ts->type) {
2062 case TCG_TYPE_I32:
2063 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2064 break;
2065 #if TCG_TARGET_REG_BITS > 32
2066 case TCG_TYPE_I64:
2067 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2068 break;
2069 #endif
2070 case TCG_TYPE_V64:
2071 case TCG_TYPE_V128:
2072 case TCG_TYPE_V256:
2073 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2074 64 << (ts->type - TCG_TYPE_V64), ts->val);
2075 break;
2076 default:
2077 g_assert_not_reached();
2079 break;
2081 return buf;
2084 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2085 int buf_size, TCGArg arg)
2087 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2090 /* Find helper name. */
2091 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2093 const char *ret = NULL;
2094 if (helper_table) {
2095 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2096 if (info) {
2097 ret = info->name;
2100 return ret;
2103 static const char * const cond_name[] =
2105 [TCG_COND_NEVER] = "never",
2106 [TCG_COND_ALWAYS] = "always",
2107 [TCG_COND_EQ] = "eq",
2108 [TCG_COND_NE] = "ne",
2109 [TCG_COND_LT] = "lt",
2110 [TCG_COND_GE] = "ge",
2111 [TCG_COND_LE] = "le",
2112 [TCG_COND_GT] = "gt",
2113 [TCG_COND_LTU] = "ltu",
2114 [TCG_COND_GEU] = "geu",
2115 [TCG_COND_LEU] = "leu",
2116 [TCG_COND_GTU] = "gtu"
2119 static const char * const ldst_name[] =
2121 [MO_UB] = "ub",
2122 [MO_SB] = "sb",
2123 [MO_LEUW] = "leuw",
2124 [MO_LESW] = "lesw",
2125 [MO_LEUL] = "leul",
2126 [MO_LESL] = "lesl",
2127 [MO_LEQ] = "leq",
2128 [MO_BEUW] = "beuw",
2129 [MO_BESW] = "besw",
2130 [MO_BEUL] = "beul",
2131 [MO_BESL] = "besl",
2132 [MO_BEQ] = "beq",
2135 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2136 #ifdef TARGET_ALIGNED_ONLY
2137 [MO_UNALN >> MO_ASHIFT] = "un+",
2138 [MO_ALIGN >> MO_ASHIFT] = "",
2139 #else
2140 [MO_UNALN >> MO_ASHIFT] = "",
2141 [MO_ALIGN >> MO_ASHIFT] = "al+",
2142 #endif
2143 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2144 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2145 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2146 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2147 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2148 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
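/*
 * Editorial sketch (illustration only): tcg_dump_ops() below splits a
 * TCGMemOpIdx back into its MemOp and mmu index and then indexes the two
 * tables above to build the printed name.  For example, a little-endian
 * 32-bit load requiring 4-byte alignment on mmu index 1 comes out as
 * ",al4+leul,1".
 */
static inline void example_print_memop_idx(void)
{
    TCGMemOpIdx oi = make_memop_idx(MO_LEUL | MO_ALIGN_4, 1);
    MemOp mop = get_memop(oi);
    unsigned ix = get_mmuidx(oi);
    const char *s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
    const char *s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];

    qemu_log(",%s%s,%u", s_al, s_op, ix);   /* prints ",al4+leul,1" */
}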
2151 static inline bool tcg_regset_single(TCGRegSet d)
2153 return (d & (d - 1)) == 0;
2156 static inline TCGReg tcg_regset_first(TCGRegSet d)
2158 if (TCG_TARGET_NB_REGS <= 32) {
2159 return ctz32(d);
2160 } else {
2161 return ctz64(d);
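/*
 * Editorial sketch (illustration only): the two helpers above rely on
 * standard bit tricks -- "x & (x - 1)" clears the lowest set bit and is
 * therefore zero exactly when at most one register is in the set, while a
 * count-trailing-zeros gives the index of that register.
 */
static inline void example_regset_helpers(void)
{
    TCGRegSet many = (1u << 5) | (1u << 9);     /* two registers        */
    TCGRegSet one = 1u << 7;                    /* a single register    */

    bool s_many = tcg_regset_single(many);      /* false                */
    bool s_one = tcg_regset_single(one);        /* true                 */
    TCGReg first = tcg_regset_first(one);       /* 7                    */

    (void)s_many; (void)s_one; (void)first;
}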
2165 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2167 char buf[128];
2168 TCGOp *op;
2170 QTAILQ_FOREACH(op, &s->ops, link) {
2171 int i, k, nb_oargs, nb_iargs, nb_cargs;
2172 const TCGOpDef *def;
2173 TCGOpcode c;
2174 int col = 0;
2176 c = op->opc;
2177 def = &tcg_op_defs[c];
2179 if (c == INDEX_op_insn_start) {
2180 nb_oargs = 0;
2181 col += qemu_log("\n ----");
2183 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2184 target_ulong a;
2185 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2186 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2187 #else
2188 a = op->args[i];
2189 #endif
2190 col += qemu_log(" " TARGET_FMT_lx, a);
2192 } else if (c == INDEX_op_call) {
2193 /* variable number of arguments */
2194 nb_oargs = TCGOP_CALLO(op);
2195 nb_iargs = TCGOP_CALLI(op);
2196 nb_cargs = def->nb_cargs;
2198 /* function name, flags, out args */
2199 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2200 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2201 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2202 for (i = 0; i < nb_oargs; i++) {
2203 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2204 op->args[i]));
2206 for (i = 0; i < nb_iargs; i++) {
2207 TCGArg arg = op->args[nb_oargs + i];
2208 const char *t = "<dummy>";
2209 if (arg != TCG_CALL_DUMMY_ARG) {
2210 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2212 col += qemu_log(",%s", t);
2214 } else {
2215 col += qemu_log(" %s ", def->name);
2217 nb_oargs = def->nb_oargs;
2218 nb_iargs = def->nb_iargs;
2219 nb_cargs = def->nb_cargs;
2221 if (def->flags & TCG_OPF_VECTOR) {
2222 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2223 8 << TCGOP_VECE(op));
2226 k = 0;
2227 for (i = 0; i < nb_oargs; i++) {
2228 if (k != 0) {
2229 col += qemu_log(",");
2231 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2232 op->args[k++]));
2234 for (i = 0; i < nb_iargs; i++) {
2235 if (k != 0) {
2236 col += qemu_log(",");
2238 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2239 op->args[k++]));
2241 switch (c) {
2242 case INDEX_op_brcond_i32:
2243 case INDEX_op_setcond_i32:
2244 case INDEX_op_movcond_i32:
2245 case INDEX_op_brcond2_i32:
2246 case INDEX_op_setcond2_i32:
2247 case INDEX_op_brcond_i64:
2248 case INDEX_op_setcond_i64:
2249 case INDEX_op_movcond_i64:
2250 case INDEX_op_cmp_vec:
2251 case INDEX_op_cmpsel_vec:
2252 if (op->args[k] < ARRAY_SIZE(cond_name)
2253 && cond_name[op->args[k]]) {
2254 col += qemu_log(",%s", cond_name[op->args[k++]]);
2255 } else {
2256 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2258 i = 1;
2259 break;
2260 case INDEX_op_qemu_ld_i32:
2261 case INDEX_op_qemu_st_i32:
2262 case INDEX_op_qemu_st8_i32:
2263 case INDEX_op_qemu_ld_i64:
2264 case INDEX_op_qemu_st_i64:
2266 TCGMemOpIdx oi = op->args[k++];
2267 MemOp op = get_memop(oi);
2268 unsigned ix = get_mmuidx(oi);
2270 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2271 col += qemu_log(",$0x%x,%u", op, ix);
2272 } else {
2273 const char *s_al, *s_op;
2274 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2275 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2276 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2278 i = 1;
2280 break;
2281 default:
2282 i = 0;
2283 break;
2285 switch (c) {
2286 case INDEX_op_set_label:
2287 case INDEX_op_br:
2288 case INDEX_op_brcond_i32:
2289 case INDEX_op_brcond_i64:
2290 case INDEX_op_brcond2_i32:
2291 col += qemu_log("%s$L%d", k ? "," : "",
2292 arg_label(op->args[k])->id);
2293 i++, k++;
2294 break;
2295 default:
2296 break;
2298 for (; i < nb_cargs; i++, k++) {
2299 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2303 if (have_prefs || op->life) {
2305 QemuLogFile *logfile;
2307 rcu_read_lock();
2308 logfile = qatomic_rcu_read(&qemu_logfile);
2309 if (logfile) {
2310 for (; col < 40; ++col) {
2311 putc(' ', logfile->fd);
2314 rcu_read_unlock();
2317 if (op->life) {
2318 unsigned life = op->life;
2320 if (life & (SYNC_ARG * 3)) {
2321 qemu_log(" sync:");
2322 for (i = 0; i < 2; ++i) {
2323 if (life & (SYNC_ARG << i)) {
2324 qemu_log(" %d", i);
2328 life /= DEAD_ARG;
2329 if (life) {
2330 qemu_log(" dead:");
2331 for (i = 0; life; ++i, life >>= 1) {
2332 if (life & 1) {
2333 qemu_log(" %d", i);
2339 if (have_prefs) {
2340 for (i = 0; i < nb_oargs; ++i) {
2341 TCGRegSet set = op->output_pref[i];
2343 if (i == 0) {
2344 qemu_log(" pref=");
2345 } else {
2346 qemu_log(",");
2348 if (set == 0) {
2349 qemu_log("none");
2350 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2351 qemu_log("all");
2352 #ifdef CONFIG_DEBUG_TCG
2353 } else if (tcg_regset_single(set)) {
2354 TCGReg reg = tcg_regset_first(set);
2355 qemu_log("%s", tcg_target_reg_names[reg]);
2356 #endif
2357 } else if (TCG_TARGET_NB_REGS <= 32) {
2358 qemu_log("%#x", (uint32_t)set);
2359 } else {
2360 qemu_log("%#" PRIx64, (uint64_t)set);
2365 qemu_log("\n");
2369 /* we give more priority to constraints with fewer registers */
2370 static int get_constraint_priority(const TCGOpDef *def, int k)
2372 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2373 int n;
2375 if (arg_ct->oalias) {
2376 /* an alias is equivalent to a single register */
2377 n = 1;
2378 } else {
2379 n = ctpop64(arg_ct->regs);
2381 return TCG_TARGET_NB_REGS - n + 1;
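/*
 * Editorial sketch (illustration only): on a hypothetical 16-register
 * target, a constraint that accepts any register gets priority
 * 16 - 16 + 1 = 1, while one tied to a single register gets
 * 16 - 1 + 1 = 16 and is therefore placed first by sort_constraints()
 * below.
 */
static inline int example_constraint_priority(uint64_t regs, int nb_regs)
{
    return nb_regs - ctpop64(regs) + 1;   /* same formula as above */
}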
2384 /* sort from highest priority to lowest */
2385 static void sort_constraints(TCGOpDef *def, int start, int n)
2387 int i, j;
2388 TCGArgConstraint *a = def->args_ct;
2390 for (i = 0; i < n; i++) {
2391 a[start + i].sort_index = start + i;
2393 if (n <= 1) {
2394 return;
2396 for (i = 0; i < n - 1; i++) {
2397 for (j = i + 1; j < n; j++) {
2398 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2399 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2400 if (p1 < p2) {
2401 int tmp = a[start + i].sort_index;
2402 a[start + i].sort_index = a[start + j].sort_index;
2403 a[start + j].sort_index = tmp;
2409 static void process_op_defs(TCGContext *s)
2411 TCGOpcode op;
2413 for (op = 0; op < NB_OPS; op++) {
2414 TCGOpDef *def = &tcg_op_defs[op];
2415 const TCGTargetOpDef *tdefs;
2416 int i, nb_args;
2418 if (def->flags & TCG_OPF_NOT_PRESENT) {
2419 continue;
2422 nb_args = def->nb_iargs + def->nb_oargs;
2423 if (nb_args == 0) {
2424 continue;
2427 tdefs = tcg_target_op_def(op);
2428 /* Missing TCGTargetOpDef entry. */
2429 tcg_debug_assert(tdefs != NULL);
2431 for (i = 0; i < nb_args; i++) {
2432 const char *ct_str = tdefs->args_ct_str[i];
2433 /* Incomplete TCGTargetOpDef entry. */
2434 tcg_debug_assert(ct_str != NULL);
2436 while (*ct_str != '\0') {
2437 switch(*ct_str) {
2438 case '0' ... '9':
2440 int oarg = *ct_str - '0';
2441 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2442 tcg_debug_assert(oarg < def->nb_oargs);
2443 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2444 def->args_ct[i] = def->args_ct[oarg];
2445 /* The output sets oalias. */
2446 def->args_ct[oarg].oalias = true;
2447 def->args_ct[oarg].alias_index = i;
2448 /* The input sets ialias. */
2449 def->args_ct[i].ialias = true;
2450 def->args_ct[i].alias_index = oarg;
2452 ct_str++;
2453 break;
2454 case '&':
2455 def->args_ct[i].newreg = true;
2456 ct_str++;
2457 break;
2458 case 'i':
2459 def->args_ct[i].ct |= TCG_CT_CONST;
2460 ct_str++;
2461 break;
2463 /* Include all of the target-specific constraints. */
2465 #undef CONST
2466 #define CONST(CASE, MASK) \
2467 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2468 #define REGS(CASE, MASK) \
2469 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2471 #include "tcg-target-con-str.h"
2473 #undef REGS
2474 #undef CONST
2475 default:
2476 /* Typo in TCGTargetOpDef constraint. */
2477 g_assert_not_reached();
2482 /* TCGTargetOpDef entry with too much information? */
2483 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2485 /* sort the constraints (XXX: this is just a heuristic) */
2486 sort_constraints(def, 0, def->nb_oargs);
2487 sort_constraints(def, def->nb_oargs, def->nb_iargs);
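/*
 * Editorial sketch (hypothetical, for illustration): the strings walked by
 * the parser above come from the backend's tcg_target_op_def().  For a
 * three-operand add, a backend might describe one general-register output
 * and two inputs, the second of which may also be an immediate:
 */
static const TCGTargetOpDef example_add_def __attribute__((unused)) = {
    .args_ct_str = { "r", "r", "ri" }
};
/*
 * A digit in an input position aliases that input to the named output (the
 * '0' ... '9' case), '&' marks a new-register output, and any other letter
 * is resolved by the target's tcg-target-con-str.h through the CONST() and
 * REGS() macros included above.
 */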
2491 void tcg_op_remove(TCGContext *s, TCGOp *op)
2493 TCGLabel *label;
2495 switch (op->opc) {
2496 case INDEX_op_br:
2497 label = arg_label(op->args[0]);
2498 label->refs--;
2499 break;
2500 case INDEX_op_brcond_i32:
2501 case INDEX_op_brcond_i64:
2502 label = arg_label(op->args[3]);
2503 label->refs--;
2504 break;
2505 case INDEX_op_brcond2_i32:
2506 label = arg_label(op->args[5]);
2507 label->refs--;
2508 break;
2509 default:
2510 break;
2513 QTAILQ_REMOVE(&s->ops, op, link);
2514 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2515 s->nb_ops--;
2517 #ifdef CONFIG_PROFILER
2518 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2519 #endif
2522 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2524 TCGContext *s = tcg_ctx;
2525 TCGOp *op;
2527 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2528 op = tcg_malloc(sizeof(TCGOp));
2529 } else {
2530 op = QTAILQ_FIRST(&s->free_ops);
2531 QTAILQ_REMOVE(&s->free_ops, op, link);
2533 memset(op, 0, offsetof(TCGOp, link));
2534 op->opc = opc;
2535 s->nb_ops++;
2537 return op;
2540 TCGOp *tcg_emit_op(TCGOpcode opc)
2542 TCGOp *op = tcg_op_alloc(opc);
2543 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2544 return op;
2547 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2549 TCGOp *new_op = tcg_op_alloc(opc);
2550 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2551 return new_op;
2554 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2556 TCGOp *new_op = tcg_op_alloc(opc);
2557 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2558 return new_op;
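/*
 * Editorial sketch (illustration only): liveness_pass_2() below uses
 * tcg_op_insert_before() in exactly this way to materialise a load of an
 * indirect global right before the op that consumes it.  The names here
 * are illustrative.
 */
static inline TCGOp *example_insert_load_before(TCGContext *s, TCGOp *op,
                                                TCGTemp *dst, TCGTemp *base,
                                                intptr_t offset)
{
    TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld_i32);

    lop->args[0] = temp_arg(dst);     /* value loaded into this temp   */
    lop->args[1] = temp_arg(base);    /* base address temp (e.g. env)  */
    lop->args[2] = offset;            /* byte offset of the global     */
    return lop;
}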
2561 /* Reachability analysis: remove unreachable code. */
2562 static void reachable_code_pass(TCGContext *s)
2564 TCGOp *op, *op_next;
2565 bool dead = false;
2567 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2568 bool remove = dead;
2569 TCGLabel *label;
2570 int call_flags;
2572 switch (op->opc) {
2573 case INDEX_op_set_label:
2574 label = arg_label(op->args[0]);
2575 if (label->refs == 0) {
2577 * While there is an occasional backward branch, virtually
2578 * all branches generated by the translators are forward,
2579 * which means that by the time we reach a label we will
2580 * generally have already removed every reference to it,
2581 * and there is little to be gained by iterating.
2583 remove = true;
2584 } else {
2585 /* Once we see a label, insns become live again. */
2586 dead = false;
2587 remove = false;
2590 * Optimization can fold conditional branches to unconditional.
2591 * If we find a label with one reference which is preceded by
2592 * an unconditional branch to it, remove both. This needed to
2593 * wait until the dead code in between them was removed.
2595 if (label->refs == 1) {
2596 TCGOp *op_prev = QTAILQ_PREV(op, link);
2597 if (op_prev->opc == INDEX_op_br &&
2598 label == arg_label(op_prev->args[0])) {
2599 tcg_op_remove(s, op_prev);
2600 remove = true;
2604 break;
2606 case INDEX_op_br:
2607 case INDEX_op_exit_tb:
2608 case INDEX_op_goto_ptr:
2609 /* Unconditional branches; everything following is dead. */
2610 dead = true;
2611 break;
2613 case INDEX_op_call:
2614 /* Notice noreturn helper calls, raising exceptions. */
2615 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2616 if (call_flags & TCG_CALL_NO_RETURN) {
2617 dead = true;
2619 break;
2621 case INDEX_op_insn_start:
2622 /* Never remove -- we need to keep these for unwind. */
2623 remove = false;
2624 break;
2626 default:
2627 break;
2630 if (remove) {
2631 tcg_op_remove(s, op);
2636 #define TS_DEAD 1
2637 #define TS_MEM 2
2639 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2640 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
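/*
 * Editorial sketch (illustration only): op->life packs sync bits for the
 * outputs and dead bits for every argument, filled in by liveness_pass_1()
 * below.  With a local named arg_life, the two macros above are all that
 * is needed to query it:
 */
static inline void example_query_arg_life(const TCGOp *op)
{
    TCGLifeData arg_life = op->life;

    bool sync_out0 = NEED_SYNC_ARG(0);   /* output 0 must be written back */
    bool dead_in1 = IS_DEAD_ARG(1);      /* argument 1 has no later use   */

    (void)sync_out0; (void)dead_in1;
}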
2642 /* For liveness_pass_1, the register preferences for a given temp. */
2643 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2645 return ts->state_ptr;
2648 /* For liveness_pass_1, reset the preferences for a given temp to the
2649 * maximal regset for its type.
2651 static inline void la_reset_pref(TCGTemp *ts)
2653 *la_temp_pref(ts)
2654 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2657 /* liveness analysis: end of function: all temps are dead, and globals
2658 should be in memory. */
2659 static void la_func_end(TCGContext *s, int ng, int nt)
2661 int i;
2663 for (i = 0; i < ng; ++i) {
2664 s->temps[i].state = TS_DEAD | TS_MEM;
2665 la_reset_pref(&s->temps[i]);
2667 for (i = ng; i < nt; ++i) {
2668 s->temps[i].state = TS_DEAD;
2669 la_reset_pref(&s->temps[i]);
2673 /* liveness analysis: end of basic block: all temps are dead, globals
2674 and local temps should be in memory. */
2675 static void la_bb_end(TCGContext *s, int ng, int nt)
2677 int i;
2679 for (i = 0; i < nt; ++i) {
2680 TCGTemp *ts = &s->temps[i];
2681 int state;
2683 switch (ts->kind) {
2684 case TEMP_FIXED:
2685 case TEMP_GLOBAL:
2686 case TEMP_LOCAL:
2687 state = TS_DEAD | TS_MEM;
2688 break;
2689 case TEMP_NORMAL:
2690 case TEMP_CONST:
2691 state = TS_DEAD;
2692 break;
2693 default:
2694 g_assert_not_reached();
2696 ts->state = state;
2697 la_reset_pref(ts);
2701 /* liveness analysis: sync globals back to memory. */
2702 static void la_global_sync(TCGContext *s, int ng)
2704 int i;
2706 for (i = 0; i < ng; ++i) {
2707 int state = s->temps[i].state;
2708 s->temps[i].state = state | TS_MEM;
2709 if (state == TS_DEAD) {
2710 /* If the global was previously dead, reset prefs. */
2711 la_reset_pref(&s->temps[i]);
2717 * liveness analysis: conditional branch: all temps are dead,
2718 * globals and local temps should be synced.
2720 static void la_bb_sync(TCGContext *s, int ng, int nt)
2722 la_global_sync(s, ng);
2724 for (int i = ng; i < nt; ++i) {
2725 TCGTemp *ts = &s->temps[i];
2726 int state;
2728 switch (ts->kind) {
2729 case TEMP_LOCAL:
2730 state = ts->state;
2731 ts->state = state | TS_MEM;
2732 if (state != TS_DEAD) {
2733 continue;
2735 break;
2736 case TEMP_NORMAL:
2737 s->temps[i].state = TS_DEAD;
2738 break;
2739 case TEMP_CONST:
2740 continue;
2741 default:
2742 g_assert_not_reached();
2744 la_reset_pref(&s->temps[i]);
2748 /* liveness analysis: sync globals back to memory and kill. */
2749 static void la_global_kill(TCGContext *s, int ng)
2751 int i;
2753 for (i = 0; i < ng; i++) {
2754 s->temps[i].state = TS_DEAD | TS_MEM;
2755 la_reset_pref(&s->temps[i]);
2759 /* liveness analysis: note live globals crossing calls. */
2760 static void la_cross_call(TCGContext *s, int nt)
2762 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2763 int i;
2765 for (i = 0; i < nt; i++) {
2766 TCGTemp *ts = &s->temps[i];
2767 if (!(ts->state & TS_DEAD)) {
2768 TCGRegSet *pset = la_temp_pref(ts);
2769 TCGRegSet set = *pset;
2771 set &= mask;
2772 /* If the combination is not possible, restart. */
2773 if (set == 0) {
2774 set = tcg_target_available_regs[ts->type] & mask;
2776 *pset = set;
2781 /* Liveness analysis: update the opc_arg_life array to tell if a
2782 given input argument is dead. Instructions updating dead
2783 temporaries are removed. */
2784 static void liveness_pass_1(TCGContext *s)
2786 int nb_globals = s->nb_globals;
2787 int nb_temps = s->nb_temps;
2788 TCGOp *op, *op_prev;
2789 TCGRegSet *prefs;
2790 int i;
2792 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2793 for (i = 0; i < nb_temps; ++i) {
2794 s->temps[i].state_ptr = prefs + i;
2797 /* ??? Should be redundant with the exit_tb that ends the TB. */
2798 la_func_end(s, nb_globals, nb_temps);
2800 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2801 int nb_iargs, nb_oargs;
2802 TCGOpcode opc_new, opc_new2;
2803 bool have_opc_new2;
2804 TCGLifeData arg_life = 0;
2805 TCGTemp *ts;
2806 TCGOpcode opc = op->opc;
2807 const TCGOpDef *def = &tcg_op_defs[opc];
2809 switch (opc) {
2810 case INDEX_op_call:
2812 int call_flags;
2813 int nb_call_regs;
2815 nb_oargs = TCGOP_CALLO(op);
2816 nb_iargs = TCGOP_CALLI(op);
2817 call_flags = op->args[nb_oargs + nb_iargs + 1];
2819 /* pure functions can be removed if their result is unused */
2820 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2821 for (i = 0; i < nb_oargs; i++) {
2822 ts = arg_temp(op->args[i]);
2823 if (ts->state != TS_DEAD) {
2824 goto do_not_remove_call;
2827 goto do_remove;
2829 do_not_remove_call:
2831 /* Output args are dead. */
2832 for (i = 0; i < nb_oargs; i++) {
2833 ts = arg_temp(op->args[i]);
2834 if (ts->state & TS_DEAD) {
2835 arg_life |= DEAD_ARG << i;
2837 if (ts->state & TS_MEM) {
2838 arg_life |= SYNC_ARG << i;
2840 ts->state = TS_DEAD;
2841 la_reset_pref(ts);
2843 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2844 op->output_pref[i] = 0;
2847 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2848 TCG_CALL_NO_READ_GLOBALS))) {
2849 la_global_kill(s, nb_globals);
2850 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2851 la_global_sync(s, nb_globals);
2854 /* Record arguments that die in this helper. */
2855 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2856 ts = arg_temp(op->args[i]);
2857 if (ts && ts->state & TS_DEAD) {
2858 arg_life |= DEAD_ARG << i;
2862 /* For all live registers, remove call-clobbered prefs. */
2863 la_cross_call(s, nb_temps);
2865 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2867 /* Input arguments are live for preceding opcodes. */
2868 for (i = 0; i < nb_iargs; i++) {
2869 ts = arg_temp(op->args[i + nb_oargs]);
2870 if (ts && ts->state & TS_DEAD) {
2871 /* For those arguments that die, and will be allocated
2872 * in registers, clear the register set for that arg,
2873 * to be filled in below. For args that will be on
2874 * the stack, reset to any available reg.
2876 *la_temp_pref(ts)
2877 = (i < nb_call_regs ? 0 :
2878 tcg_target_available_regs[ts->type]);
2879 ts->state &= ~TS_DEAD;
2883 /* For each input argument, add its input register to prefs.
2884 If a temp is used once, this produces a single set bit. */
2885 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2886 ts = arg_temp(op->args[i + nb_oargs]);
2887 if (ts) {
2888 tcg_regset_set_reg(*la_temp_pref(ts),
2889 tcg_target_call_iarg_regs[i]);
2893 break;
2894 case INDEX_op_insn_start:
2895 break;
2896 case INDEX_op_discard:
2897 /* mark the temporary as dead */
2898 ts = arg_temp(op->args[0]);
2899 ts->state = TS_DEAD;
2900 la_reset_pref(ts);
2901 break;
2903 case INDEX_op_add2_i32:
2904 opc_new = INDEX_op_add_i32;
2905 goto do_addsub2;
2906 case INDEX_op_sub2_i32:
2907 opc_new = INDEX_op_sub_i32;
2908 goto do_addsub2;
2909 case INDEX_op_add2_i64:
2910 opc_new = INDEX_op_add_i64;
2911 goto do_addsub2;
2912 case INDEX_op_sub2_i64:
2913 opc_new = INDEX_op_sub_i64;
2914 do_addsub2:
2915 nb_iargs = 4;
2916 nb_oargs = 2;
2917 /* Test if the high part of the operation is dead, but not
2918 the low part. The result can be optimized to a simple
2919 add or sub. This happens often for an x86_64 guest when
2920 the CPU mode is set to 32-bit. */
2921 if (arg_temp(op->args[1])->state == TS_DEAD) {
2922 if (arg_temp(op->args[0])->state == TS_DEAD) {
2923 goto do_remove;
2925 /* Replace the opcode and adjust the args in place,
2926 leaving 3 unused args at the end. */
2927 op->opc = opc = opc_new;
2928 op->args[1] = op->args[2];
2929 op->args[2] = op->args[4];
2930 /* Fall through and mark the single-word operation live. */
2931 nb_iargs = 2;
2932 nb_oargs = 1;
2934 goto do_not_remove;
2936 case INDEX_op_mulu2_i32:
2937 opc_new = INDEX_op_mul_i32;
2938 opc_new2 = INDEX_op_muluh_i32;
2939 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2940 goto do_mul2;
2941 case INDEX_op_muls2_i32:
2942 opc_new = INDEX_op_mul_i32;
2943 opc_new2 = INDEX_op_mulsh_i32;
2944 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2945 goto do_mul2;
2946 case INDEX_op_mulu2_i64:
2947 opc_new = INDEX_op_mul_i64;
2948 opc_new2 = INDEX_op_muluh_i64;
2949 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2950 goto do_mul2;
2951 case INDEX_op_muls2_i64:
2952 opc_new = INDEX_op_mul_i64;
2953 opc_new2 = INDEX_op_mulsh_i64;
2954 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2955 goto do_mul2;
2956 do_mul2:
2957 nb_iargs = 2;
2958 nb_oargs = 2;
2959 if (arg_temp(op->args[1])->state == TS_DEAD) {
2960 if (arg_temp(op->args[0])->state == TS_DEAD) {
2961 /* Both parts of the operation are dead. */
2962 goto do_remove;
2964 /* The high part of the operation is dead; generate the low. */
2965 op->opc = opc = opc_new;
2966 op->args[1] = op->args[2];
2967 op->args[2] = op->args[3];
2968 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2969 /* The low part of the operation is dead; generate the high. */
2970 op->opc = opc = opc_new2;
2971 op->args[0] = op->args[1];
2972 op->args[1] = op->args[2];
2973 op->args[2] = op->args[3];
2974 } else {
2975 goto do_not_remove;
2977 /* Mark the single-word operation live. */
2978 nb_oargs = 1;
2979 goto do_not_remove;
2981 default:
2982 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2983 nb_iargs = def->nb_iargs;
2984 nb_oargs = def->nb_oargs;
2986 /* Test if the operation can be removed because all
2987 its outputs are dead. We assume that nb_oargs == 0
2988 implies side effects. */
2989 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2990 for (i = 0; i < nb_oargs; i++) {
2991 if (arg_temp(op->args[i])->state != TS_DEAD) {
2992 goto do_not_remove;
2995 goto do_remove;
2997 goto do_not_remove;
2999 do_remove:
3000 tcg_op_remove(s, op);
3001 break;
3003 do_not_remove:
3004 for (i = 0; i < nb_oargs; i++) {
3005 ts = arg_temp(op->args[i]);
3007 /* Remember the preference of the uses that followed. */
3008 op->output_pref[i] = *la_temp_pref(ts);
3010 /* Output args are dead. */
3011 if (ts->state & TS_DEAD) {
3012 arg_life |= DEAD_ARG << i;
3014 if (ts->state & TS_MEM) {
3015 arg_life |= SYNC_ARG << i;
3017 ts->state = TS_DEAD;
3018 la_reset_pref(ts);
3021 /* If end of basic block, update. */
3022 if (def->flags & TCG_OPF_BB_EXIT) {
3023 la_func_end(s, nb_globals, nb_temps);
3024 } else if (def->flags & TCG_OPF_COND_BRANCH) {
3025 la_bb_sync(s, nb_globals, nb_temps);
3026 } else if (def->flags & TCG_OPF_BB_END) {
3027 la_bb_end(s, nb_globals, nb_temps);
3028 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3029 la_global_sync(s, nb_globals);
3030 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3031 la_cross_call(s, nb_temps);
3035 /* Record arguments that die in this opcode. */
3036 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3037 ts = arg_temp(op->args[i]);
3038 if (ts->state & TS_DEAD) {
3039 arg_life |= DEAD_ARG << i;
3043 /* Input arguments are live for preceding opcodes. */
3044 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3045 ts = arg_temp(op->args[i]);
3046 if (ts->state & TS_DEAD) {
3047 /* For operands that were dead, initially allow
3048 all regs for the type. */
3049 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3050 ts->state &= ~TS_DEAD;
3054 /* Incorporate constraints for this operand. */
3055 switch (opc) {
3056 case INDEX_op_mov_i32:
3057 case INDEX_op_mov_i64:
3058 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3059 have proper constraints. That said, special case
3060 moves to propagate preferences backward. */
3061 if (IS_DEAD_ARG(1)) {
3062 *la_temp_pref(arg_temp(op->args[0]))
3063 = *la_temp_pref(arg_temp(op->args[1]));
3065 break;
3067 default:
3068 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3069 const TCGArgConstraint *ct = &def->args_ct[i];
3070 TCGRegSet set, *pset;
3072 ts = arg_temp(op->args[i]);
3073 pset = la_temp_pref(ts);
3074 set = *pset;
3076 set &= ct->regs;
3077 if (ct->ialias) {
3078 set &= op->output_pref[ct->alias_index];
3080 /* If the combination is not possible, restart. */
3081 if (set == 0) {
3082 set = ct->regs;
3084 *pset = set;
3086 break;
3088 break;
3090 op->life = arg_life;
3094 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3095 static bool liveness_pass_2(TCGContext *s)
3097 int nb_globals = s->nb_globals;
3098 int nb_temps, i;
3099 bool changes = false;
3100 TCGOp *op, *op_next;
3102 /* Create a temporary for each indirect global. */
3103 for (i = 0; i < nb_globals; ++i) {
3104 TCGTemp *its = &s->temps[i];
3105 if (its->indirect_reg) {
3106 TCGTemp *dts = tcg_temp_alloc(s);
3107 dts->type = its->type;
3108 dts->base_type = its->base_type;
3109 its->state_ptr = dts;
3110 } else {
3111 its->state_ptr = NULL;
3113 /* All globals begin dead. */
3114 its->state = TS_DEAD;
3116 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3117 TCGTemp *its = &s->temps[i];
3118 its->state_ptr = NULL;
3119 its->state = TS_DEAD;
3122 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3123 TCGOpcode opc = op->opc;
3124 const TCGOpDef *def = &tcg_op_defs[opc];
3125 TCGLifeData arg_life = op->life;
3126 int nb_iargs, nb_oargs, call_flags;
3127 TCGTemp *arg_ts, *dir_ts;
3129 if (opc == INDEX_op_call) {
3130 nb_oargs = TCGOP_CALLO(op);
3131 nb_iargs = TCGOP_CALLI(op);
3132 call_flags = op->args[nb_oargs + nb_iargs + 1];
3133 } else {
3134 nb_iargs = def->nb_iargs;
3135 nb_oargs = def->nb_oargs;
3137 /* Set flags similar to how calls require. */
3138 if (def->flags & TCG_OPF_COND_BRANCH) {
3139 /* Like reading globals: sync_globals */
3140 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3141 } else if (def->flags & TCG_OPF_BB_END) {
3142 /* Like writing globals: save_globals */
3143 call_flags = 0;
3144 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3145 /* Like reading globals: sync_globals */
3146 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3147 } else {
3148 /* No effect on globals. */
3149 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3150 TCG_CALL_NO_WRITE_GLOBALS);
3154 /* Make sure that input arguments are available. */
3155 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3156 arg_ts = arg_temp(op->args[i]);
3157 if (arg_ts) {
3158 dir_ts = arg_ts->state_ptr;
3159 if (dir_ts && arg_ts->state == TS_DEAD) {
3160 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3161 ? INDEX_op_ld_i32
3162 : INDEX_op_ld_i64);
3163 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3165 lop->args[0] = temp_arg(dir_ts);
3166 lop->args[1] = temp_arg(arg_ts->mem_base);
3167 lop->args[2] = arg_ts->mem_offset;
3169 /* Loaded, but synced with memory. */
3170 arg_ts->state = TS_MEM;
3175 /* Perform input replacement, and mark inputs that became dead.
3176 No action is required except keeping temp_state up to date
3177 so that we reload when needed. */
3178 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3179 arg_ts = arg_temp(op->args[i]);
3180 if (arg_ts) {
3181 dir_ts = arg_ts->state_ptr;
3182 if (dir_ts) {
3183 op->args[i] = temp_arg(dir_ts);
3184 changes = true;
3185 if (IS_DEAD_ARG(i)) {
3186 arg_ts->state = TS_DEAD;
3192 /* Liveness analysis should ensure that the following are
3193 all correct, for call sites and basic block end points. */
3194 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3195 /* Nothing to do */
3196 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3197 for (i = 0; i < nb_globals; ++i) {
3198 /* Liveness should see that globals are synced back,
3199 that is, either TS_DEAD or TS_MEM. */
3200 arg_ts = &s->temps[i];
3201 tcg_debug_assert(arg_ts->state_ptr == 0
3202 || arg_ts->state != 0);
3204 } else {
3205 for (i = 0; i < nb_globals; ++i) {
3206 /* Liveness should see that globals are saved back,
3207 that is, TS_DEAD, waiting to be reloaded. */
3208 arg_ts = &s->temps[i];
3209 tcg_debug_assert(arg_ts->state_ptr == 0
3210 || arg_ts->state == TS_DEAD);
3214 /* Outputs become available. */
3215 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3216 arg_ts = arg_temp(op->args[0]);
3217 dir_ts = arg_ts->state_ptr;
3218 if (dir_ts) {
3219 op->args[0] = temp_arg(dir_ts);
3220 changes = true;
3222 /* The output is now live and modified. */
3223 arg_ts->state = 0;
3225 if (NEED_SYNC_ARG(0)) {
3226 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3227 ? INDEX_op_st_i32
3228 : INDEX_op_st_i64);
3229 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3230 TCGTemp *out_ts = dir_ts;
3232 if (IS_DEAD_ARG(0)) {
3233 out_ts = arg_temp(op->args[1]);
3234 arg_ts->state = TS_DEAD;
3235 tcg_op_remove(s, op);
3236 } else {
3237 arg_ts->state = TS_MEM;
3240 sop->args[0] = temp_arg(out_ts);
3241 sop->args[1] = temp_arg(arg_ts->mem_base);
3242 sop->args[2] = arg_ts->mem_offset;
3243 } else {
3244 tcg_debug_assert(!IS_DEAD_ARG(0));
3247 } else {
3248 for (i = 0; i < nb_oargs; i++) {
3249 arg_ts = arg_temp(op->args[i]);
3250 dir_ts = arg_ts->state_ptr;
3251 if (!dir_ts) {
3252 continue;
3254 op->args[i] = temp_arg(dir_ts);
3255 changes = true;
3257 /* The output is now live and modified. */
3258 arg_ts->state = 0;
3260 /* Sync outputs upon their last write. */
3261 if (NEED_SYNC_ARG(i)) {
3262 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3263 ? INDEX_op_st_i32
3264 : INDEX_op_st_i64);
3265 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3267 sop->args[0] = temp_arg(dir_ts);
3268 sop->args[1] = temp_arg(arg_ts->mem_base);
3269 sop->args[2] = arg_ts->mem_offset;
3271 arg_ts->state = TS_MEM;
3273 /* Drop outputs that are dead. */
3274 if (IS_DEAD_ARG(i)) {
3275 arg_ts->state = TS_DEAD;
3281 return changes;
3284 #ifdef CONFIG_DEBUG_TCG
3285 static void dump_regs(TCGContext *s)
3287 TCGTemp *ts;
3288 int i;
3289 char buf[64];
3291 for(i = 0; i < s->nb_temps; i++) {
3292 ts = &s->temps[i];
3293 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3294 switch(ts->val_type) {
3295 case TEMP_VAL_REG:
3296 printf("%s", tcg_target_reg_names[ts->reg]);
3297 break;
3298 case TEMP_VAL_MEM:
3299 printf("%d(%s)", (int)ts->mem_offset,
3300 tcg_target_reg_names[ts->mem_base->reg]);
3301 break;
3302 case TEMP_VAL_CONST:
3303 printf("$0x%" PRIx64, ts->val);
3304 break;
3305 case TEMP_VAL_DEAD:
3306 printf("D");
3307 break;
3308 default:
3309 printf("???");
3310 break;
3312 printf("\n");
3315 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3316 if (s->reg_to_temp[i] != NULL) {
3317 printf("%s: %s\n",
3318 tcg_target_reg_names[i],
3319 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3324 static void check_regs(TCGContext *s)
3326 int reg;
3327 int k;
3328 TCGTemp *ts;
3329 char buf[64];
3331 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3332 ts = s->reg_to_temp[reg];
3333 if (ts != NULL) {
3334 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3335 printf("Inconsistency for register %s:\n",
3336 tcg_target_reg_names[reg]);
3337 goto fail;
3341 for (k = 0; k < s->nb_temps; k++) {
3342 ts = &s->temps[k];
3343 if (ts->val_type == TEMP_VAL_REG
3344 && ts->kind != TEMP_FIXED
3345 && s->reg_to_temp[ts->reg] != ts) {
3346 printf("Inconsistency for temp %s:\n",
3347 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3348 fail:
3349 printf("reg state:\n");
3350 dump_regs(s);
3351 tcg_abort();
3355 #endif
3357 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3359 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3360 /* Sparc64 stack is accessed with offset of 2047 */
3361 s->current_frame_offset = (s->current_frame_offset +
3362 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3363 ~(sizeof(tcg_target_long) - 1);
3364 #endif
3365 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3366 s->frame_end) {
3367 tcg_abort();
3369 ts->mem_offset = s->current_frame_offset;
3370 ts->mem_base = s->frame_temp;
3371 ts->mem_allocated = 1;
3372 s->current_frame_offset += sizeof(tcg_target_long);
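/*
 * Editorial sketch (illustration only): the rounding above is the usual
 * align-up idiom.  With an 8-byte tcg_target_long, a current offset of 12
 * is first rounded up to 16, the new slot occupies bytes [16, 24), and the
 * next allocation starts at 24.
 */
static inline intptr_t example_align_up(intptr_t offset, intptr_t align)
{
    return (offset + align - 1) & ~(align - 1);   /* align: power of two */
}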
3375 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3377 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3378 mark it free; otherwise mark it dead. */
3379 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3381 TCGTempVal new_type;
3383 switch (ts->kind) {
3384 case TEMP_FIXED:
3385 return;
3386 case TEMP_GLOBAL:
3387 case TEMP_LOCAL:
3388 new_type = TEMP_VAL_MEM;
3389 break;
3390 case TEMP_NORMAL:
3391 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3392 break;
3393 case TEMP_CONST:
3394 new_type = TEMP_VAL_CONST;
3395 break;
3396 default:
3397 g_assert_not_reached();
3399 if (ts->val_type == TEMP_VAL_REG) {
3400 s->reg_to_temp[ts->reg] = NULL;
3402 ts->val_type = new_type;
3405 /* Mark a temporary as dead. */
3406 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3408 temp_free_or_dead(s, ts, 1);
3411 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3412 register needs to be allocated to store a constant. If 'free_or_dead'
3413 is non-zero, subsequently release the temporary; if it is positive, the
3414 temp is dead; if it is negative, the temp is free. */
3415 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3416 TCGRegSet preferred_regs, int free_or_dead)
3418 if (!temp_readonly(ts) && !ts->mem_coherent) {
3419 if (!ts->mem_allocated) {
3420 temp_allocate_frame(s, ts);
3422 switch (ts->val_type) {
3423 case TEMP_VAL_CONST:
3424 /* If we're going to free the temp immediately, then we won't
3425 require it later in a register, so attempt to store the
3426 constant to memory directly. */
3427 if (free_or_dead
3428 && tcg_out_sti(s, ts->type, ts->val,
3429 ts->mem_base->reg, ts->mem_offset)) {
3430 break;
3432 temp_load(s, ts, tcg_target_available_regs[ts->type],
3433 allocated_regs, preferred_regs);
3434 /* fallthrough */
3436 case TEMP_VAL_REG:
3437 tcg_out_st(s, ts->type, ts->reg,
3438 ts->mem_base->reg, ts->mem_offset);
3439 break;
3441 case TEMP_VAL_MEM:
3442 break;
3444 case TEMP_VAL_DEAD:
3445 default:
3446 tcg_abort();
3448 ts->mem_coherent = 1;
3450 if (free_or_dead) {
3451 temp_free_or_dead(s, ts, free_or_dead);
3455 /* free register 'reg' by spilling the corresponding temporary if necessary */
3456 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3458 TCGTemp *ts = s->reg_to_temp[reg];
3459 if (ts != NULL) {
3460 temp_sync(s, ts, allocated_regs, 0, -1);
3465 * tcg_reg_alloc:
3466 * @required_regs: Set of registers in which we must allocate.
3467 * @allocated_regs: Set of registers which must be avoided.
3468 * @preferred_regs: Set of registers we should prefer.
3469 * @rev: True if we search the registers in "indirect" order.
3471 * The allocated register must be in @required_regs & ~@allocated_regs,
3472 * but if we can put it in @preferred_regs we may save a move later.
3474 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3475 TCGRegSet allocated_regs,
3476 TCGRegSet preferred_regs, bool rev)
3478 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3479 TCGRegSet reg_ct[2];
3480 const int *order;
3482 reg_ct[1] = required_regs & ~allocated_regs;
3483 tcg_debug_assert(reg_ct[1] != 0);
3484 reg_ct[0] = reg_ct[1] & preferred_regs;
3486 /* Skip the preferred_regs option if it cannot be satisfied,
3487 or if the preference made no difference. */
3488 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3490 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3492 /* Try free registers, preferences first. */
3493 for (j = f; j < 2; j++) {
3494 TCGRegSet set = reg_ct[j];
3496 if (tcg_regset_single(set)) {
3497 /* One register in the set. */
3498 TCGReg reg = tcg_regset_first(set);
3499 if (s->reg_to_temp[reg] == NULL) {
3500 return reg;
3502 } else {
3503 for (i = 0; i < n; i++) {
3504 TCGReg reg = order[i];
3505 if (s->reg_to_temp[reg] == NULL &&
3506 tcg_regset_test_reg(set, reg)) {
3507 return reg;
3513 /* We must spill something. */
3514 for (j = f; j < 2; j++) {
3515 TCGRegSet set = reg_ct[j];
3517 if (tcg_regset_single(set)) {
3518 /* One register in the set. */
3519 TCGReg reg = tcg_regset_first(set);
3520 tcg_reg_free(s, reg, allocated_regs);
3521 return reg;
3522 } else {
3523 for (i = 0; i < n; i++) {
3524 TCGReg reg = order[i];
3525 if (tcg_regset_test_reg(set, reg)) {
3526 tcg_reg_free(s, reg, allocated_regs);
3527 return reg;
3533 tcg_abort();
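/*
 * Editorial sketch (illustration only): a typical caller builds the three
 * register sets like this -- the operand constraint supplies the required
 * set, registers already claimed for this op (plus the reserved ones) are
 * avoided, and the preference recorded by liveness is only a hint.
 */
static inline TCGReg example_alloc_for_constraint(TCGContext *s,
                                                  const TCGArgConstraint *ct,
                                                  TCGRegSet in_use,
                                                  TCGRegSet pref,
                                                  const TCGTemp *ts)
{
    return tcg_reg_alloc(s, ct->regs, in_use | s->reserved_regs,
                         pref, ts->indirect_base);
}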
3536 /* Make sure the temporary is in a register. If needed, allocate the register
3537 from DESIRED while avoiding ALLOCATED. */
3538 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3539 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3541 TCGReg reg;
3543 switch (ts->val_type) {
3544 case TEMP_VAL_REG:
3545 return;
3546 case TEMP_VAL_CONST:
3547 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3548 preferred_regs, ts->indirect_base);
3549 if (ts->type <= TCG_TYPE_I64) {
3550 tcg_out_movi(s, ts->type, reg, ts->val);
3551 } else {
3552 uint64_t val = ts->val;
3553 MemOp vece = MO_64;
3556 * Find the minimal vector element that matches the constant.
3557 * The targets will, in general, have to do this search anyway,
3558 * so do it generically here.
3560 if (val == dup_const(MO_8, val)) {
3561 vece = MO_8;
3562 } else if (val == dup_const(MO_16, val)) {
3563 vece = MO_16;
3564 } else if (val == dup_const(MO_32, val)) {
3565 vece = MO_32;
3568 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3570 ts->mem_coherent = 0;
3571 break;
3572 case TEMP_VAL_MEM:
3573 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3574 preferred_regs, ts->indirect_base);
3575 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3576 ts->mem_coherent = 1;
3577 break;
3578 case TEMP_VAL_DEAD:
3579 default:
3580 tcg_abort();
3582 ts->reg = reg;
3583 ts->val_type = TEMP_VAL_REG;
3584 s->reg_to_temp[reg] = ts;
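/*
 * Editorial sketch (illustration only): the minimal-vece search in
 * temp_load() above rests on dup_const() replicating a value across a
 * 64-bit word.  A couple of worked cases:
 */
static inline void example_dup_const(void)
{
    /* An already-replicated byte pattern is found at MO_8 ... */
    assert(dup_const(MO_8, 0x23) == 0x2323232323232323ull);
    /* ... whereas an arbitrary value matches none of the narrower
       patterns (MO_32 replicates only the low 32 bits). */
    assert(dup_const(MO_32, 0x0123456789abcdefull) == 0x89abcdef89abcdefull);
}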
3587 /* Save a temporary to memory. 'allocated_regs' is used in case a
3588 temporary register needs to be allocated to store a constant. */
3589 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3591 /* The liveness analysis already ensures that globals are back
3592 in memory. Keep a tcg_debug_assert for safety. */
3593 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3596 /* save globals to their canonical location and assume they can be
3597 modified by the following code. 'allocated_regs' is used in case a
3598 temporary register needs to be allocated to store a constant. */
3599 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3601 int i, n;
3603 for (i = 0, n = s->nb_globals; i < n; i++) {
3604 temp_save(s, &s->temps[i], allocated_regs);
3608 /* sync globals to their canonical location and assume they can be
3609 read by the following code. 'allocated_regs' is used in case a
3610 temporary register needs to be allocated to store a constant. */
3611 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3613 int i, n;
3615 for (i = 0, n = s->nb_globals; i < n; i++) {
3616 TCGTemp *ts = &s->temps[i];
3617 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3618 || ts->kind == TEMP_FIXED
3619 || ts->mem_coherent);
3623 /* at the end of a basic block, we assume all temporaries are dead and
3624 all globals are stored at their canonical location. */
3625 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3627 int i;
3629 for (i = s->nb_globals; i < s->nb_temps; i++) {
3630 TCGTemp *ts = &s->temps[i];
3632 switch (ts->kind) {
3633 case TEMP_LOCAL:
3634 temp_save(s, ts, allocated_regs);
3635 break;
3636 case TEMP_NORMAL:
3637 /* The liveness analysis already ensures that temps are dead.
3638 Keep a tcg_debug_assert for safety. */
3639 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3640 break;
3641 case TEMP_CONST:
3642 /* Similarly, we should have freed any allocated register. */
3643 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3644 break;
3645 default:
3646 g_assert_not_reached();
3650 save_globals(s, allocated_regs);
3654 * At a conditional branch, we assume all temporaries are dead and
3655 * all globals and local temps are synced to their location.
3657 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3659 sync_globals(s, allocated_regs);
3661 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3662 TCGTemp *ts = &s->temps[i];
3664 * The liveness analysis already ensures that temps are dead.
3665 * Keep tcg_debug_asserts for safety.
3667 switch (ts->kind) {
3668 case TEMP_LOCAL:
3669 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3670 break;
3671 case TEMP_NORMAL:
3672 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3673 break;
3674 case TEMP_CONST:
3675 break;
3676 default:
3677 g_assert_not_reached();
3683 * Specialized code generation for INDEX_op_mov_* with a constant.
3685 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3686 tcg_target_ulong val, TCGLifeData arg_life,
3687 TCGRegSet preferred_regs)
3689 /* ENV should not be modified. */
3690 tcg_debug_assert(!temp_readonly(ots));
3692 /* The movi is not explicitly generated here. */
3693 if (ots->val_type == TEMP_VAL_REG) {
3694 s->reg_to_temp[ots->reg] = NULL;
3696 ots->val_type = TEMP_VAL_CONST;
3697 ots->val = val;
3698 ots->mem_coherent = 0;
3699 if (NEED_SYNC_ARG(0)) {
3700 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3701 } else if (IS_DEAD_ARG(0)) {
3702 temp_dead(s, ots);
3707 * Specialized code generation for INDEX_op_mov_*.
3709 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3711 const TCGLifeData arg_life = op->life;
3712 TCGRegSet allocated_regs, preferred_regs;
3713 TCGTemp *ts, *ots;
3714 TCGType otype, itype;
3716 allocated_regs = s->reserved_regs;
3717 preferred_regs = op->output_pref[0];
3718 ots = arg_temp(op->args[0]);
3719 ts = arg_temp(op->args[1]);
3721 /* ENV should not be modified. */
3722 tcg_debug_assert(!temp_readonly(ots));
3724 /* Note that otype != itype for no-op truncation. */
3725 otype = ots->type;
3726 itype = ts->type;
3728 if (ts->val_type == TEMP_VAL_CONST) {
3729 /* propagate constant or generate sti */
3730 tcg_target_ulong val = ts->val;
3731 if (IS_DEAD_ARG(1)) {
3732 temp_dead(s, ts);
3734 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3735 return;
3738 /* If the source value is in memory we're going to be forced
3739 to have it in a register in order to perform the copy. Copy
3740 the SOURCE value into its own register first, that way we
3741 don't have to reload SOURCE the next time it is used. */
3742 if (ts->val_type == TEMP_VAL_MEM) {
3743 temp_load(s, ts, tcg_target_available_regs[itype],
3744 allocated_regs, preferred_regs);
3747 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3748 if (IS_DEAD_ARG(0)) {
3749 /* mov to a non-saved dead register makes no sense (even with
3750 liveness analysis disabled). */
3751 tcg_debug_assert(NEED_SYNC_ARG(0));
3752 if (!ots->mem_allocated) {
3753 temp_allocate_frame(s, ots);
3755 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3756 if (IS_DEAD_ARG(1)) {
3757 temp_dead(s, ts);
3759 temp_dead(s, ots);
3760 } else {
3761 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3762 /* the mov can be suppressed */
3763 if (ots->val_type == TEMP_VAL_REG) {
3764 s->reg_to_temp[ots->reg] = NULL;
3766 ots->reg = ts->reg;
3767 temp_dead(s, ts);
3768 } else {
3769 if (ots->val_type != TEMP_VAL_REG) {
3770 /* When allocating a new register, make sure to not spill the
3771 input one. */
3772 tcg_regset_set_reg(allocated_regs, ts->reg);
3773 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3774 allocated_regs, preferred_regs,
3775 ots->indirect_base);
3777 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3779 * Cross register class move not supported.
3780 * Store the source register into the destination slot
3781 * and leave the destination temp as TEMP_VAL_MEM.
3783 assert(!temp_readonly(ots));
3784 if (!ts->mem_allocated) {
3785 temp_allocate_frame(s, ots);
3787 tcg_out_st(s, ts->type, ts->reg,
3788 ots->mem_base->reg, ots->mem_offset);
3789 ots->mem_coherent = 1;
3790 temp_free_or_dead(s, ots, -1);
3791 return;
3794 ots->val_type = TEMP_VAL_REG;
3795 ots->mem_coherent = 0;
3796 s->reg_to_temp[ots->reg] = ots;
3797 if (NEED_SYNC_ARG(0)) {
3798 temp_sync(s, ots, allocated_regs, 0, 0);
3804 * Specialized code generation for INDEX_op_dup_vec.
3806 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3808 const TCGLifeData arg_life = op->life;
3809 TCGRegSet dup_out_regs, dup_in_regs;
3810 TCGTemp *its, *ots;
3811 TCGType itype, vtype;
3812 intptr_t endian_fixup;
3813 unsigned vece;
3814 bool ok;
3816 ots = arg_temp(op->args[0]);
3817 its = arg_temp(op->args[1]);
3819 /* ENV should not be modified. */
3820 tcg_debug_assert(!temp_readonly(ots));
3822 itype = its->type;
3823 vece = TCGOP_VECE(op);
3824 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3826 if (its->val_type == TEMP_VAL_CONST) {
3827 /* Propagate constant via movi -> dupi. */
3828 tcg_target_ulong val = its->val;
3829 if (IS_DEAD_ARG(1)) {
3830 temp_dead(s, its);
3832 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3833 return;
3836 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3837 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3839 /* Allocate the output register now. */
3840 if (ots->val_type != TEMP_VAL_REG) {
3841 TCGRegSet allocated_regs = s->reserved_regs;
3843 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3844 /* Make sure to not spill the input register. */
3845 tcg_regset_set_reg(allocated_regs, its->reg);
3847 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3848 op->output_pref[0], ots->indirect_base);
3849 ots->val_type = TEMP_VAL_REG;
3850 ots->mem_coherent = 0;
3851 s->reg_to_temp[ots->reg] = ots;
3854 switch (its->val_type) {
3855 case TEMP_VAL_REG:
3857 * The dup constraints must be broad, covering all possible VECE.
3858 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3859 * to fail, indicating that extra moves are required for that case.
3861 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3862 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3863 goto done;
3865 /* Try again from memory or a vector input register. */
3867 if (!its->mem_coherent) {
3869 * The input register is not synced, and so an extra store
3870 * would be required to use memory. Attempt an integer-vector
3871 * register move first. We do not have a TCGRegSet for this.
3873 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3874 break;
3876 /* Sync the temp back to its slot and load from there. */
3877 temp_sync(s, its, s->reserved_regs, 0, 0);
3879 /* fall through */
3881 case TEMP_VAL_MEM:
3882 #ifdef HOST_WORDS_BIGENDIAN
3883 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3884 endian_fixup -= 1 << vece;
3885 #else
3886 endian_fixup = 0;
3887 #endif
3888 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3889 its->mem_offset + endian_fixup)) {
3890 goto done;
3892 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3893 break;
3895 default:
3896 g_assert_not_reached();
3899 /* We now have a vector input register, so dup must succeed. */
3900 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3901 tcg_debug_assert(ok);
3903 done:
3904 if (IS_DEAD_ARG(1)) {
3905 temp_dead(s, its);
3907 if (NEED_SYNC_ARG(0)) {
3908 temp_sync(s, ots, s->reserved_regs, 0, 0);
3910 if (IS_DEAD_ARG(0)) {
3911 temp_dead(s, ots);
3915 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3917 const TCGLifeData arg_life = op->life;
3918 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3919 TCGRegSet i_allocated_regs;
3920 TCGRegSet o_allocated_regs;
3921 int i, k, nb_iargs, nb_oargs;
3922 TCGReg reg;
3923 TCGArg arg;
3924 const TCGArgConstraint *arg_ct;
3925 TCGTemp *ts;
3926 TCGArg new_args[TCG_MAX_OP_ARGS];
3927 int const_args[TCG_MAX_OP_ARGS];
3929 nb_oargs = def->nb_oargs;
3930 nb_iargs = def->nb_iargs;
3932 /* copy constants */
3933 memcpy(new_args + nb_oargs + nb_iargs,
3934 op->args + nb_oargs + nb_iargs,
3935 sizeof(TCGArg) * def->nb_cargs);
3937 i_allocated_regs = s->reserved_regs;
3938 o_allocated_regs = s->reserved_regs;
3940 /* satisfy input constraints */
3941 for (k = 0; k < nb_iargs; k++) {
3942 TCGRegSet i_preferred_regs, o_preferred_regs;
3944 i = def->args_ct[nb_oargs + k].sort_index;
3945 arg = op->args[i];
3946 arg_ct = &def->args_ct[i];
3947 ts = arg_temp(arg);
3949 if (ts->val_type == TEMP_VAL_CONST
3950 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3951 /* constant is OK for instruction */
3952 const_args[i] = 1;
3953 new_args[i] = ts->val;
3954 continue;
3957 i_preferred_regs = o_preferred_regs = 0;
3958 if (arg_ct->ialias) {
3959 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3962 * If the input is readonly, then it cannot also be an
3963 * output and aliased to itself. If the input is not
3964 * dead after the instruction, we must allocate a new
3965 * register and move it.
3967 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3968 goto allocate_in_reg;
3972 * Check if the current register has already been allocated
3973 * for another input aliased to an output.
3975 if (ts->val_type == TEMP_VAL_REG) {
3976 reg = ts->reg;
3977 for (int k2 = 0; k2 < k; k2++) {
3978 int i2 = def->args_ct[nb_oargs + k2].sort_index;
3979 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3980 goto allocate_in_reg;
3984 i_preferred_regs = o_preferred_regs;
3987 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3988 reg = ts->reg;
3990 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3991 allocate_in_reg:
3993 * Allocate a new register matching the constraint
3994 * and move the temporary register into it.
3996 temp_load(s, ts, tcg_target_available_regs[ts->type],
3997 i_allocated_regs, 0);
3998 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3999 o_preferred_regs, ts->indirect_base);
4000 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4002 * Cross register class move not supported. Sync the
4003 * temp back to its slot and load from there.
4005 temp_sync(s, ts, i_allocated_regs, 0, 0);
4006 tcg_out_ld(s, ts->type, reg,
4007 ts->mem_base->reg, ts->mem_offset);
4010 new_args[i] = reg;
4011 const_args[i] = 0;
4012 tcg_regset_set_reg(i_allocated_regs, reg);
4015 /* mark dead temporaries and free the associated registers */
4016 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4017 if (IS_DEAD_ARG(i)) {
4018 temp_dead(s, arg_temp(op->args[i]));
4022 if (def->flags & TCG_OPF_COND_BRANCH) {
4023 tcg_reg_alloc_cbranch(s, i_allocated_regs);
4024 } else if (def->flags & TCG_OPF_BB_END) {
4025 tcg_reg_alloc_bb_end(s, i_allocated_regs);
4026 } else {
4027 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4028 /* XXX: permit generic clobber register list? */
4029 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4030 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4031 tcg_reg_free(s, i, i_allocated_regs);
4035 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4036 /* sync globals if the op has side effects and might trigger
4037 an exception. */
4038 sync_globals(s, i_allocated_regs);
4041 /* satisfy the output constraints */
4042 for(k = 0; k < nb_oargs; k++) {
4043 i = def->args_ct[k].sort_index;
4044 arg = op->args[i];
4045 arg_ct = &def->args_ct[i];
4046 ts = arg_temp(arg);
4048 /* ENV should not be modified. */
4049 tcg_debug_assert(!temp_readonly(ts));
4051 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4052 reg = new_args[arg_ct->alias_index];
4053 } else if (arg_ct->newreg) {
4054 reg = tcg_reg_alloc(s, arg_ct->regs,
4055 i_allocated_regs | o_allocated_regs,
4056 op->output_pref[k], ts->indirect_base);
4057 } else {
4058 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4059 op->output_pref[k], ts->indirect_base);
4061 tcg_regset_set_reg(o_allocated_regs, reg);
4062 if (ts->val_type == TEMP_VAL_REG) {
4063 s->reg_to_temp[ts->reg] = NULL;
4065 ts->val_type = TEMP_VAL_REG;
4066 ts->reg = reg;
4068 * Temp value is modified, so the value kept in memory is
4069 * potentially not the same.
4071 ts->mem_coherent = 0;
4072 s->reg_to_temp[reg] = ts;
4073 new_args[i] = reg;
4077 /* emit instruction */
4078 if (def->flags & TCG_OPF_VECTOR) {
4079 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4080 new_args, const_args);
4081 } else {
4082 tcg_out_op(s, op->opc, new_args, const_args);
4085 /* move the outputs into the correct register if needed */
4086 for(i = 0; i < nb_oargs; i++) {
4087 ts = arg_temp(op->args[i]);
4089 /* ENV should not be modified. */
4090 tcg_debug_assert(!temp_readonly(ts));
4092 if (NEED_SYNC_ARG(i)) {
4093 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4094 } else if (IS_DEAD_ARG(i)) {
4095 temp_dead(s, ts);
4100 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4102 const TCGLifeData arg_life = op->life;
4103 TCGTemp *ots, *itsl, *itsh;
4104 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4106 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4107 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4108 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4110 ots = arg_temp(op->args[0]);
4111 itsl = arg_temp(op->args[1]);
4112 itsh = arg_temp(op->args[2]);
4114 /* ENV should not be modified. */
4115 tcg_debug_assert(!temp_readonly(ots));
4117 /* Allocate the output register now. */
4118 if (ots->val_type != TEMP_VAL_REG) {
4119 TCGRegSet allocated_regs = s->reserved_regs;
4120 TCGRegSet dup_out_regs =
4121 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4123 /* Make sure to not spill the input registers. */
4124 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4125 tcg_regset_set_reg(allocated_regs, itsl->reg);
4127 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4128 tcg_regset_set_reg(allocated_regs, itsh->reg);
4131 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4132 op->output_pref[0], ots->indirect_base);
4133 ots->val_type = TEMP_VAL_REG;
4134 ots->mem_coherent = 0;
4135 s->reg_to_temp[ots->reg] = ots;
4138 /* Promote dup2 of immediates to dupi_vec. */
4139 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4140 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4141 MemOp vece = MO_64;
4143 if (val == dup_const(MO_8, val)) {
4144 vece = MO_8;
4145 } else if (val == dup_const(MO_16, val)) {
4146 vece = MO_16;
4147 } else if (val == dup_const(MO_32, val)) {
4148 vece = MO_32;
4151 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4152 goto done;
4155 /* If the two inputs form one 64-bit value, try dupm_vec. */
4156 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4157 if (!itsl->mem_coherent) {
4158 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4160 if (!itsh->mem_coherent) {
4161 temp_sync(s, itsh, s->reserved_regs, 0, 0);
4163 #ifdef HOST_WORDS_BIGENDIAN
4164 TCGTemp *its = itsh;
4165 #else
4166 TCGTemp *its = itsl;
4167 #endif
4168 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4169 its->mem_base->reg, its->mem_offset)) {
4170 goto done;
4174 /* Fall back to generic expansion. */
4175 return false;
4177 done:
4178 if (IS_DEAD_ARG(1)) {
4179 temp_dead(s, itsl);
4181 if (IS_DEAD_ARG(2)) {
4182 temp_dead(s, itsh);
4184 if (NEED_SYNC_ARG(0)) {
4185 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4186 } else if (IS_DEAD_ARG(0)) {
4187 temp_dead(s, ots);
4189 return true;
4192 #ifdef TCG_TARGET_STACK_GROWSUP
4193 #define STACK_DIR(x) (-(x))
4194 #else
4195 #define STACK_DIR(x) (x)
4196 #endif
4198 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4200 const int nb_oargs = TCGOP_CALLO(op);
4201 const int nb_iargs = TCGOP_CALLI(op);
4202 const TCGLifeData arg_life = op->life;
4203 int flags, nb_regs, i;
4204 TCGReg reg;
4205 TCGArg arg;
4206 TCGTemp *ts;
4207 intptr_t stack_offset;
4208 size_t call_stack_size;
4209 tcg_insn_unit *func_addr;
4210 int allocate_args;
4211 TCGRegSet allocated_regs;
4213 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4214 flags = op->args[nb_oargs + nb_iargs + 1];
4216 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4217 if (nb_regs > nb_iargs) {
4218 nb_regs = nb_iargs;
4221 /* assign stack slots first */
4222 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4223 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4224 ~(TCG_TARGET_STACK_ALIGN - 1);
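/*
 * Illustrative arithmetic (not from the source): the two statements above
 * round the stack area up to TCG_TARGET_STACK_ALIGN.  For example, with a
 * 16-byte alignment and three 8-byte stack words, (24 + 15) & ~15 == 32.
 */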
4225 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4226 if (allocate_args) {
4227 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4228 preallocate call stack */
4229 tcg_abort();
4232 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4233 for (i = nb_regs; i < nb_iargs; i++) {
4234 arg = op->args[nb_oargs + i];
4235 #ifdef TCG_TARGET_STACK_GROWSUP
4236 stack_offset -= sizeof(tcg_target_long);
4237 #endif
4238 if (arg != TCG_CALL_DUMMY_ARG) {
4239 ts = arg_temp(arg);
4240 temp_load(s, ts, tcg_target_available_regs[ts->type],
4241 s->reserved_regs, 0);
4242 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4244 #ifndef TCG_TARGET_STACK_GROWSUP
4245 stack_offset += sizeof(tcg_target_long);
4246 #endif
4249 /* assign input registers */
4250 allocated_regs = s->reserved_regs;
4251 for (i = 0; i < nb_regs; i++) {
4252 arg = op->args[nb_oargs + i];
4253 if (arg != TCG_CALL_DUMMY_ARG) {
4254 ts = arg_temp(arg);
4255 reg = tcg_target_call_iarg_regs[i];
4257 if (ts->val_type == TEMP_VAL_REG) {
4258 if (ts->reg != reg) {
4259 tcg_reg_free(s, reg, allocated_regs);
4260 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4261 /*
4262 * Cross register class move not supported. Sync the
4263 * temp back to its slot and load from there.
4264 */
4265 temp_sync(s, ts, allocated_regs, 0, 0);
4266 tcg_out_ld(s, ts->type, reg,
4267 ts->mem_base->reg, ts->mem_offset);
4270 } else {
4271 TCGRegSet arg_set = 0;
4273 tcg_reg_free(s, reg, allocated_regs);
4274 tcg_regset_set_reg(arg_set, reg);
4275 temp_load(s, ts, arg_set, allocated_regs, 0);
4278 tcg_regset_set_reg(allocated_regs, reg);
4282 /* mark dead temporaries and free the associated registers */
4283 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4284 if (IS_DEAD_ARG(i)) {
4285 temp_dead(s, arg_temp(op->args[i]));
4289 /* clobber call registers */
4290 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4291 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4292 tcg_reg_free(s, i, allocated_regs);
4296 /* Save globals if they might be written by the helper; sync them if
4297 they might be read. */
4298 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4299 /* Nothing to do */
4300 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4301 sync_globals(s, allocated_regs);
4302 } else {
4303 save_globals(s, allocated_regs);
4306 tcg_out_call(s, func_addr);
4308 /* assign output registers and emit moves if needed */
4309 for(i = 0; i < nb_oargs; i++) {
4310 arg = op->args[i];
4311 ts = arg_temp(arg);
4313 /* ENV should not be modified. */
4314 tcg_debug_assert(!temp_readonly(ts));
4316 reg = tcg_target_call_oarg_regs[i];
4317 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4318 if (ts->val_type == TEMP_VAL_REG) {
4319 s->reg_to_temp[ts->reg] = NULL;
4321 ts->val_type = TEMP_VAL_REG;
4322 ts->reg = reg;
4323 ts->mem_coherent = 0;
4324 s->reg_to_temp[reg] = ts;
4325 if (NEED_SYNC_ARG(i)) {
4326 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4327 } else if (IS_DEAD_ARG(i)) {
4328 temp_dead(s, ts);
4333 #ifdef CONFIG_PROFILER
4335 /* avoid copy/paste errors */
4336 #define PROF_ADD(to, from, field) \
4337 do { \
4338 (to)->field += qatomic_read(&((from)->field)); \
4339 } while (0)
4341 #define PROF_MAX(to, from, field) \
4342 do { \
4343 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4344 if (val__ > (to)->field) { \
4345 (to)->field = val__; \
4347 } while (0)
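/*
 * Descriptive note: PROF_ADD accumulates a counter from one TCGProfile into
 * another (reading the source atomically), while PROF_MAX keeps the running
 * maximum.  tcg_profile_snapshot() below uses them to merge the per-context
 * profiles in tcg_ctxs[] into a single snapshot.
 */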
4349 /* Pass in a zeroed @prof */
4350 static inline
4351 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4353 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4354 unsigned int i;
4356 for (i = 0; i < n_ctxs; i++) {
4357 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4358 const TCGProfile *orig = &s->prof;
4360 if (counters) {
4361 PROF_ADD(prof, orig, cpu_exec_time);
4362 PROF_ADD(prof, orig, tb_count1);
4363 PROF_ADD(prof, orig, tb_count);
4364 PROF_ADD(prof, orig, op_count);
4365 PROF_MAX(prof, orig, op_count_max);
4366 PROF_ADD(prof, orig, temp_count);
4367 PROF_MAX(prof, orig, temp_count_max);
4368 PROF_ADD(prof, orig, del_op_count);
4369 PROF_ADD(prof, orig, code_in_len);
4370 PROF_ADD(prof, orig, code_out_len);
4371 PROF_ADD(prof, orig, search_out_len);
4372 PROF_ADD(prof, orig, interm_time);
4373 PROF_ADD(prof, orig, code_time);
4374 PROF_ADD(prof, orig, la_time);
4375 PROF_ADD(prof, orig, opt_time);
4376 PROF_ADD(prof, orig, restore_count);
4377 PROF_ADD(prof, orig, restore_time);
4379 if (table) {
4380 int i;
4382 for (i = 0; i < NB_OPS; i++) {
4383 PROF_ADD(prof, orig, table_op_count[i]);
4389 #undef PROF_ADD
4390 #undef PROF_MAX
4392 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4394 tcg_profile_snapshot(prof, true, false);
4397 static void tcg_profile_snapshot_table(TCGProfile *prof)
4399 tcg_profile_snapshot(prof, false, true);
4402 void tcg_dump_op_count(void)
4404 TCGProfile prof = {};
4405 int i;
4407 tcg_profile_snapshot_table(&prof);
4408 for (i = 0; i < NB_OPS; i++) {
4409 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4410 prof.table_op_count[i]);
4414 int64_t tcg_cpu_exec_time(void)
4416 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4417 unsigned int i;
4418 int64_t ret = 0;
4420 for (i = 0; i < n_ctxs; i++) {
4421 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4422 const TCGProfile *prof = &s->prof;
4424 ret += qatomic_read(&prof->cpu_exec_time);
4426 return ret;
4428 #else
4429 void tcg_dump_op_count(void)
4431 qemu_printf("[TCG profiler not compiled]\n");
4434 int64_t tcg_cpu_exec_time(void)
4436 error_report("%s: TCG profiler not compiled", __func__);
4437 exit(EXIT_FAILURE);
4439 #endif
4442 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4444 #ifdef CONFIG_PROFILER
4445 TCGProfile *prof = &s->prof;
4446 #endif
4447 int i, num_insns;
4448 TCGOp *op;
4450 #ifdef CONFIG_PROFILER
4452 int n = 0;
4454 QTAILQ_FOREACH(op, &s->ops, link) {
4455 n++;
4457 qatomic_set(&prof->op_count, prof->op_count + n);
4458 if (n > prof->op_count_max) {
4459 qatomic_set(&prof->op_count_max, n);
4462 n = s->nb_temps;
4463 qatomic_set(&prof->temp_count, prof->temp_count + n);
4464 if (n > prof->temp_count_max) {
4465 qatomic_set(&prof->temp_count_max, n);
4468 #endif
4470 #ifdef DEBUG_DISAS
4471 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4472 && qemu_log_in_addr_range(tb->pc))) {
4473 FILE *logfile = qemu_log_lock();
4474 qemu_log("OP:\n");
4475 tcg_dump_ops(s, false);
4476 qemu_log("\n");
4477 qemu_log_unlock(logfile);
4479 #endif
4481 #ifdef CONFIG_DEBUG_TCG
4482 /* Ensure all labels referenced have been emitted. */
4484 TCGLabel *l;
4485 bool error = false;
4487 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4488 if (unlikely(!l->present) && l->refs) {
4489 qemu_log_mask(CPU_LOG_TB_OP,
4490 "$L%d referenced but not present.\n", l->id);
4491 error = true;
4494 assert(!error);
4496 #endif
4498 #ifdef CONFIG_PROFILER
4499 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4500 #endif
4502 #ifdef USE_TCG_OPTIMIZATIONS
4503 tcg_optimize(s);
4504 #endif
4506 #ifdef CONFIG_PROFILER
4507 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4508 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4509 #endif
4511 reachable_code_pass(s);
4512 liveness_pass_1(s);
4514 if (s->nb_indirects > 0) {
4515 #ifdef DEBUG_DISAS
4516 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4517 && qemu_log_in_addr_range(tb->pc))) {
4518 FILE *logfile = qemu_log_lock();
4519 qemu_log("OP before indirect lowering:\n");
4520 tcg_dump_ops(s, false);
4521 qemu_log("\n");
4522 qemu_log_unlock(logfile);
4524 #endif
4525 /* Replace indirect temps with direct temps. */
4526 if (liveness_pass_2(s)) {
4527 /* If changes were made, re-run liveness. */
4528 liveness_pass_1(s);
4532 #ifdef CONFIG_PROFILER
4533 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4534 #endif
4536 #ifdef DEBUG_DISAS
4537 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4538 && qemu_log_in_addr_range(tb->pc))) {
4539 FILE *logfile = qemu_log_lock();
4540 qemu_log("OP after optimization and liveness analysis:\n");
4541 tcg_dump_ops(s, true);
4542 qemu_log("\n");
4543 qemu_log_unlock(logfile);
4545 #endif
4547 tcg_reg_alloc_start(s);
4549 /*
4550 * Reset the buffer pointers when restarting after overflow.
4551 * TODO: Move this into translate-all.c with the rest of the
4552 * buffer management. Having only this done here is confusing.
4553 */
4554 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4555 s->code_ptr = s->code_buf;
4557 #ifdef TCG_TARGET_NEED_LDST_LABELS
4558 QSIMPLEQ_INIT(&s->ldst_labels);
4559 #endif
4560 #ifdef TCG_TARGET_NEED_POOL_LABELS
4561 s->pool_labels = NULL;
4562 #endif
4564 num_insns = -1;
4565 QTAILQ_FOREACH(op, &s->ops, link) {
4566 TCGOpcode opc = op->opc;
4568 #ifdef CONFIG_PROFILER
4569 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4570 #endif
4572 switch (opc) {
4573 case INDEX_op_mov_i32:
4574 case INDEX_op_mov_i64:
4575 case INDEX_op_mov_vec:
4576 tcg_reg_alloc_mov(s, op);
4577 break;
4578 case INDEX_op_dup_vec:
4579 tcg_reg_alloc_dup(s, op);
4580 break;
4581 case INDEX_op_insn_start:
4582 if (num_insns >= 0) {
4583 size_t off = tcg_current_code_size(s);
4584 s->gen_insn_end_off[num_insns] = off;
4585 /* Assert that we do not overflow our stored offset. */
4586 assert(s->gen_insn_end_off[num_insns] == off);
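/*
 * Descriptive note (assumption: gen_insn_end_off[] stores 16-bit offsets):
 * the assignment above truncates, so this assert fires as soon as a single
 * TB's generated code no longer fits in the stored offset; the UINT16_MAX
 * check further down catches the same condition at the end of each op.
 */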
4588 num_insns++;
4589 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4590 target_ulong a;
4591 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4592 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4593 #else
4594 a = op->args[i];
4595 #endif
4596 s->gen_insn_data[num_insns][i] = a;
4598 break;
4599 case INDEX_op_discard:
4600 temp_dead(s, arg_temp(op->args[0]));
4601 break;
4602 case INDEX_op_set_label:
4603 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4604 tcg_out_label(s, arg_label(op->args[0]));
4605 break;
4606 case INDEX_op_call:
4607 tcg_reg_alloc_call(s, op);
4608 break;
4609 case INDEX_op_dup2_vec:
4610 if (tcg_reg_alloc_dup2(s, op)) {
4611 break;
4613 /* fall through */
4614 default:
4615 /* Sanity check that we've not introduced any unhandled opcodes. */
4616 tcg_debug_assert(tcg_op_supported(opc));
4617 /* Note: it would be much faster to have specialized
4618 register allocator functions for some common
4619 argument patterns. */
4620 tcg_reg_alloc_op(s, op);
4621 break;
4623 #ifdef CONFIG_DEBUG_TCG
4624 check_regs(s);
4625 #endif
4626 /* Test for (pending) buffer overflow. The assumption is that any
4627 one operation beginning below the high water mark cannot overrun
4628 the buffer completely. Thus we can test for overflow after
4629 generating code without having to check during generation. */
4630 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4631 return -1;
4633 /* Test for TB overflow, as seen by gen_insn_end_off. */
4634 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4635 return -2;
4638 tcg_debug_assert(num_insns >= 0);
4639 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4641 /* Generate TB finalization at the end of the block */
4642 #ifdef TCG_TARGET_NEED_LDST_LABELS
4643 i = tcg_out_ldst_finalize(s);
4644 if (i < 0) {
4645 return i;
4647 #endif
4648 #ifdef TCG_TARGET_NEED_POOL_LABELS
4649 i = tcg_out_pool_finalize(s);
4650 if (i < 0) {
4651 return i;
4653 #endif
4654 if (!tcg_resolve_relocs(s)) {
4655 return -2;
4658 #ifndef CONFIG_TCG_INTERPRETER
4659 /* flush instruction cache */
4660 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4661 (uintptr_t)s->code_buf,
4662 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4663 #endif
4665 return tcg_current_code_size(s);
4668 #ifdef CONFIG_PROFILER
4669 void tcg_dump_info(void)
4671 TCGProfile prof = {};
4672 const TCGProfile *s;
4673 int64_t tb_count;
4674 int64_t tb_div_count;
4675 int64_t tot;
4677 tcg_profile_snapshot_counters(&prof);
4678 s = &prof;
4679 tb_count = s->tb_count;
4680 tb_div_count = tb_count ? tb_count : 1;
4681 tot = s->interm_time + s->code_time;
4683 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4684 tot, tot / 2.4e9);
4685 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4686 " %0.1f%%)\n",
4687 tb_count, s->tb_count1 - tb_count,
4688 (double)(s->tb_count1 - s->tb_count)
4689 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4690 qemu_printf("avg ops/TB %0.1f max=%d\n",
4691 (double)s->op_count / tb_div_count, s->op_count_max);
4692 qemu_printf("deleted ops/TB %0.2f\n",
4693 (double)s->del_op_count / tb_div_count);
4694 qemu_printf("avg temps/TB %0.2f max=%d\n",
4695 (double)s->temp_count / tb_div_count, s->temp_count_max);
4696 qemu_printf("avg host code/TB %0.1f\n",
4697 (double)s->code_out_len / tb_div_count);
4698 qemu_printf("avg search data/TB %0.1f\n",
4699 (double)s->search_out_len / tb_div_count);
4701 qemu_printf("cycles/op %0.1f\n",
4702 s->op_count ? (double)tot / s->op_count : 0);
4703 qemu_printf("cycles/in byte %0.1f\n",
4704 s->code_in_len ? (double)tot / s->code_in_len : 0);
4705 qemu_printf("cycles/out byte %0.1f\n",
4706 s->code_out_len ? (double)tot / s->code_out_len : 0);
4707 qemu_printf("cycles/search byte %0.1f\n",
4708 s->search_out_len ? (double)tot / s->search_out_len : 0);
4709 if (tot == 0) {
4710 tot = 1;
4712 qemu_printf(" gen_interm time %0.1f%%\n",
4713 (double)s->interm_time / tot * 100.0);
4714 qemu_printf(" gen_code time %0.1f%%\n",
4715 (double)s->code_time / tot * 100.0);
4716 qemu_printf("optim./code time %0.1f%%\n",
4717 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4718 * 100.0);
4719 qemu_printf("liveness/code time %0.1f%%\n",
4720 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4721 qemu_printf("cpu_restore count %" PRId64 "\n",
4722 s->restore_count);
4723 qemu_printf(" avg cycles %0.1f\n",
4724 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4726 #else
4727 void tcg_dump_info(void)
4729 qemu_printf("[TCG profiler not compiled]\n");
4731 #endif
4733 #ifdef ELF_HOST_MACHINE
4734 /* In order to use this feature, the backend needs to do three things:
4736 (1) Define ELF_HOST_MACHINE to indicate both what value to
4737 put into the ELF image and to indicate support for the feature.
4739 (2) Define tcg_register_jit. This should create a buffer containing
4740 the contents of a .debug_frame section that describes the post-
4741 prologue unwind info for the tcg machine.
4743 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4744 */
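/*
 * Hedged sketch (not part of this file): a backend's tcg_register_jit()
 * typically just forwards a statically built .debug_frame image, roughly:
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         // "debug_frame" is a hypothetical backend-local object here
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */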
4746 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4747 typedef enum {
4748 JIT_NOACTION = 0,
4749 JIT_REGISTER_FN,
4750 JIT_UNREGISTER_FN
4751 } jit_actions_t;
4753 struct jit_code_entry {
4754 struct jit_code_entry *next_entry;
4755 struct jit_code_entry *prev_entry;
4756 const void *symfile_addr;
4757 uint64_t symfile_size;
4760 struct jit_descriptor {
4761 uint32_t version;
4762 uint32_t action_flag;
4763 struct jit_code_entry *relevant_entry;
4764 struct jit_code_entry *first_entry;
4767 void __jit_debug_register_code(void) __attribute__((noinline));
4768 void __jit_debug_register_code(void)
4770 asm("");
4773 /* Must statically initialize the version, because GDB may check
4774 the version before we can set it. */
4775 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
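/*
 * Descriptive note: per the GDB JIT interface, the debugger plants a
 * breakpoint in __jit_debug_register_code() and, whenever it fires, walks
 * __jit_debug_descriptor to find the entry whose symfile it should load
 * (filled in by tcg_register_jit_int() below).
 */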
4777 /* End GDB interface. */
4779 static int find_string(const char *strtab, const char *str)
4781 const char *p = strtab + 1;
4783 while (1) {
4784 if (strcmp(p, str) == 0) {
4785 return p - strtab;
4787 p += strlen(p) + 1;
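/*
 * Worked example (illustrative): with the .str table built below,
 * "\0.text\0.debug_info\0...", find_string(strtab, ".text") returns 1 and
 * find_string(strtab, ".debug_info") returns 7 -- the byte offsets that go
 * into sh_name/st_name.  The loop assumes the string is present; there is
 * no terminating bounds check.
 */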
4791 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4792 const void *debug_frame,
4793 size_t debug_frame_size)
4795 struct __attribute__((packed)) DebugInfo {
4796 uint32_t len;
4797 uint16_t version;
4798 uint32_t abbrev;
4799 uint8_t ptr_size;
4800 uint8_t cu_die;
4801 uint16_t cu_lang;
4802 uintptr_t cu_low_pc;
4803 uintptr_t cu_high_pc;
4804 uint8_t fn_die;
4805 char fn_name[16];
4806 uintptr_t fn_low_pc;
4807 uintptr_t fn_high_pc;
4808 uint8_t cu_eoc;
4811 struct ElfImage {
4812 ElfW(Ehdr) ehdr;
4813 ElfW(Phdr) phdr;
4814 ElfW(Shdr) shdr[7];
4815 ElfW(Sym) sym[2];
4816 struct DebugInfo di;
4817 uint8_t da[24];
4818 char str[80];
4821 struct ElfImage *img;
4823 static const struct ElfImage img_template = {
4824 .ehdr = {
4825 .e_ident[EI_MAG0] = ELFMAG0,
4826 .e_ident[EI_MAG1] = ELFMAG1,
4827 .e_ident[EI_MAG2] = ELFMAG2,
4828 .e_ident[EI_MAG3] = ELFMAG3,
4829 .e_ident[EI_CLASS] = ELF_CLASS,
4830 .e_ident[EI_DATA] = ELF_DATA,
4831 .e_ident[EI_VERSION] = EV_CURRENT,
4832 .e_type = ET_EXEC,
4833 .e_machine = ELF_HOST_MACHINE,
4834 .e_version = EV_CURRENT,
4835 .e_phoff = offsetof(struct ElfImage, phdr),
4836 .e_shoff = offsetof(struct ElfImage, shdr),
4837 .e_ehsize = sizeof(ElfW(Ehdr)),
4838 .e_phentsize = sizeof(ElfW(Phdr)),
4839 .e_phnum = 1,
4840 .e_shentsize = sizeof(ElfW(Shdr)),
4841 .e_shnum = ARRAY_SIZE(img->shdr),
4842 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4843 #ifdef ELF_HOST_FLAGS
4844 .e_flags = ELF_HOST_FLAGS,
4845 #endif
4846 #ifdef ELF_OSABI
4847 .e_ident[EI_OSABI] = ELF_OSABI,
4848 #endif
4850 .phdr = {
4851 .p_type = PT_LOAD,
4852 .p_flags = PF_X,
4854 .shdr = {
4855 [0] = { .sh_type = SHT_NULL },
4856 /* Trick: The contents of code_gen_buffer are not present in
4857 this fake ELF file; that got allocated elsewhere. Therefore
4858 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4859 will not look for contents. We can record any address. */
4860 [1] = { /* .text */
4861 .sh_type = SHT_NOBITS,
4862 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4864 [2] = { /* .debug_info */
4865 .sh_type = SHT_PROGBITS,
4866 .sh_offset = offsetof(struct ElfImage, di),
4867 .sh_size = sizeof(struct DebugInfo),
4869 [3] = { /* .debug_abbrev */
4870 .sh_type = SHT_PROGBITS,
4871 .sh_offset = offsetof(struct ElfImage, da),
4872 .sh_size = sizeof(img->da),
4874 [4] = { /* .debug_frame */
4875 .sh_type = SHT_PROGBITS,
4876 .sh_offset = sizeof(struct ElfImage),
4878 [5] = { /* .symtab */
4879 .sh_type = SHT_SYMTAB,
4880 .sh_offset = offsetof(struct ElfImage, sym),
4881 .sh_size = sizeof(img->sym),
4882 .sh_info = 1,
4883 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4884 .sh_entsize = sizeof(ElfW(Sym)),
4886 [6] = { /* .strtab */
4887 .sh_type = SHT_STRTAB,
4888 .sh_offset = offsetof(struct ElfImage, str),
4889 .sh_size = sizeof(img->str),
4892 .sym = {
4893 [1] = { /* code_gen_buffer */
4894 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4895 .st_shndx = 1,
4898 .di = {
4899 .len = sizeof(struct DebugInfo) - 4,
4900 .version = 2,
4901 .ptr_size = sizeof(void *),
4902 .cu_die = 1,
4903 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4904 .fn_die = 2,
4905 .fn_name = "code_gen_buffer"
4907 .da = {
4908 1, /* abbrev number (the cu) */
4909 0x11, 1, /* DW_TAG_compile_unit, has children */
4910 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4911 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4912 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4913 0, 0, /* end of abbrev */
4914 2, /* abbrev number (the fn) */
4915 0x2e, 0, /* DW_TAG_subprogram, no children */
4916 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4917 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4918 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4919 0, 0, /* end of abbrev */
4920 0 /* no more abbrev */
4922 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4923 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4926 /* We only need a single jit entry; statically allocate it. */
4927 static struct jit_code_entry one_entry;
4929 uintptr_t buf = (uintptr_t)buf_ptr;
4930 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4931 DebugFrameHeader *dfh;
4933 img = g_malloc(img_size);
4934 *img = img_template;
4936 img->phdr.p_vaddr = buf;
4937 img->phdr.p_paddr = buf;
4938 img->phdr.p_memsz = buf_size;
4940 img->shdr[1].sh_name = find_string(img->str, ".text");
4941 img->shdr[1].sh_addr = buf;
4942 img->shdr[1].sh_size = buf_size;
4944 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4945 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4947 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4948 img->shdr[4].sh_size = debug_frame_size;
4950 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4951 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4953 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4954 img->sym[1].st_value = buf;
4955 img->sym[1].st_size = buf_size;
4957 img->di.cu_low_pc = buf;
4958 img->di.cu_high_pc = buf + buf_size;
4959 img->di.fn_low_pc = buf;
4960 img->di.fn_high_pc = buf + buf_size;
4962 dfh = (DebugFrameHeader *)(img + 1);
4963 memcpy(dfh, debug_frame, debug_frame_size);
4964 dfh->fde.func_start = buf;
4965 dfh->fde.func_len = buf_size;
4967 #ifdef DEBUG_JIT
4968 /* Enable this block to debug the ELF image file creation.
4969 One can inspect the result with readelf, objdump, or similar tools. */
4971 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4972 if (f) {
4973 if (fwrite(img, img_size, 1, f) != 1) {
4974 /* Avoid an unused return value warning for fwrite. */
4976 fclose(f);
4979 #endif
4981 one_entry.symfile_addr = img;
4982 one_entry.symfile_size = img_size;
4984 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4985 __jit_debug_descriptor.relevant_entry = &one_entry;
4986 __jit_debug_descriptor.first_entry = &one_entry;
4987 __jit_debug_register_code();
4989 #else
4990 /* No support for the feature. Provide the entry point expected by exec.c,
4991 and implement the internal function we declared earlier. */
4993 static void tcg_register_jit_int(const void *buf, size_t size,
4994 const void *debug_frame,
4995 size_t debug_frame_size)
4999 void tcg_register_jit(const void *buf, size_t buf_size)
5002 #endif /* ELF_HOST_MACHINE */
5004 #if !TCG_TARGET_MAYBE_vec
5005 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5007 g_assert_not_reached();
5009 #endif