hw/arm/virt: Disable pl011 clock migration if needed
[qemu/ar7.git] / tcg / tcg.c
blobde91bb6e9e1de9e299e97aadfabcf497849af81e
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
42 instructions */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
56 #else
57 # define ELF_CLASS ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
61 #else
62 # define ELF_DATA ELFDATA2LSB
63 #endif
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74 intptr_t value, intptr_t addend);
76 /* The CIE and FDE header definitions will be common to all hosts. */
77 typedef struct {
78 uint32_t len __attribute__((aligned((sizeof(void *)))));
79 uint32_t id;
80 uint8_t version;
81 char augmentation[1];
82 uint8_t code_align;
83 uint8_t data_align;
84 uint8_t return_column;
85 } DebugFrameCIE;
87 typedef struct QEMU_PACKED {
88 uint32_t len __attribute__((aligned((sizeof(void *)))));
89 uint32_t cie_offset;
90 uintptr_t func_start;
91 uintptr_t func_len;
92 } DebugFrameFDEHeader;
94 typedef struct QEMU_PACKED {
95 DebugFrameCIE cie;
96 DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
99 static void tcg_register_jit_int(const void *buf, size_t size,
100 const void *debug_frame,
101 size_t debug_frame_size)
102 __attribute__((unused));
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106 intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109 TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111 const TCGArg args[TCG_MAX_OP_ARGS],
112 const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115 TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121 unsigned vecl, unsigned vece,
122 const TCGArg args[TCG_MAX_OP_ARGS],
123 const int const_args[TCG_MAX_OP_ARGS]);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126 TCGReg dst, TCGReg src)
128 g_assert_not_reached();
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131 TCGReg dst, TCGReg base, intptr_t offset)
133 g_assert_not_reached();
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136 TCGReg dst, int64_t arg)
138 g_assert_not_reached();
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141 unsigned vecl, unsigned vece,
142 const TCGArg args[TCG_MAX_OP_ARGS],
143 const int const_args[TCG_MAX_OP_ARGS])
145 g_assert_not_reached();
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149 intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154 const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
159 #define TCG_HIGHWATER 1024
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
164 const void *tcg_code_gen_epilogue;
165 uintptr_t tcg_splitwx_diff;
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn *tcg_qemu_tb_exec;
169 #endif
171 struct tcg_region_tree {
172 QemuMutex lock;
173 GTree *tree;
174 /* padding to avoid false sharing is computed at run-time */
178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
179 * dynamically allocate from as demand dictates. Given appropriate region
180 * sizing, this minimizes flushes even when some TCG threads generate a lot
181 * more code than others.
183 struct tcg_region_state {
184 QemuMutex lock;
186 /* fields set at init time */
187 void *start;
188 void *start_aligned;
189 void *end;
190 size_t n;
191 size_t size; /* size of one region */
192 size_t stride; /* .size + guard size */
194 /* fields protected by the lock */
195 size_t current; /* current region index */
196 size_t agg_size_full; /* aggregate size of full regions */
199 static struct tcg_region_state region;
201 * This is an array of struct tcg_region_tree's, with padding.
202 * We use void * to simplify the computation of region_trees[i]; each
203 * struct is found every tree_size bytes.
205 static void *region_trees;
206 static size_t tree_size;
207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
208 static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store one byte at the output cursor and advance it by one unit. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    *dst = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the unit at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
226 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
227 *s->code_ptr++ = v;
228 } else {
229 tcg_insn_unit *p = s->code_ptr;
230 memcpy(p, &v, sizeof(v));
231 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
236 uint16_t v)
238 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
239 *p = v;
240 } else {
241 memcpy(p, &v, sizeof(v));
244 #endif
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
249 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
250 *s->code_ptr++ = v;
251 } else {
252 tcg_insn_unit *p = s->code_ptr;
253 memcpy(p, &v, sizeof(v));
254 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
259 uint32_t v)
261 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
262 *p = v;
263 } else {
264 memcpy(p, &v, sizeof(v));
267 #endif
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
272 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
273 *s->code_ptr++ = v;
274 } else {
275 tcg_insn_unit *p = s->code_ptr;
276 memcpy(p, &v, sizeof(v));
277 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
282 uint64_t v)
284 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
285 *p = v;
286 } else {
287 memcpy(p, &v, sizeof(v));
290 #endif
292 /* label relocation processing */
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295 TCGLabel *l, intptr_t addend)
297 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
299 r->type = type;
300 r->ptr = code_ptr;
301 r->addend = addend;
302 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
305 static void tcg_out_label(TCGContext *s, TCGLabel *l)
307 tcg_debug_assert(!l->has_value);
308 l->has_value = 1;
309 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
312 TCGLabel *gen_new_label(void)
314 TCGContext *s = tcg_ctx;
315 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
317 memset(l, 0, sizeof(TCGLabel));
318 l->id = s->nb_labels++;
319 QSIMPLEQ_INIT(&l->relocs);
321 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
323 return l;
326 static bool tcg_resolve_relocs(TCGContext *s)
328 TCGLabel *l;
330 QSIMPLEQ_FOREACH(l, &s->labels, next) {
331 TCGRelocation *r;
332 uintptr_t value = l->u.value;
334 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
336 return false;
340 return true;
343 static void set_jmp_reset_offset(TCGContext *s, int which)
346 * We will check for overflow at the end of the opcode loop in
347 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
349 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
352 /* Signal overflow, starting over with fewer guest insns. */
353 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
355 siglongjmp(s->jmp_trans, -2);
358 #define C_PFX1(P, A) P##A
359 #define C_PFX2(P, A, B) P##A##_##B
360 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
361 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
362 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
363 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
365 /* Define an enumeration for the various combinations. */
367 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
368 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
369 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
370 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
372 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
373 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
374 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
375 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
377 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
379 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
380 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
381 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
382 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
384 typedef enum {
385 #include "tcg-target-con-set.h"
386 } TCGConstraintSetIndex;
388 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
390 #undef C_O0_I1
391 #undef C_O0_I2
392 #undef C_O0_I3
393 #undef C_O0_I4
394 #undef C_O1_I1
395 #undef C_O1_I2
396 #undef C_O1_I3
397 #undef C_O1_I4
398 #undef C_N1_I2
399 #undef C_O2_I1
400 #undef C_O2_I2
401 #undef C_O2_I3
402 #undef C_O2_I4
404 /* Put all of the constraint sets into an array, indexed by the enum. */
406 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
407 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
408 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
409 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
411 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
412 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
413 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
414 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
416 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
418 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
419 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
420 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
421 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
423 static const TCGTargetOpDef constraint_sets[] = {
424 #include "tcg-target-con-set.h"
428 #undef C_O0_I1
429 #undef C_O0_I2
430 #undef C_O0_I3
431 #undef C_O0_I4
432 #undef C_O1_I1
433 #undef C_O1_I2
434 #undef C_O1_I3
435 #undef C_O1_I4
436 #undef C_N1_I2
437 #undef C_O2_I1
438 #undef C_O2_I2
439 #undef C_O2_I3
440 #undef C_O2_I4
442 /* Expand the enumerator to be returned from tcg_target_op_def(). */
444 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
445 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
446 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
447 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
449 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
450 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
451 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
452 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
454 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
456 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
457 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
458 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
459 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
461 #include "tcg-target.c.inc"
463 /* compare a pointer @ptr and a tb_tc @s */
464 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
466 if (ptr >= s->ptr + s->size) {
467 return 1;
468 } else if (ptr < s->ptr) {
469 return -1;
471 return 0;
474 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
476 const struct tb_tc *a = ap;
477 const struct tb_tc *b = bp;
480 * When both sizes are set, we know this isn't a lookup.
481 * This is the most likely case: every TB must be inserted; lookups
482 * are a lot less frequent.
484 if (likely(a->size && b->size)) {
485 if (a->ptr > b->ptr) {
486 return 1;
487 } else if (a->ptr < b->ptr) {
488 return -1;
490 /* a->ptr == b->ptr should happen only on deletions */
491 g_assert(a->size == b->size);
492 return 0;
495 * All lookups have either .size field set to 0.
496 * From the glib sources we see that @ap is always the lookup key. However
497 * the docs provide no guarantee, so we just mark this case as likely.
499 if (likely(a->size == 0)) {
500 return ptr_cmp_tb_tc(a->ptr, b);
502 return ptr_cmp_tb_tc(b->ptr, a);
505 static void tcg_region_trees_init(void)
507 size_t i;
509 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
510 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
511 for (i = 0; i < region.n; i++) {
512 struct tcg_region_tree *rt = region_trees + i * tree_size;
514 qemu_mutex_init(&rt->lock);
515 rt->tree = g_tree_new(tb_tc_cmp);
519 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
521 size_t region_idx;
524 * Like tcg_splitwx_to_rw, with no assert. The pc may come from
525 * a signal handler over which the caller has no control.
527 if (!in_code_gen_buffer(p)) {
528 p -= tcg_splitwx_diff;
529 if (!in_code_gen_buffer(p)) {
530 return NULL;
534 if (p < region.start_aligned) {
535 region_idx = 0;
536 } else {
537 ptrdiff_t offset = p - region.start_aligned;
539 if (offset > region.stride * (region.n - 1)) {
540 region_idx = region.n - 1;
541 } else {
542 region_idx = offset / region.stride;
545 return region_trees + region_idx * tree_size;
548 void tcg_tb_insert(TranslationBlock *tb)
550 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
552 g_assert(rt != NULL);
553 qemu_mutex_lock(&rt->lock);
554 g_tree_insert(rt->tree, &tb->tc, tb);
555 qemu_mutex_unlock(&rt->lock);
558 void tcg_tb_remove(TranslationBlock *tb)
560 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
562 g_assert(rt != NULL);
563 qemu_mutex_lock(&rt->lock);
564 g_tree_remove(rt->tree, &tb->tc);
565 qemu_mutex_unlock(&rt->lock);
569 * Find the TB 'tb' such that
570 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
571 * Return NULL if not found.
573 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
575 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
576 TranslationBlock *tb;
577 struct tb_tc s = { .ptr = (void *)tc_ptr };
579 if (rt == NULL) {
580 return NULL;
583 qemu_mutex_lock(&rt->lock);
584 tb = g_tree_lookup(rt->tree, &s);
585 qemu_mutex_unlock(&rt->lock);
586 return tb;
589 static void tcg_region_tree_lock_all(void)
591 size_t i;
593 for (i = 0; i < region.n; i++) {
594 struct tcg_region_tree *rt = region_trees + i * tree_size;
596 qemu_mutex_lock(&rt->lock);
600 static void tcg_region_tree_unlock_all(void)
602 size_t i;
604 for (i = 0; i < region.n; i++) {
605 struct tcg_region_tree *rt = region_trees + i * tree_size;
607 qemu_mutex_unlock(&rt->lock);
611 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
613 size_t i;
615 tcg_region_tree_lock_all();
616 for (i = 0; i < region.n; i++) {
617 struct tcg_region_tree *rt = region_trees + i * tree_size;
619 g_tree_foreach(rt->tree, func, user_data);
621 tcg_region_tree_unlock_all();
624 size_t tcg_nb_tbs(void)
626 size_t nb_tbs = 0;
627 size_t i;
629 tcg_region_tree_lock_all();
630 for (i = 0; i < region.n; i++) {
631 struct tcg_region_tree *rt = region_trees + i * tree_size;
633 nb_tbs += g_tree_nnodes(rt->tree);
635 tcg_region_tree_unlock_all();
636 return nb_tbs;
639 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
641 TranslationBlock *tb = v;
643 tb_destroy(tb);
644 return FALSE;
647 static void tcg_region_tree_reset_all(void)
649 size_t i;
651 tcg_region_tree_lock_all();
652 for (i = 0; i < region.n; i++) {
653 struct tcg_region_tree *rt = region_trees + i * tree_size;
655 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
656 /* Increment the refcount first so that destroy acts as a reset */
657 g_tree_ref(rt->tree);
658 g_tree_destroy(rt->tree);
660 tcg_region_tree_unlock_all();
663 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
665 void *start, *end;
667 start = region.start_aligned + curr_region * region.stride;
668 end = start + region.size;
670 if (curr_region == 0) {
671 start = region.start;
673 if (curr_region == region.n - 1) {
674 end = region.end;
677 *pstart = start;
678 *pend = end;
681 static void tcg_region_assign(TCGContext *s, size_t curr_region)
683 void *start, *end;
685 tcg_region_bounds(curr_region, &start, &end);
687 s->code_gen_buffer = start;
688 s->code_gen_ptr = start;
689 s->code_gen_buffer_size = end - start;
690 s->code_gen_highwater = end - TCG_HIGHWATER;
693 static bool tcg_region_alloc__locked(TCGContext *s)
695 if (region.current == region.n) {
696 return true;
698 tcg_region_assign(s, region.current);
699 region.current++;
700 return false;
704 * Request a new region once the one in use has filled up.
705 * Returns true on error.
707 static bool tcg_region_alloc(TCGContext *s)
709 bool err;
710 /* read the region size now; alloc__locked will overwrite it on success */
711 size_t size_full = s->code_gen_buffer_size;
713 qemu_mutex_lock(&region.lock);
714 err = tcg_region_alloc__locked(s);
715 if (!err) {
716 region.agg_size_full += size_full - TCG_HIGHWATER;
718 qemu_mutex_unlock(&region.lock);
719 return err;
723 * Perform a context's first region allocation.
724 * This function does _not_ increment region.agg_size_full.
726 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
728 return tcg_region_alloc__locked(s);
731 /* Call from a safe-work context */
732 void tcg_region_reset_all(void)
734 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
735 unsigned int i;
737 qemu_mutex_lock(&region.lock);
738 region.current = 0;
739 region.agg_size_full = 0;
741 for (i = 0; i < n_ctxs; i++) {
742 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
743 bool err = tcg_region_initial_alloc__locked(s);
745 g_assert(!err);
747 qemu_mutex_unlock(&region.lock);
749 tcg_region_tree_reset_all();
752 #ifdef CONFIG_USER_ONLY
753 static size_t tcg_n_regions(void)
755 return 1;
757 #else
759 * It is likely that some vCPUs will translate more code than others, so we
760 * first try to set more regions than max_cpus, with those regions being of
761 * reasonable size. If that's not possible we make do by evenly dividing
762 * the code_gen_buffer among the vCPUs.
764 static size_t tcg_n_regions(void)
766 size_t i;
768 /* Use a single region if all we have is one vCPU thread */
769 #if !defined(CONFIG_USER_ONLY)
770 MachineState *ms = MACHINE(qdev_get_machine());
771 unsigned int max_cpus = ms->smp.max_cpus;
772 #endif
773 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
774 return 1;
777 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
778 for (i = 8; i > 0; i--) {
779 size_t regions_per_thread = i;
780 size_t region_size;
782 region_size = tcg_init_ctx.code_gen_buffer_size;
783 region_size /= max_cpus * regions_per_thread;
785 if (region_size >= 2 * 1024u * 1024) {
786 return max_cpus * regions_per_thread;
789 /* If we can't, then just allocate one region per vCPU thread */
790 return max_cpus;
792 #endif
795 * Initializes region partitioning.
797 * Called at init time from the parent thread (i.e. the one calling
798 * tcg_context_init), after the target's TCG globals have been set.
800 * Region partitioning works by splitting code_gen_buffer into separate regions,
801 * and then assigning regions to TCG threads so that the threads can translate
802 * code in parallel without synchronization.
804 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
805 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
806 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
807 * must have been parsed before calling this function, since it calls
808 * qemu_tcg_mttcg_enabled().
810 * In user-mode we use a single region. Having multiple regions in user-mode
811 * is not supported, because the number of vCPU threads (recall that each thread
812 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
813 * OS, and usually this number is huge (tens of thousands is not uncommon).
814 * Thus, given this large bound on the number of vCPU threads and the fact
815 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
816 * the availability of at least one region per vCPU thread.
818 * However, this user-mode limitation is unlikely to be a significant problem
819 * in practice. Multi-threaded guests share most if not all of their translated
820 * code, which makes parallel code generation less appealing than in softmmu.
822 void tcg_region_init(void)
824 void *buf = tcg_init_ctx.code_gen_buffer;
825 void *aligned;
826 size_t size = tcg_init_ctx.code_gen_buffer_size;
827 size_t page_size = qemu_real_host_page_size;
828 size_t region_size;
829 size_t n_regions;
830 size_t i;
831 uintptr_t splitwx_diff;
833 n_regions = tcg_n_regions();
835 /* The first region will be 'aligned - buf' bytes larger than the others */
836 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
837 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
839 * Make region_size a multiple of page_size, using aligned as the start.
840 * As a result of this we might end up with a few extra pages at the end of
841 * the buffer; we will assign those to the last region.
843 region_size = (size - (aligned - buf)) / n_regions;
844 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
846 /* A region must have at least 2 pages; one code, one guard */
847 g_assert(region_size >= 2 * page_size);
849 /* init the region struct */
850 qemu_mutex_init(&region.lock);
851 region.n = n_regions;
852 region.size = region_size - page_size;
853 region.stride = region_size;
854 region.start = buf;
855 region.start_aligned = aligned;
856 /* page-align the end, since its last page will be a guard page */
857 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
858 /* account for that last guard page */
859 region.end -= page_size;
861 /* set guard pages */
862 splitwx_diff = tcg_splitwx_diff;
863 for (i = 0; i < region.n; i++) {
864 void *start, *end;
865 int rc;
867 tcg_region_bounds(i, &start, &end);
868 rc = qemu_mprotect_none(end, page_size);
869 g_assert(!rc);
870 if (splitwx_diff) {
871 rc = qemu_mprotect_none(end + splitwx_diff, page_size);
872 g_assert(!rc);
876 tcg_region_trees_init();
878 /* In user-mode we support only one ctx, so do the initial allocation now */
879 #ifdef CONFIG_USER_ONLY
881 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
883 g_assert(!err);
885 #endif
#ifdef CONFIG_DEBUG_TCG
/*
 * Convert a pointer into the writable code buffer to its executable
 * alias by adding tcg_splitwx_diff.  NULL is passed through unchanged;
 * any other input is asserted to lie inside the code buffer.
 */
const void *tcg_splitwx_to_rx(void *rw)
{
    if (!rw) {
        return rw;
    }
    g_assert(in_code_gen_buffer(rw));
    return rw + tcg_splitwx_diff;
}

/*
 * Inverse of tcg_splitwx_to_rx(): subtract tcg_splitwx_diff.  NULL is
 * passed through unchanged; the result is asserted to lie inside the
 * code buffer.
 */
void *tcg_splitwx_to_rw(const void *rx)
{
    if (!rx) {
        return (void *)rx;
    }
    rx -= tcg_splitwx_diff;
    /* Assert that we end with a pointer in the rw region. */
    g_assert(in_code_gen_buffer(rx));
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
911 static void alloc_tcg_plugin_context(TCGContext *s)
913 #ifdef CONFIG_PLUGIN
914 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
915 s->plugin_tb->insns =
916 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
917 #endif
921 * All TCG threads except the parent (i.e. the one that called tcg_context_init
922 * and registered the target's TCG globals) must register with this function
923 * before initiating translation.
925 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
926 * of tcg_region_init() for the reasoning behind this.
928 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
929 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
930 * is not used anymore for translation once this function is called.
932 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
933 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
935 #ifdef CONFIG_USER_ONLY
936 void tcg_register_thread(void)
938 tcg_ctx = &tcg_init_ctx;
940 #else
941 void tcg_register_thread(void)
943 MachineState *ms = MACHINE(qdev_get_machine());
944 TCGContext *s = g_malloc(sizeof(*s));
945 unsigned int i, n;
946 bool err;
948 *s = tcg_init_ctx;
950 /* Relink mem_base. */
951 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
952 if (tcg_init_ctx.temps[i].mem_base) {
953 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
954 tcg_debug_assert(b >= 0 && b < n);
955 s->temps[i].mem_base = &s->temps[b];
959 /* Claim an entry in tcg_ctxs */
960 n = qatomic_fetch_inc(&n_tcg_ctxs);
961 g_assert(n < ms->smp.max_cpus);
962 qatomic_set(&tcg_ctxs[n], s);
964 if (n > 0) {
965 alloc_tcg_plugin_context(s);
968 tcg_ctx = s;
969 qemu_mutex_lock(&region.lock);
970 err = tcg_region_initial_alloc__locked(tcg_ctx);
971 g_assert(!err);
972 qemu_mutex_unlock(&region.lock);
974 #endif /* !CONFIG_USER_ONLY */
977 * Returns the size (in bytes) of all translated code (i.e. from all regions)
978 * currently in the cache.
979 * See also: tcg_code_capacity()
980 * Do not confuse with tcg_current_code_size(); that one applies to a single
981 * TCG context.
983 size_t tcg_code_size(void)
985 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
986 unsigned int i;
987 size_t total;
989 qemu_mutex_lock(&region.lock);
990 total = region.agg_size_full;
991 for (i = 0; i < n_ctxs; i++) {
992 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
993 size_t size;
995 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
996 g_assert(size <= s->code_gen_buffer_size);
997 total += size;
999 qemu_mutex_unlock(&region.lock);
1000 return total;
1004 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
1005 * regions.
1006 * See also: tcg_code_size()
1008 size_t tcg_code_capacity(void)
1010 size_t guard_size, capacity;
1012 /* no need for synchronization; these variables are set at init time */
1013 guard_size = region.stride - region.size;
1014 capacity = region.end + guard_size - region.start;
1015 capacity -= region.n * (guard_size + TCG_HIGHWATER);
1016 return capacity;
1019 size_t tcg_tb_phys_invalidate_count(void)
1021 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
1022 unsigned int i;
1023 size_t total = 0;
1025 for (i = 0; i < n_ctxs; i++) {
1026 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1028 total += qatomic_read(&s->tb_phys_invalidate_count);
1030 return total;
1033 /* pool based memory allocation */
1034 void *tcg_malloc_internal(TCGContext *s, int size)
1036 TCGPool *p;
1037 int pool_size;
1039 if (size > TCG_POOL_CHUNK_SIZE) {
1040 /* big malloc: insert a new pool (XXX: could optimize) */
1041 p = g_malloc(sizeof(TCGPool) + size);
1042 p->size = size;
1043 p->next = s->pool_first_large;
1044 s->pool_first_large = p;
1045 return p->data;
1046 } else {
1047 p = s->pool_current;
1048 if (!p) {
1049 p = s->pool_first;
1050 if (!p)
1051 goto new_pool;
1052 } else {
1053 if (!p->next) {
1054 new_pool:
1055 pool_size = TCG_POOL_CHUNK_SIZE;
1056 p = g_malloc(sizeof(TCGPool) + pool_size);
1057 p->size = pool_size;
1058 p->next = NULL;
1059 if (s->pool_current)
1060 s->pool_current->next = p;
1061 else
1062 s->pool_first = p;
1063 } else {
1064 p = p->next;
1068 s->pool_current = p;
1069 s->pool_cur = p->data + size;
1070 s->pool_end = p->data + p->size;
1071 return p->data;
1074 void tcg_pool_reset(TCGContext *s)
1076 TCGPool *p, *t;
1077 for (p = s->pool_first_large; p; p = t) {
1078 t = p->next;
1079 g_free(p);
1081 s->pool_first_large = NULL;
1082 s->pool_cur = s->pool_end = NULL;
1083 s->pool_current = NULL;
/* One entry of the helper table, which is keyed by the .func pointer. */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
1093 #include "exec/helper-proto.h"
1095 static const TCGHelperInfo all_helpers[] = {
1096 #include "exec/helper-tcg.h"
1098 static GHashTable *helper_table;
1100 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1101 static void process_op_defs(TCGContext *s);
1102 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1103 TCGReg reg, const char *name);
1105 void tcg_context_init(TCGContext *s)
1107 int op, total_args, n, i;
1108 TCGOpDef *def;
1109 TCGArgConstraint *args_ct;
1110 TCGTemp *ts;
1112 memset(s, 0, sizeof(*s));
1113 s->nb_globals = 0;
1115 /* Count total number of arguments and allocate the corresponding
1116 space */
1117 total_args = 0;
1118 for(op = 0; op < NB_OPS; op++) {
1119 def = &tcg_op_defs[op];
1120 n = def->nb_iargs + def->nb_oargs;
1121 total_args += n;
1124 args_ct = g_new0(TCGArgConstraint, total_args);
1126 for(op = 0; op < NB_OPS; op++) {
1127 def = &tcg_op_defs[op];
1128 def->args_ct = args_ct;
1129 n = def->nb_iargs + def->nb_oargs;
1130 args_ct += n;
1133 /* Register helpers. */
1134 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1135 helper_table = g_hash_table_new(NULL, NULL);
1137 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1138 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1139 (gpointer)&all_helpers[i]);
1142 tcg_target_init(s);
1143 process_op_defs(s);
1145 /* Reverse the order of the saved registers, assuming they're all at
1146 the start of tcg_target_reg_alloc_order. */
1147 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1148 int r = tcg_target_reg_alloc_order[n];
1149 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1150 break;
1153 for (i = 0; i < n; ++i) {
1154 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1156 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1157 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1160 alloc_tcg_plugin_context(s);
1162 tcg_ctx = s;
1164 * In user-mode we simply share the init context among threads, since we
1165 * use a single region. See the documentation tcg_region_init() for the
1166 * reasoning behind this.
1167 * In softmmu we will have at most max_cpus TCG threads.
1169 #ifdef CONFIG_USER_ONLY
1170 tcg_ctxs = &tcg_ctx;
1171 n_tcg_ctxs = 1;
1172 #else
1173 MachineState *ms = MACHINE(qdev_get_machine());
1174 unsigned int max_cpus = ms->smp.max_cpus;
1175 tcg_ctxs = g_new(TCGContext *, max_cpus);
1176 #endif
1178 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1179 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1180 cpu_env = temp_tcgv_ptr(ts);
1184 * Allocate TBs right before their corresponding translated code, making
1185 * sure that TBs and code are on different cache lines.
1187 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1189 uintptr_t align = qemu_icache_linesize;
1190 TranslationBlock *tb;
1191 void *next;
1193 retry:
1194 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1195 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1197 if (unlikely(next > s->code_gen_highwater)) {
1198 if (tcg_region_alloc(s)) {
1199 return NULL;
1201 goto retry;
1203 qatomic_set(&s->code_gen_ptr, next);
1204 s->data_gen_ptr = NULL;
1205 return tb;
/*
 * Emit the backend prologue at the start of the code buffer of @s and
 * then shrink the buffer so that it begins right after the prologue.
 *
 * On return s->code_gen_buffer, s->code_gen_ptr and s->code_buf all point
 * just past the prologue and s->code_gen_buffer_size has been reduced by
 * the prologue size.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer. */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue itself is the entry point used to run translated code. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over. The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require. */
    /* Note: computed from the original buffer start, before the prologue
       is deducted below. */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    /* buf1 marks the end of everything the prologue emitted. */
    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer. */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Code comes first; data from data_gen_ptr onwards is dumped
               as raw words rather than disassembled. */
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely. */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}
1305 void tcg_func_start(TCGContext *s)
1307 tcg_pool_reset(s);
1308 s->nb_temps = s->nb_globals;
1310 /* No temps have been previously allocated for size or locality. */
1311 memset(s->free_temps, 0, sizeof(s->free_temps));
1313 /* No constant temps have been previously allocated. */
1314 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1315 if (s->const_table[i]) {
1316 g_hash_table_remove_all(s->const_table[i]);
1320 s->nb_ops = 0;
1321 s->nb_labels = 0;
1322 s->current_frame_offset = s->frame_start;
1324 #ifdef CONFIG_DEBUG_TCG
1325 s->goto_tb_issue_mask = 0;
1326 #endif
1328 QTAILQ_INIT(&s->ops);
1329 QTAILQ_INIT(&s->free_ops);
1330 QSIMPLEQ_INIT(&s->labels);
1333 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1335 int n = s->nb_temps++;
1337 if (n >= TCG_MAX_TEMPS) {
1338 tcg_raise_tb_overflow(s);
1340 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1343 static TCGTemp *tcg_global_alloc(TCGContext *s)
1345 TCGTemp *ts;
1347 tcg_debug_assert(s->nb_globals == s->nb_temps);
1348 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1349 s->nb_globals++;
1350 ts = tcg_temp_alloc(s);
1351 ts->kind = TEMP_GLOBAL;
1353 return ts;
1356 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1357 TCGReg reg, const char *name)
1359 TCGTemp *ts;
1361 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1362 tcg_abort();
1365 ts = tcg_global_alloc(s);
1366 ts->base_type = type;
1367 ts->type = type;
1368 ts->kind = TEMP_FIXED;
1369 ts->reg = reg;
1370 ts->name = name;
1371 tcg_regset_set_reg(s->reserved_regs, reg);
1373 return ts;
1376 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1378 s->frame_start = start;
1379 s->frame_end = start + size;
1380 s->frame_temp
1381 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1384 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1385 intptr_t offset, const char *name)
1387 TCGContext *s = tcg_ctx;
1388 TCGTemp *base_ts = tcgv_ptr_temp(base);
1389 TCGTemp *ts = tcg_global_alloc(s);
1390 int indirect_reg = 0, bigendian = 0;
1391 #ifdef HOST_WORDS_BIGENDIAN
1392 bigendian = 1;
1393 #endif
1395 switch (base_ts->kind) {
1396 case TEMP_FIXED:
1397 break;
1398 case TEMP_GLOBAL:
1399 /* We do not support double-indirect registers. */
1400 tcg_debug_assert(!base_ts->indirect_reg);
1401 base_ts->indirect_base = 1;
1402 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1403 ? 2 : 1);
1404 indirect_reg = 1;
1405 break;
1406 default:
1407 g_assert_not_reached();
1410 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1411 TCGTemp *ts2 = tcg_global_alloc(s);
1412 char buf[64];
1414 ts->base_type = TCG_TYPE_I64;
1415 ts->type = TCG_TYPE_I32;
1416 ts->indirect_reg = indirect_reg;
1417 ts->mem_allocated = 1;
1418 ts->mem_base = base_ts;
1419 ts->mem_offset = offset + bigendian * 4;
1420 pstrcpy(buf, sizeof(buf), name);
1421 pstrcat(buf, sizeof(buf), "_0");
1422 ts->name = strdup(buf);
1424 tcg_debug_assert(ts2 == ts + 1);
1425 ts2->base_type = TCG_TYPE_I64;
1426 ts2->type = TCG_TYPE_I32;
1427 ts2->indirect_reg = indirect_reg;
1428 ts2->mem_allocated = 1;
1429 ts2->mem_base = base_ts;
1430 ts2->mem_offset = offset + (1 - bigendian) * 4;
1431 pstrcpy(buf, sizeof(buf), name);
1432 pstrcat(buf, sizeof(buf), "_1");
1433 ts2->name = strdup(buf);
1434 } else {
1435 ts->base_type = type;
1436 ts->type = type;
1437 ts->indirect_reg = indirect_reg;
1438 ts->mem_allocated = 1;
1439 ts->mem_base = base_ts;
1440 ts->mem_offset = offset;
1441 ts->name = name;
1443 return ts;
1446 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1448 TCGContext *s = tcg_ctx;
1449 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1450 TCGTemp *ts;
1451 int idx, k;
1453 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1454 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1455 if (idx < TCG_MAX_TEMPS) {
1456 /* There is already an available temp with the right type. */
1457 clear_bit(idx, s->free_temps[k].l);
1459 ts = &s->temps[idx];
1460 ts->temp_allocated = 1;
1461 tcg_debug_assert(ts->base_type == type);
1462 tcg_debug_assert(ts->kind == kind);
1463 } else {
1464 ts = tcg_temp_alloc(s);
1465 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1466 TCGTemp *ts2 = tcg_temp_alloc(s);
1468 ts->base_type = type;
1469 ts->type = TCG_TYPE_I32;
1470 ts->temp_allocated = 1;
1471 ts->kind = kind;
1473 tcg_debug_assert(ts2 == ts + 1);
1474 ts2->base_type = TCG_TYPE_I64;
1475 ts2->type = TCG_TYPE_I32;
1476 ts2->temp_allocated = 1;
1477 ts2->kind = kind;
1478 } else {
1479 ts->base_type = type;
1480 ts->type = type;
1481 ts->temp_allocated = 1;
1482 ts->kind = kind;
1486 #if defined(CONFIG_DEBUG_TCG)
1487 s->temps_in_use++;
1488 #endif
1489 return ts;
1492 TCGv_vec tcg_temp_new_vec(TCGType type)
1494 TCGTemp *t;
1496 #ifdef CONFIG_DEBUG_TCG
1497 switch (type) {
1498 case TCG_TYPE_V64:
1499 assert(TCG_TARGET_HAS_v64);
1500 break;
1501 case TCG_TYPE_V128:
1502 assert(TCG_TARGET_HAS_v128);
1503 break;
1504 case TCG_TYPE_V256:
1505 assert(TCG_TARGET_HAS_v256);
1506 break;
1507 default:
1508 g_assert_not_reached();
1510 #endif
1512 t = tcg_temp_new_internal(type, 0);
1513 return temp_tcgv_vec(t);
1516 /* Create a new temp of the same type as an existing temp. */
1517 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1519 TCGTemp *t = tcgv_vec_temp(match);
1521 tcg_debug_assert(t->temp_allocated != 0);
1523 t = tcg_temp_new_internal(t->base_type, 0);
1524 return temp_tcgv_vec(t);
1527 void tcg_temp_free_internal(TCGTemp *ts)
1529 TCGContext *s = tcg_ctx;
1530 int k, idx;
1532 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1533 if (ts->kind == TEMP_CONST) {
1534 return;
1537 #if defined(CONFIG_DEBUG_TCG)
1538 s->temps_in_use--;
1539 if (s->temps_in_use < 0) {
1540 fprintf(stderr, "More temporaries freed than allocated!\n");
1542 #endif
1544 tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1545 tcg_debug_assert(ts->temp_allocated != 0);
1546 ts->temp_allocated = 0;
1548 idx = temp_idx(ts);
1549 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1550 set_bit(idx, s->free_temps[k].l);
1553 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1555 TCGContext *s = tcg_ctx;
1556 GHashTable *h = s->const_table[type];
1557 TCGTemp *ts;
1559 if (h == NULL) {
1560 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1561 s->const_table[type] = h;
1564 ts = g_hash_table_lookup(h, &val);
1565 if (ts == NULL) {
1566 ts = tcg_temp_alloc(s);
1568 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1569 TCGTemp *ts2 = tcg_temp_alloc(s);
1571 ts->base_type = TCG_TYPE_I64;
1572 ts->type = TCG_TYPE_I32;
1573 ts->kind = TEMP_CONST;
1574 ts->temp_allocated = 1;
1576 * Retain the full value of the 64-bit constant in the low
1577 * part, so that the hash table works. Actual uses will
1578 * truncate the value to the low part.
1580 ts->val = val;
1582 tcg_debug_assert(ts2 == ts + 1);
1583 ts2->base_type = TCG_TYPE_I64;
1584 ts2->type = TCG_TYPE_I32;
1585 ts2->kind = TEMP_CONST;
1586 ts2->temp_allocated = 1;
1587 ts2->val = val >> 32;
1588 } else {
1589 ts->base_type = type;
1590 ts->type = type;
1591 ts->kind = TEMP_CONST;
1592 ts->temp_allocated = 1;
1593 ts->val = val;
1595 g_hash_table_insert(h, &ts->val, ts);
1598 return ts;
1601 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1603 val = dup_const(vece, val);
1604 return temp_tcgv_vec(tcg_constant_internal(type, val));
1607 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1609 TCGTemp *t = tcgv_vec_temp(match);
1611 tcg_debug_assert(t->temp_allocated != 0);
1612 return tcg_constant_vec(t->base_type, vece, val);
1615 TCGv_i32 tcg_const_i32(int32_t val)
1617 TCGv_i32 t0;
1618 t0 = tcg_temp_new_i32();
1619 tcg_gen_movi_i32(t0, val);
1620 return t0;
1623 TCGv_i64 tcg_const_i64(int64_t val)
1625 TCGv_i64 t0;
1626 t0 = tcg_temp_new_i64();
1627 tcg_gen_movi_i64(t0, val);
1628 return t0;
1631 TCGv_i32 tcg_const_local_i32(int32_t val)
1633 TCGv_i32 t0;
1634 t0 = tcg_temp_local_new_i32();
1635 tcg_gen_movi_i32(t0, val);
1636 return t0;
1639 TCGv_i64 tcg_const_local_i64(int64_t val)
1641 TCGv_i64 t0;
1642 t0 = tcg_temp_local_new_i64();
1643 tcg_gen_movi_i64(t0, val);
1644 return t0;
#if defined(CONFIG_DEBUG_TCG)
/* Reset the debug counter of temporaries currently in use. */
void tcg_clear_temp_count(void)
{
    tcg_ctx->temps_in_use = 0;
}

/*
 * Return 1 if the in-use counter is non-zero, 0 otherwise.
 * A non-zero counter is cleared so that we don't give another warning
 * immediately next time around.
 */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;

    if (s->temps_in_use == 0) {
        return 0;
    }
    s->temps_in_use = 0;
    return 1;
}
#endif
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.
   Opcodes beyond INDEX_op_last_generic are always reported as
   supported. */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if the backend supports any of the three vector widths. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Opcodes that are unconditionally available. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* 32-bit integer opcodes that are always available. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer opcodes, each gated by a backend flag. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Baseline 64-bit integer opcodes exist only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer opcodes, each gated by a backend flag. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes available whenever any vector width is supported. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    /* Optional vector opcodes: need vector support plus their own flag. */
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything not listed above must lie past the generic opcodes. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op invoking helper @func with @nargs arguments
 * taken from @args, storing the result in @ret (NULL for no result).
 *
 * @func must have been registered in helper_table: the lookup result is
 * dereferenced without a NULL check.  The op's argument array is laid
 * out as: return temps, input temps, function address, call flags.
 * With TCG_TARGET_EXTEND_ARGS on a 64-bit host, entries of @args for
 * 32-bit arguments are overwritten in place with extended copies.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters. Split them. */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on the call is described purely in 32-bit pieces. */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace each 32-bit argument by a sign- or zero-extended 64-bit
       temporary; these are freed again at the end of the function. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the write cursor into op->args. */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair. Prepare for this by using
               two return temporaries, and reassemble below. */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* A 64-bit result on a 32-bit host occupies ret and ret + 1;
               their order depends on host endianness. */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts argument slots, including any alignment dummies. */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian. For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order. If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function address and the call flags follow the inputs. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow. */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above. */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create. */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the extension temporaries created before the call. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
2138 static void tcg_reg_alloc_start(TCGContext *s)
2140 int i, n;
2142 for (i = 0, n = s->nb_temps; i < n; i++) {
2143 TCGTemp *ts = &s->temps[i];
2144 TCGTempVal val = TEMP_VAL_MEM;
2146 switch (ts->kind) {
2147 case TEMP_CONST:
2148 val = TEMP_VAL_CONST;
2149 break;
2150 case TEMP_FIXED:
2151 val = TEMP_VAL_REG;
2152 break;
2153 case TEMP_GLOBAL:
2154 break;
2155 case TEMP_NORMAL:
2156 val = TEMP_VAL_DEAD;
2157 /* fall through */
2158 case TEMP_LOCAL:
2159 ts->mem_allocated = 0;
2160 break;
2161 default:
2162 g_assert_not_reached();
2164 ts->val_type = val;
2167 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2170 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2171 TCGTemp *ts)
2173 int idx = temp_idx(ts);
2175 switch (ts->kind) {
2176 case TEMP_FIXED:
2177 case TEMP_GLOBAL:
2178 pstrcpy(buf, buf_size, ts->name);
2179 break;
2180 case TEMP_LOCAL:
2181 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2182 break;
2183 case TEMP_NORMAL:
2184 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2185 break;
2186 case TEMP_CONST:
2187 switch (ts->type) {
2188 case TCG_TYPE_I32:
2189 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2190 break;
2191 #if TCG_TARGET_REG_BITS > 32
2192 case TCG_TYPE_I64:
2193 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2194 break;
2195 #endif
2196 case TCG_TYPE_V64:
2197 case TCG_TYPE_V128:
2198 case TCG_TYPE_V256:
2199 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2200 64 << (ts->type - TCG_TYPE_V64), ts->val);
2201 break;
2202 default:
2203 g_assert_not_reached();
2205 break;
2207 return buf;
2210 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2211 int buf_size, TCGArg arg)
2213 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2216 /* Find helper name. */
2217 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2219 const char *ret = NULL;
2220 if (helper_table) {
2221 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2222 if (info) {
2223 ret = info->name;
2226 return ret;
/* Printable names of the comparison conditions, indexed by TCG_COND_*. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
/* Printable names of memory-operation size/sign/endianness combinations,
   indexed by the MO_* value.  Indices not listed here are NULL. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
/* Printable prefixes for the alignment field of a memory operation,
   indexed by the MO_AMASK bits shifted down by MO_ASHIFT.  Which of
   MO_UNALN / MO_ALIGN gets the empty string depends on
   TARGET_ALIGNED_ONLY. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2277 static inline bool tcg_regset_single(TCGRegSet d)
2279 return (d & (d - 1)) == 0;
2282 static inline TCGReg tcg_regset_first(TCGRegSet d)
2284 if (TCG_TARGET_NB_REGS <= 32) {
2285 return ctz32(d);
2286 } else {
2287 return ctz64(d);
/*
 * Write a textual listing of every op on s->ops through qemu_log().
 *
 * For each op the name is followed by its output args, input args and
 * constant args.  insn_start and call ops have their own formats.  When
 * @have_prefs is true or the op carries liveness data (op->life), the line
 * is padded and the sync/dead argument indexes are appended, followed by
 * the output register preferences when @have_prefs is true.
 */
2291 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2293 char buf[128];
2294 TCGOp *op;
2296 QTAILQ_FOREACH(op, &s->ops, link) {
2297 int i, k, nb_oargs, nb_iargs, nb_cargs;
2298 const TCGOpDef *def;
2299 TCGOpcode c;
/* Sum of the qemu_log() return values so far; compared with 40 for padding. */
2300 int col = 0;
2302 c = op->opc;
2303 def = &tcg_op_defs[c];
/*
 * insn_start: print each start word.  When the target word is wider than a
 * host register, the word is rebuilt from two consecutive args.
 */
2305 if (c == INDEX_op_insn_start) {
2306 nb_oargs = 0;
2307 col += qemu_log("\n ----");
2309 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2310 target_ulong a;
2311 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2312 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2313 #else
2314 a = op->args[i];
2315 #endif
2316 col += qemu_log(" " TARGET_FMT_lx, a);
2318 } else if (c == INDEX_op_call) {
2319 /* variable number of arguments */
2320 nb_oargs = TCGOP_CALLO(op);
2321 nb_iargs = TCGOP_CALLI(op);
2322 nb_cargs = def->nb_cargs;
2324 /* function name, flags, out args */
/*
 * NOTE(review): tcg_find_helper() returns NULL when no entry is found, and
 * that result is passed straight to "%s" here -- confirm every call op
 * refers to a registered helper.
 */
2325 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2326 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2327 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2328 for (i = 0; i < nb_oargs; i++) {
2329 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2330 op->args[i]));
2332 for (i = 0; i < nb_iargs; i++) {
2333 TCGArg arg = op->args[nb_oargs + i];
2334 const char *t = "<dummy>";
2335 if (arg != TCG_CALL_DUMMY_ARG) {
2336 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2338 col += qemu_log(",%s", t);
2340 } else {
2341 col += qemu_log(" %s ", def->name);
2343 nb_oargs = def->nb_oargs;
2344 nb_iargs = def->nb_iargs;
2345 nb_cargs = def->nb_cargs;
2347 if (def->flags & TCG_OPF_VECTOR) {
2348 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2349 8 << TCGOP_VECE(op));
/* k walks op->args[] across outputs, inputs and then constant args. */
2352 k = 0;
2353 for (i = 0; i < nb_oargs; i++) {
2354 if (k != 0) {
2355 col += qemu_log(",");
2357 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2358 op->args[k++]));
2360 for (i = 0; i < nb_iargs; i++) {
2361 if (k != 0) {
2362 col += qemu_log(",");
2364 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2365 op->args[k++]));
/*
 * Print the first constant arg symbolically where a name table exists
 * (condition codes, memory ops).  i is left holding the number of constant
 * args already printed.
 */
2367 switch (c) {
2368 case INDEX_op_brcond_i32:
2369 case INDEX_op_setcond_i32:
2370 case INDEX_op_movcond_i32:
2371 case INDEX_op_brcond2_i32:
2372 case INDEX_op_setcond2_i32:
2373 case INDEX_op_brcond_i64:
2374 case INDEX_op_setcond_i64:
2375 case INDEX_op_movcond_i64:
2376 case INDEX_op_cmp_vec:
2377 case INDEX_op_cmpsel_vec:
2378 if (op->args[k] < ARRAY_SIZE(cond_name)
2379 && cond_name[op->args[k]]) {
2380 col += qemu_log(",%s", cond_name[op->args[k++]]);
2381 } else {
2382 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2384 i = 1;
2385 break;
2386 case INDEX_op_qemu_ld_i32:
2387 case INDEX_op_qemu_st_i32:
2388 case INDEX_op_qemu_st8_i32:
2389 case INDEX_op_qemu_ld_i64:
2390 case INDEX_op_qemu_st_i64:
2392 TCGMemOpIdx oi = op->args[k++];
/*
 * The MemOp declared next is also named op and hides the loop's TCGOp *op
 * for the rest of this case.
 * NOTE(review): ldst_name[] has no entry for every index that passes the
 * mask test below, so s_op may be NULL -- confirm.
 */
2393 MemOp op = get_memop(oi);
2394 unsigned ix = get_mmuidx(oi);
2396 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2397 col += qemu_log(",$0x%x,%u", op, ix);
2398 } else {
2399 const char *s_al, *s_op;
2400 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2401 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2402 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2404 i = 1;
2406 break;
2407 default:
2408 i = 0;
2409 break;
/* Branch-like ops: the next constant arg is a label, printed as $L<id>. */
2411 switch (c) {
2412 case INDEX_op_set_label:
2413 case INDEX_op_br:
2414 case INDEX_op_brcond_i32:
2415 case INDEX_op_brcond_i64:
2416 case INDEX_op_brcond2_i32:
2417 col += qemu_log("%s$L%d", k ? "," : "",
2418 arg_label(op->args[k])->id);
2419 i++, k++;
2420 break;
2421 default:
2422 break;
/* Any constant args not handled above are printed as raw hex. */
2424 for (; i < nb_cargs; i++, k++) {
2425 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
/* Pad with spaces until col reaches 40, writing directly to the log file. */
2429 if (have_prefs || op->life) {
2431 QemuLogFile *logfile;
2433 rcu_read_lock();
2434 logfile = qatomic_rcu_read(&qemu_logfile);
2435 if (logfile) {
2436 for (; col < 40; ++col) {
2437 putc(' ', logfile->fd);
2440 rcu_read_unlock();
2443 if (op->life) {
2444 unsigned life = op->life;
/* SYNC_ARG * 3 selects the sync bits of arguments 0 and 1. */
2446 if (life & (SYNC_ARG * 3)) {
2447 qemu_log(" sync:");
2448 for (i = 0; i < 2; ++i) {
2449 if (life & (SYNC_ARG << i)) {
2450 qemu_log(" %d", i);
/*
 * Dividing by DEAD_ARG moves the dead bits down so that bit i refers to
 * argument i (presumably DEAD_ARG is a power of two -- confirm).
 */
2454 life /= DEAD_ARG;
2455 if (life) {
2456 qemu_log(" dead:");
2457 for (i = 0; life; ++i, life >>= 1) {
2458 if (life & 1) {
2459 qemu_log(" %d", i);
/* Register preference of each output: none, all, one name, or a hex mask. */
2465 if (have_prefs) {
2466 for (i = 0; i < nb_oargs; ++i) {
2467 TCGRegSet set = op->output_pref[i];
2469 if (i == 0) {
2470 qemu_log(" pref=");
2471 } else {
2472 qemu_log(",");
2474 if (set == 0) {
2475 qemu_log("none");
2476 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2477 qemu_log("all");
2478 #ifdef CONFIG_DEBUG_TCG
2479 } else if (tcg_regset_single(set)) {
2480 TCGReg reg = tcg_regset_first(set);
2481 qemu_log("%s", tcg_target_reg_names[reg]);
2482 #endif
2483 } else if (TCG_TARGET_NB_REGS <= 32) {
2484 qemu_log("%#x", (uint32_t)set);
2485 } else {
2486 qemu_log("%#" PRIx64, (uint64_t)set);
2491 qemu_log("\n");
2495 /* we give more priority to constraints with less registers */
2496 static int get_constraint_priority(const TCGOpDef *def, int k)
2498 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2499 int n;
2501 if (arg_ct->oalias) {
2502 /* an alias is equivalent to a single register */
2503 n = 1;
2504 } else {
2505 n = ctpop64(arg_ct->regs);
2507 return TCG_TARGET_NB_REGS - n + 1;
2510 /* sort from highest priority to lowest */
2511 static void sort_constraints(TCGOpDef *def, int start, int n)
2513 int i, j;
2514 TCGArgConstraint *a = def->args_ct;
2516 for (i = 0; i < n; i++) {
2517 a[start + i].sort_index = start + i;
2519 if (n <= 1) {
2520 return;
2522 for (i = 0; i < n - 1; i++) {
2523 for (j = i + 1; j < n; j++) {
2524 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2525 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2526 if (p1 < p2) {
2527 int tmp = a[start + i].sort_index;
2528 a[start + i].sort_index = a[start + j].sort_index;
2529 a[start + j].sort_index = tmp;
/*
 * Fill in def->args_ct[] for every opcode from the target's constraint
 * strings.
 *
 * Opcodes flagged TCG_OPF_NOT_PRESENT, or with no input/output args, are
 * skipped.  For the rest, the constraint set chosen by tcg_target_op_def()
 * is parsed one character at a time for each argument, and the constraints
 * are then sorted separately for outputs and inputs.  @s is not used in the
 * code visible here.
 */
2535 static void process_op_defs(TCGContext *s)
2537 TCGOpcode op;
2539 for (op = 0; op < NB_OPS; op++) {
2540 TCGOpDef *def = &tcg_op_defs[op];
2541 const TCGTargetOpDef *tdefs;
2542 int i, nb_args;
2544 if (def->flags & TCG_OPF_NOT_PRESENT) {
2545 continue;
2548 nb_args = def->nb_iargs + def->nb_oargs;
2549 if (nb_args == 0) {
2550 continue;
2554 * Macro magic should make it impossible, but double-check that
2555 * the array index is in range. Since the signness of an enum
2556 * is implementation defined, force the result to unsigned.
2558 unsigned con_set = tcg_target_op_def(op);
2559 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2560 tdefs = &constraint_sets[con_set];
2562 for (i = 0; i < nb_args; i++) {
2563 const char *ct_str = tdefs->args_ct_str[i];
2564 /* Incomplete TCGTargetOpDef entry. */
2565 tcg_debug_assert(ct_str != NULL);
/* Each character of the constraint string adds one property to args_ct[i]. */
2567 while (*ct_str != '\0') {
2568 switch(*ct_str) {
/*
 * A digit makes input i an alias of output <digit>: the output's constraint
 * is copied, then both sides are cross-linked through alias_index.  The
 * digit must be the first character of the string.
 */
2569 case '0' ... '9':
2571 int oarg = *ct_str - '0';
2572 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2573 tcg_debug_assert(oarg < def->nb_oargs);
2574 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2575 def->args_ct[i] = def->args_ct[oarg];
2576 /* The output sets oalias. */
2577 def->args_ct[oarg].oalias = true;
2578 def->args_ct[oarg].alias_index = i;
2579 /* The input sets ialias. */
2580 def->args_ct[i].ialias = true;
2581 def->args_ct[i].alias_index = oarg;
2583 ct_str++;
2584 break;
2585 case '&':
2586 def->args_ct[i].newreg = true;
2587 ct_str++;
2588 break;
2589 case 'i':
2590 def->args_ct[i].ct |= TCG_CT_CONST;
2591 ct_str++;
2592 break;
2594 /* Include all of the target-specific constraints. */
/*
 * The included header is expanded through the CONST()/REGS() macros defined
 * here, so each of its entries becomes one more case label of this switch.
 */
2596 #undef CONST
2597 #define CONST(CASE, MASK) \
2598 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2599 #define REGS(CASE, MASK) \
2600 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2602 #include "tcg-target-con-str.h"
2604 #undef REGS
2605 #undef CONST
2606 default:
2607 /* Typo in TCGTargetOpDef constraint. */
2608 g_assert_not_reached();
2613 /* TCGTargetOpDef entry with too much information? */
2614 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2616 /* sort the constraints (XXX: this is just a heuristic) */
2617 sort_constraints(def, 0, def->nb_oargs);
2618 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2622 void tcg_op_remove(TCGContext *s, TCGOp *op)
2624 TCGLabel *label;
2626 switch (op->opc) {
2627 case INDEX_op_br:
2628 label = arg_label(op->args[0]);
2629 label->refs--;
2630 break;
2631 case INDEX_op_brcond_i32:
2632 case INDEX_op_brcond_i64:
2633 label = arg_label(op->args[3]);
2634 label->refs--;
2635 break;
2636 case INDEX_op_brcond2_i32:
2637 label = arg_label(op->args[5]);
2638 label->refs--;
2639 break;
2640 default:
2641 break;
2644 QTAILQ_REMOVE(&s->ops, op, link);
2645 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2646 s->nb_ops--;
2648 #ifdef CONFIG_PROFILER
2649 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2650 #endif
2653 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2655 TCGContext *s = tcg_ctx;
2656 TCGOp *op;
2658 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2659 op = tcg_malloc(sizeof(TCGOp));
2660 } else {
2661 op = QTAILQ_FIRST(&s->free_ops);
2662 QTAILQ_REMOVE(&s->free_ops, op, link);
2664 memset(op, 0, offsetof(TCGOp, link));
2665 op->opc = opc;
2666 s->nb_ops++;
2668 return op;
2671 TCGOp *tcg_emit_op(TCGOpcode opc)
2673 TCGOp *op = tcg_op_alloc(opc);
2674 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2675 return op;
2678 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2680 TCGOp *new_op = tcg_op_alloc(opc);
2681 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2682 return new_op;
2685 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2687 TCGOp *new_op = tcg_op_alloc(opc);
2688 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2689 return new_op;
2692 /* Reachable analysis : remove unreachable code. */
2693 static void reachable_code_pass(TCGContext *s)
2695 TCGOp *op, *op_next;
2696 bool dead = false;
2698 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2699 bool remove = dead;
2700 TCGLabel *label;
2701 int call_flags;
2703 switch (op->opc) {
2704 case INDEX_op_set_label:
2705 label = arg_label(op->args[0]);
2706 if (label->refs == 0) {
2708 * While there is an occasional backward branch, virtually
2709 * all branches generated by the translators are forward.
2710 * Which means that generally we will have already removed
2711 * all references to the label that will be, and there is
2712 * little to be gained by iterating.
2714 remove = true;
2715 } else {
2716 /* Once we see a label, insns become live again. */
2717 dead = false;
2718 remove = false;
2721 * Optimization can fold conditional branches to unconditional.
2722 * If we find a label with one reference which is preceded by
2723 * an unconditional branch to it, remove both. This needed to
2724 * wait until the dead code in between them was removed.
2726 if (label->refs == 1) {
2727 TCGOp *op_prev = QTAILQ_PREV(op, link);
2728 if (op_prev->opc == INDEX_op_br &&
2729 label == arg_label(op_prev->args[0])) {
2730 tcg_op_remove(s, op_prev);
2731 remove = true;
2735 break;
2737 case INDEX_op_br:
2738 case INDEX_op_exit_tb:
2739 case INDEX_op_goto_ptr:
2740 /* Unconditional branches; everything following is dead. */
2741 dead = true;
2742 break;
2744 case INDEX_op_call:
2745 /* Notice noreturn helper calls, raising exceptions. */
2746 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2747 if (call_flags & TCG_CALL_NO_RETURN) {
2748 dead = true;
2750 break;
2752 case INDEX_op_insn_start:
2753 /* Never remove -- we need to keep these for unwind. */
2754 remove = false;
2755 break;
2757 default:
2758 break;
2761 if (remove) {
2762 tcg_op_remove(s, op);
/*
 * Bits kept in TCGTemp.state by the liveness passes below.  The la_*()
 * helpers set TS_DEAD for temps that are dead and TS_MEM for temps that
 * should be in memory.
 */
2767 #define TS_DEAD 1
2768 #define TS_MEM 2
/*
 * Test the dead / sync bit of argument n.  Both macros read a variable
 * named arg_life, which must be in scope where they are used.
 */
2770 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2771 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2773 /* For liveness_pass_1, the register preferences for a given temp. */
2774 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2776 return ts->state_ptr;
2779 /* For liveness_pass_1, reset the preferences for a given temp to the
2780 * maximal regset for its type.
2782 static inline void la_reset_pref(TCGTemp *ts)
2784 *la_temp_pref(ts)
2785 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2788 /* liveness analysis: end of function: all temps are dead, and globals
2789 should be in memory. */
2790 static void la_func_end(TCGContext *s, int ng, int nt)
2792 int i;
2794 for (i = 0; i < ng; ++i) {
2795 s->temps[i].state = TS_DEAD | TS_MEM;
2796 la_reset_pref(&s->temps[i]);
2798 for (i = ng; i < nt; ++i) {
2799 s->temps[i].state = TS_DEAD;
2800 la_reset_pref(&s->temps[i]);
2804 /* liveness analysis: end of basic block: all temps are dead, globals
2805 and local temps should be in memory. */
2806 static void la_bb_end(TCGContext *s, int ng, int nt)
2808 int i;
2810 for (i = 0; i < nt; ++i) {
2811 TCGTemp *ts = &s->temps[i];
2812 int state;
2814 switch (ts->kind) {
2815 case TEMP_FIXED:
2816 case TEMP_GLOBAL:
2817 case TEMP_LOCAL:
2818 state = TS_DEAD | TS_MEM;
2819 break;
2820 case TEMP_NORMAL:
2821 case TEMP_CONST:
2822 state = TS_DEAD;
2823 break;
2824 default:
2825 g_assert_not_reached();
2827 ts->state = state;
2828 la_reset_pref(ts);
2832 /* liveness analysis: sync globals back to memory. */
2833 static void la_global_sync(TCGContext *s, int ng)
2835 int i;
2837 for (i = 0; i < ng; ++i) {
2838 int state = s->temps[i].state;
2839 s->temps[i].state = state | TS_MEM;
2840 if (state == TS_DEAD) {
2841 /* If the global was previously dead, reset prefs. */
2842 la_reset_pref(&s->temps[i]);
2848 * liveness analysis: conditional branch: all temps are dead,
2849 * globals and local temps should be synced.
2851 static void la_bb_sync(TCGContext *s, int ng, int nt)
2853 la_global_sync(s, ng);
2855 for (int i = ng; i < nt; ++i) {
2856 TCGTemp *ts = &s->temps[i];
2857 int state;
2859 switch (ts->kind) {
2860 case TEMP_LOCAL:
2861 state = ts->state;
2862 ts->state = state | TS_MEM;
2863 if (state != TS_DEAD) {
2864 continue;
2866 break;
2867 case TEMP_NORMAL:
2868 s->temps[i].state = TS_DEAD;
2869 break;
2870 case TEMP_CONST:
2871 continue;
2872 default:
2873 g_assert_not_reached();
2875 la_reset_pref(&s->temps[i]);
2879 /* liveness analysis: sync globals back to memory and kill. */
2880 static void la_global_kill(TCGContext *s, int ng)
2882 int i;
2884 for (i = 0; i < ng; i++) {
2885 s->temps[i].state = TS_DEAD | TS_MEM;
2886 la_reset_pref(&s->temps[i]);
2890 /* liveness analysis: note live globals crossing calls. */
2891 static void la_cross_call(TCGContext *s, int nt)
2893 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2894 int i;
2896 for (i = 0; i < nt; i++) {
2897 TCGTemp *ts = &s->temps[i];
2898 if (!(ts->state & TS_DEAD)) {
2899 TCGRegSet *pset = la_temp_pref(ts);
2900 TCGRegSet set = *pset;
2902 set &= mask;
2903 /* If the combination is not possible, restart. */
2904 if (set == 0) {
2905 set = tcg_target_available_regs[ts->type] & mask;
2907 *pset = set;
2912 /* Liveness analysis : update the opc_arg_life array to tell if a
2913 given input arguments is dead. Instructions updating dead
2914 temporaries are removed. */
/*
 * The op list of @s is walked from the last op to the first.  For each op
 * that is kept, the pass:
 *  - records in op->life which arguments are dead (DEAD_ARG bits) and which
 *    outputs need a sync (SYNC_ARG bits);
 *  - records in op->output_pref[] the register preferences gathered from
 *    the uses seen so far (i.e. later in program order);
 *  - updates each temp's state (TS_DEAD / TS_MEM) and preference set.
 * Ops whose outputs are all dead are removed where that is allowed, and the
 * two-output add2/sub2/mul2 ops are narrowed when one half is dead.
 */
2915 static void liveness_pass_1(TCGContext *s)
2917 int nb_globals = s->nb_globals;
2918 int nb_temps = s->nb_temps;
2919 TCGOp *op, *op_prev;
2920 TCGRegSet *prefs;
2921 int i;
/* One preference set per temp, reached through state_ptr (la_temp_pref). */
2923 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2924 for (i = 0; i < nb_temps; ++i) {
2925 s->temps[i].state_ptr = prefs + i;
2928 /* ??? Should be redundant with the exit_tb that ends the TB. */
2929 la_func_end(s, nb_globals, nb_temps);
2931 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2932 int nb_iargs, nb_oargs;
2933 TCGOpcode opc_new, opc_new2;
2934 bool have_opc_new2;
2935 TCGLifeData arg_life = 0;
2936 TCGTemp *ts;
2937 TCGOpcode opc = op->opc;
2938 const TCGOpDef *def = &tcg_op_defs[opc];
2940 switch (opc) {
2941 case INDEX_op_call:
2943 int call_flags;
2944 int nb_call_regs;
2946 nb_oargs = TCGOP_CALLO(op);
2947 nb_iargs = TCGOP_CALLI(op);
2948 call_flags = op->args[nb_oargs + nb_iargs + 1];
2950 /* pure functions can be removed if their result is unused */
2951 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2952 for (i = 0; i < nb_oargs; i++) {
2953 ts = arg_temp(op->args[i]);
2954 if (ts->state != TS_DEAD) {
2955 goto do_not_remove_call;
2958 goto do_remove;
2960 do_not_remove_call:
2962 /* Output args are dead. */
2963 for (i = 0; i < nb_oargs; i++) {
2964 ts = arg_temp(op->args[i]);
2965 if (ts->state & TS_DEAD) {
2966 arg_life |= DEAD_ARG << i;
2968 if (ts->state & TS_MEM) {
2969 arg_life |= SYNC_ARG << i;
2971 ts->state = TS_DEAD;
2972 la_reset_pref(ts);
2974 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2975 op->output_pref[i] = 0;
/*
 * Globals: killed when neither NO_WRITE_GLOBALS nor NO_READ_GLOBALS is set,
 * synced when only NO_READ_GLOBALS is clear, untouched otherwise.
 */
2978 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2979 TCG_CALL_NO_READ_GLOBALS))) {
2980 la_global_kill(s, nb_globals);
2981 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2982 la_global_sync(s, nb_globals);
2985 /* Record arguments that die in this helper. */
2986 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2987 ts = arg_temp(op->args[i]);
2988 if (ts && ts->state & TS_DEAD) {
2989 arg_life |= DEAD_ARG << i;
2993 /* For all live registers, remove call-clobbered prefs. */
2994 la_cross_call(s, nb_temps);
2996 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2998 /* Input arguments are live for preceding opcodes. */
2999 for (i = 0; i < nb_iargs; i++) {
3000 ts = arg_temp(op->args[i + nb_oargs]);
3001 if (ts && ts->state & TS_DEAD) {
3002 /* For those arguments that die, and will be allocated
3003 * in registers, clear the register set for that arg,
3004 * to be filled in below. For args that will be on
3005 * the stack, reset to any available reg.
3007 *la_temp_pref(ts)
3008 = (i < nb_call_regs ? 0 :
3009 tcg_target_available_regs[ts->type]);
3010 ts->state &= ~TS_DEAD;
3014 /* For each input argument, add its input register to prefs.
3015 If a temp is used once, this produces a single set bit. */
3016 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3017 ts = arg_temp(op->args[i + nb_oargs]);
3018 if (ts) {
3019 tcg_regset_set_reg(*la_temp_pref(ts),
3020 tcg_target_call_iarg_regs[i]);
3024 break;
3025 case INDEX_op_insn_start:
3026 break;
3027 case INDEX_op_discard:
3028 /* mark the temporary as dead */
3029 ts = arg_temp(op->args[0]);
3030 ts->state = TS_DEAD;
3031 la_reset_pref(ts);
3032 break;
/* add2/sub2: choose the single-word opcode to use if the high half is dead. */
3034 case INDEX_op_add2_i32:
3035 opc_new = INDEX_op_add_i32;
3036 goto do_addsub2;
3037 case INDEX_op_sub2_i32:
3038 opc_new = INDEX_op_sub_i32;
3039 goto do_addsub2;
3040 case INDEX_op_add2_i64:
3041 opc_new = INDEX_op_add_i64;
3042 goto do_addsub2;
3043 case INDEX_op_sub2_i64:
3044 opc_new = INDEX_op_sub_i64;
3045 do_addsub2:
3046 nb_iargs = 4;
3047 nb_oargs = 2;
3048 /* Test if the high part of the operation is dead, but not
3049 the low part. The result can be optimized to a simple
3050 add or sub. This happens often for x86_64 guest when the
3051 cpu mode is set to 32 bit. */
3052 if (arg_temp(op->args[1])->state == TS_DEAD) {
3053 if (arg_temp(op->args[0])->state == TS_DEAD) {
3054 goto do_remove;
3056 /* Replace the opcode and adjust the args in place,
3057 leaving 3 unused args at the end. */
3058 op->opc = opc = opc_new;
3059 op->args[1] = op->args[2];
3060 op->args[2] = op->args[4];
3061 /* Fall through and mark the single-word operation live. */
3062 nb_iargs = 2;
3063 nb_oargs = 1;
3065 goto do_not_remove;
/*
 * mul2: opc_new yields the low half, opc_new2 the high half.  The high-half
 * opcode is only used when have_opc_new2 is set.
 */
3067 case INDEX_op_mulu2_i32:
3068 opc_new = INDEX_op_mul_i32;
3069 opc_new2 = INDEX_op_muluh_i32;
3070 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3071 goto do_mul2;
3072 case INDEX_op_muls2_i32:
3073 opc_new = INDEX_op_mul_i32;
3074 opc_new2 = INDEX_op_mulsh_i32;
3075 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3076 goto do_mul2;
3077 case INDEX_op_mulu2_i64:
3078 opc_new = INDEX_op_mul_i64;
3079 opc_new2 = INDEX_op_muluh_i64;
3080 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3081 goto do_mul2;
3082 case INDEX_op_muls2_i64:
3083 opc_new = INDEX_op_mul_i64;
3084 opc_new2 = INDEX_op_mulsh_i64;
3085 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3086 goto do_mul2;
3087 do_mul2:
3088 nb_iargs = 2;
3089 nb_oargs = 2;
3090 if (arg_temp(op->args[1])->state == TS_DEAD) {
3091 if (arg_temp(op->args[0])->state == TS_DEAD) {
3092 /* Both parts of the operation are dead. */
3093 goto do_remove;
3095 /* The high part of the operation is dead; generate the low. */
3096 op->opc = opc = opc_new;
3097 op->args[1] = op->args[2];
3098 op->args[2] = op->args[3];
3099 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3100 /* The low part of the operation is dead; generate the high. */
3101 op->opc = opc = opc_new2;
3102 op->args[0] = op->args[1];
3103 op->args[1] = op->args[2];
3104 op->args[2] = op->args[3];
3105 } else {
3106 goto do_not_remove;
3108 /* Mark the single-word operation live. */
3109 nb_oargs = 1;
3110 goto do_not_remove;
3112 default:
3113 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3114 nb_iargs = def->nb_iargs;
3115 nb_oargs = def->nb_oargs;
3117 /* Test if the operation can be removed because all
3118 its outputs are dead. We assume that nb_oargs == 0
3119 implies side effects */
3120 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3121 for (i = 0; i < nb_oargs; i++) {
3122 if (arg_temp(op->args[i])->state != TS_DEAD) {
3123 goto do_not_remove;
3126 goto do_remove;
3128 goto do_not_remove;
3130 do_remove:
3131 tcg_op_remove(s, op);
3132 break;
/*
 * Shared tail for every op that is kept (calls excepted): outputs first,
 * then block-end/side-effect handling, then inputs.
 * NOTE(review): 'def' was read before any in-place opcode replacement
 * above, so def->flags and def->args_ct below still describe the original
 * opcode -- confirm that is intended.
 */
3134 do_not_remove:
3135 for (i = 0; i < nb_oargs; i++) {
3136 ts = arg_temp(op->args[i]);
3138 /* Remember the preference of the uses that followed. */
3139 op->output_pref[i] = *la_temp_pref(ts);
3141 /* Output args are dead. */
3142 if (ts->state & TS_DEAD) {
3143 arg_life |= DEAD_ARG << i;
3145 if (ts->state & TS_MEM) {
3146 arg_life |= SYNC_ARG << i;
3148 ts->state = TS_DEAD;
3149 la_reset_pref(ts);
3152 /* If end of basic block, update. */
3153 if (def->flags & TCG_OPF_BB_EXIT) {
3154 la_func_end(s, nb_globals, nb_temps);
3155 } else if (def->flags & TCG_OPF_COND_BRANCH) {
3156 la_bb_sync(s, nb_globals, nb_temps);
3157 } else if (def->flags & TCG_OPF_BB_END) {
3158 la_bb_end(s, nb_globals, nb_temps);
3159 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3160 la_global_sync(s, nb_globals);
3161 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3162 la_cross_call(s, nb_temps);
3166 /* Record arguments that die in this opcode. */
3167 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3168 ts = arg_temp(op->args[i]);
3169 if (ts->state & TS_DEAD) {
3170 arg_life |= DEAD_ARG << i;
3174 /* Input arguments are live for preceding opcodes. */
3175 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3176 ts = arg_temp(op->args[i]);
3177 if (ts->state & TS_DEAD) {
3178 /* For operands that were dead, initially allow
3179 all regs for the type. */
3180 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3181 ts->state &= ~TS_DEAD;
3185 /* Incorporate constraints for this operand. */
3186 switch (opc) {
3187 case INDEX_op_mov_i32:
3188 case INDEX_op_mov_i64:
3189 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3190 have proper constraints. That said, special case
3191 moves to propagate preferences backward. */
3192 if (IS_DEAD_ARG(1)) {
3193 *la_temp_pref(arg_temp(op->args[0]))
3194 = *la_temp_pref(arg_temp(op->args[1]));
3196 break;
3198 default:
3199 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3200 const TCGArgConstraint *ct = &def->args_ct[i];
3201 TCGRegSet set, *pset;
3203 ts = arg_temp(op->args[i]);
3204 pset = la_temp_pref(ts);
3205 set = *pset;
3207 set &= ct->regs;
3208 if (ct->ialias) {
3209 set &= op->output_pref[ct->alias_index];
3211 /* If the combination is not possible, restart. */
3212 if (set == 0) {
3213 set = ct->regs;
3215 *pset = set;
3217 break;
3219 break;
/* Publish the collected dead/sync bits for this op. */
3221 op->life = arg_life;
3225 /* Liveness analysis: Convert indirect regs to direct temporaries. */
/*
 * Every global with indirect_reg set gets a freshly allocated temp (kept in
 * its state_ptr).  The op list is then walked forwards and, for such
 * globals:
 *  - a load into the direct temp is inserted before an op that reads the
 *    global while its state is TS_DEAD;
 *  - op arguments naming the global are replaced by the direct temp;
 *  - a store back to the global's memory slot is inserted after an op whose
 *    output is flagged NEED_SYNC_ARG in op->life.
 * Returns true when at least one op argument was replaced.
 */
3226 static bool liveness_pass_2(TCGContext *s)
3228 int nb_globals = s->nb_globals;
3229 int nb_temps, i;
3230 bool changes = false;
3231 TCGOp *op, *op_next;
3233 /* Create a temporary for each indirect global. */
3234 for (i = 0; i < nb_globals; ++i) {
3235 TCGTemp *its = &s->temps[i];
3236 if (its->indirect_reg) {
3237 TCGTemp *dts = tcg_temp_alloc(s);
3238 dts->type = its->type;
3239 dts->base_type = its->base_type;
3240 its->state_ptr = dts;
3241 } else {
3242 its->state_ptr = NULL;
3244 /* All globals begin dead. */
3245 its->state = TS_DEAD;
/* The remaining temps, including those just allocated, have no direct temp. */
3247 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3248 TCGTemp *its = &s->temps[i];
3249 its->state_ptr = NULL;
3250 its->state = TS_DEAD;
3253 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3254 TCGOpcode opc = op->opc;
3255 const TCGOpDef *def = &tcg_op_defs[opc];
3256 TCGLifeData arg_life = op->life;
3257 int nb_iargs, nb_oargs, call_flags;
3258 TCGTemp *arg_ts, *dir_ts;
3260 if (opc == INDEX_op_call) {
3261 nb_oargs = TCGOP_CALLO(op);
3262 nb_iargs = TCGOP_CALLI(op);
3263 call_flags = op->args[nb_oargs + nb_iargs + 1];
3264 } else {
3265 nb_iargs = def->nb_iargs;
3266 nb_oargs = def->nb_oargs;
3268 /* Set flags similar to how calls require. */
3269 if (def->flags & TCG_OPF_COND_BRANCH) {
3270 /* Like reading globals: sync_globals */
3271 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3272 } else if (def->flags & TCG_OPF_BB_END) {
3273 /* Like writing globals: save_globals */
3274 call_flags = 0;
3275 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3276 /* Like reading globals: sync_globals */
3277 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3278 } else {
3279 /* No effect on globals. */
3280 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3281 TCG_CALL_NO_WRITE_GLOBALS);
3285 /* Make sure that input arguments are available. */
3286 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3287 arg_ts = arg_temp(op->args[i]);
3288 if (arg_ts) {
3289 dir_ts = arg_ts->state_ptr;
3290 if (dir_ts && arg_ts->state == TS_DEAD) {
3291 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3292 ? INDEX_op_ld_i32
3293 : INDEX_op_ld_i64);
3294 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3296 lop->args[0] = temp_arg(dir_ts);
3297 lop->args[1] = temp_arg(arg_ts->mem_base);
3298 lop->args[2] = arg_ts->mem_offset;
3300 /* Loaded, but synced with memory. */
3301 arg_ts->state = TS_MEM;
3306 /* Perform input replacement, and mark inputs that became dead.
3307 No action is required except keeping temp_state up to date
3308 so that we reload when needed. */
3309 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3310 arg_ts = arg_temp(op->args[i]);
3311 if (arg_ts) {
3312 dir_ts = arg_ts->state_ptr;
3313 if (dir_ts) {
3314 op->args[i] = temp_arg(dir_ts);
3315 changes = true;
3316 if (IS_DEAD_ARG(i)) {
3317 arg_ts->state = TS_DEAD;
3323 /* Liveness analysis should ensure that the following are
3324 all correct, for call sites and basic block end points. */
3325 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3326 /* Nothing to do */
3327 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3328 for (i = 0; i < nb_globals; ++i) {
3329 /* Liveness should see that globals are synced back,
3330 that is, either TS_DEAD or TS_MEM. */
3331 arg_ts = &s->temps[i];
3332 tcg_debug_assert(arg_ts->state_ptr == 0
3333 || arg_ts->state != 0);
3335 } else {
3336 for (i = 0; i < nb_globals; ++i) {
3337 /* Liveness should see that globals are saved back,
3338 that is, TS_DEAD, waiting to be reloaded. */
3339 arg_ts = &s->temps[i];
3340 tcg_debug_assert(arg_ts->state_ptr == 0
3341 || arg_ts->state == TS_DEAD);
3345 /* Outputs become available. */
/*
 * Moves are special-cased: when the output both needs a sync and is dead,
 * the mov itself is removed and the inserted store writes the mov's source
 * (args[1]) directly to the global's memory slot.
 */
3346 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3347 arg_ts = arg_temp(op->args[0]);
3348 dir_ts = arg_ts->state_ptr;
3349 if (dir_ts) {
3350 op->args[0] = temp_arg(dir_ts);
3351 changes = true;
3353 /* The output is now live and modified. */
3354 arg_ts->state = 0;
3356 if (NEED_SYNC_ARG(0)) {
3357 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3358 ? INDEX_op_st_i32
3359 : INDEX_op_st_i64);
3360 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3361 TCGTemp *out_ts = dir_ts;
3363 if (IS_DEAD_ARG(0)) {
3364 out_ts = arg_temp(op->args[1]);
3365 arg_ts->state = TS_DEAD;
3366 tcg_op_remove(s, op);
3367 } else {
3368 arg_ts->state = TS_MEM;
3371 sop->args[0] = temp_arg(out_ts);
3372 sop->args[1] = temp_arg(arg_ts->mem_base);
3373 sop->args[2] = arg_ts->mem_offset;
3374 } else {
3375 tcg_debug_assert(!IS_DEAD_ARG(0));
3378 } else {
3379 for (i = 0; i < nb_oargs; i++) {
3380 arg_ts = arg_temp(op->args[i]);
3381 dir_ts = arg_ts->state_ptr;
3382 if (!dir_ts) {
3383 continue;
3385 op->args[i] = temp_arg(dir_ts);
3386 changes = true;
3388 /* The output is now live and modified. */
3389 arg_ts->state = 0;
3391 /* Sync outputs upon their last write. */
3392 if (NEED_SYNC_ARG(i)) {
3393 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3394 ? INDEX_op_st_i32
3395 : INDEX_op_st_i64);
3396 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3398 sop->args[0] = temp_arg(dir_ts);
3399 sop->args[1] = temp_arg(arg_ts->mem_base);
3400 sop->args[2] = arg_ts->mem_offset;
3402 arg_ts->state = TS_MEM;
3404 /* Drop outputs that are dead. */
3405 if (IS_DEAD_ARG(i)) {
3406 arg_ts->state = TS_DEAD;
3412 return changes;
3415 #ifdef CONFIG_DEBUG_TCG
3416 static void dump_regs(TCGContext *s)
3418 TCGTemp *ts;
3419 int i;
3420 char buf[64];
3422 for(i = 0; i < s->nb_temps; i++) {
3423 ts = &s->temps[i];
3424 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3425 switch(ts->val_type) {
3426 case TEMP_VAL_REG:
3427 printf("%s", tcg_target_reg_names[ts->reg]);
3428 break;
3429 case TEMP_VAL_MEM:
3430 printf("%d(%s)", (int)ts->mem_offset,
3431 tcg_target_reg_names[ts->mem_base->reg]);
3432 break;
3433 case TEMP_VAL_CONST:
3434 printf("$0x%" PRIx64, ts->val);
3435 break;
3436 case TEMP_VAL_DEAD:
3437 printf("D");
3438 break;
3439 default:
3440 printf("???");
3441 break;
3443 printf("\n");
3446 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3447 if (s->reg_to_temp[i] != NULL) {
3448 printf("%s: %s\n",
3449 tcg_target_reg_names[i],
3450 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3455 static void check_regs(TCGContext *s)
3457 int reg;
3458 int k;
3459 TCGTemp *ts;
3460 char buf[64];
3462 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3463 ts = s->reg_to_temp[reg];
3464 if (ts != NULL) {
3465 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3466 printf("Inconsistency for register %s:\n",
3467 tcg_target_reg_names[reg]);
3468 goto fail;
3472 for (k = 0; k < s->nb_temps; k++) {
3473 ts = &s->temps[k];
3474 if (ts->val_type == TEMP_VAL_REG
3475 && ts->kind != TEMP_FIXED
3476 && s->reg_to_temp[ts->reg] != ts) {
3477 printf("Inconsistency for temp %s:\n",
3478 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3479 fail:
3480 printf("reg state:\n");
3481 dump_regs(s);
3482 tcg_abort();
3486 #endif
3488 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3490 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3491 /* Sparc64 stack is accessed with offset of 2047 */
3492 s->current_frame_offset = (s->current_frame_offset +
3493 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3494 ~(sizeof(tcg_target_long) - 1);
3495 #endif
3496 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3497 s->frame_end) {
3498 tcg_abort();
3500 ts->mem_offset = s->current_frame_offset;
3501 ts->mem_base = s->frame_temp;
3502 ts->mem_allocated = 1;
3503 s->current_frame_offset += sizeof(tcg_target_long);
3506 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3508 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3509 mark it free; otherwise mark it dead. */
3510 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3512 TCGTempVal new_type;
3514 switch (ts->kind) {
3515 case TEMP_FIXED:
3516 return;
3517 case TEMP_GLOBAL:
3518 case TEMP_LOCAL:
3519 new_type = TEMP_VAL_MEM;
3520 break;
3521 case TEMP_NORMAL:
3522 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3523 break;
3524 case TEMP_CONST:
3525 new_type = TEMP_VAL_CONST;
3526 break;
3527 default:
3528 g_assert_not_reached();
3530 if (ts->val_type == TEMP_VAL_REG) {
3531 s->reg_to_temp[ts->reg] = NULL;
3533 ts->val_type = new_type;
3536 /* Mark a temporary as dead. */
3537 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3539 temp_free_or_dead(s, ts, 1);
3542 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3543 registers needs to be allocated to store a constant. If 'free_or_dead'
3544 is non-zero, subsequently release the temporary; if it is positive, the
3545 temp is dead; if it is negative, the temp is free. */
3546 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3547 TCGRegSet preferred_regs, int free_or_dead)
3549 if (!temp_readonly(ts) && !ts->mem_coherent) {
3550 if (!ts->mem_allocated) {
3551 temp_allocate_frame(s, ts);
3553 switch (ts->val_type) {
3554 case TEMP_VAL_CONST:
3555 /* If we're going to free the temp immediately, then we won't
3556 require it later in a register, so attempt to store the
3557 constant to memory directly. */
3558 if (free_or_dead
3559 && tcg_out_sti(s, ts->type, ts->val,
3560 ts->mem_base->reg, ts->mem_offset)) {
3561 break;
3563 temp_load(s, ts, tcg_target_available_regs[ts->type],
3564 allocated_regs, preferred_regs);
3565 /* fallthrough */
3567 case TEMP_VAL_REG:
3568 tcg_out_st(s, ts->type, ts->reg,
3569 ts->mem_base->reg, ts->mem_offset);
3570 break;
3572 case TEMP_VAL_MEM:
3573 break;
3575 case TEMP_VAL_DEAD:
3576 default:
3577 tcg_abort();
3579 ts->mem_coherent = 1;
3581 if (free_or_dead) {
3582 temp_free_or_dead(s, ts, free_or_dead);
3586 /* free register 'reg' by spilling the corresponding temporary if necessary */
3587 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3589 TCGTemp *ts = s->reg_to_temp[reg];
3590 if (ts != NULL) {
3591 temp_sync(s, ts, allocated_regs, 0, -1);
3596 * tcg_reg_alloc:
3597 * @required_regs: Set of registers in which we must allocate.
3598 * @allocated_regs: Set of registers which must be avoided.
3599 * @preferred_regs: Set of registers we should prefer.
3600 * @rev: True if we search the registers in "indirect" order.
3602 * The allocated register must be in @required_regs & ~@allocated_regs,
3603 * but if we can put it in @preferred_regs we may save a move later.
3605 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3606 TCGRegSet allocated_regs,
3607 TCGRegSet preferred_regs, bool rev)
3609 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3610 TCGRegSet reg_ct[2];
3611 const int *order;
3613 reg_ct[1] = required_regs & ~allocated_regs;
3614 tcg_debug_assert(reg_ct[1] != 0);
3615 reg_ct[0] = reg_ct[1] & preferred_regs;
3617 /* Skip the preferred_regs option if it cannot be satisfied,
3618 or if the preference made no difference. */
3619 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3621 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3623 /* Try free registers, preferences first. */
3624 for (j = f; j < 2; j++) {
3625 TCGRegSet set = reg_ct[j];
3627 if (tcg_regset_single(set)) {
3628 /* One register in the set. */
3629 TCGReg reg = tcg_regset_first(set);
3630 if (s->reg_to_temp[reg] == NULL) {
3631 return reg;
3633 } else {
3634 for (i = 0; i < n; i++) {
3635 TCGReg reg = order[i];
3636 if (s->reg_to_temp[reg] == NULL &&
3637 tcg_regset_test_reg(set, reg)) {
3638 return reg;
3644 /* We must spill something. */
3645 for (j = f; j < 2; j++) {
3646 TCGRegSet set = reg_ct[j];
3648 if (tcg_regset_single(set)) {
3649 /* One register in the set. */
3650 TCGReg reg = tcg_regset_first(set);
3651 tcg_reg_free(s, reg, allocated_regs);
3652 return reg;
3653 } else {
3654 for (i = 0; i < n; i++) {
3655 TCGReg reg = order[i];
3656 if (tcg_regset_test_reg(set, reg)) {
3657 tcg_reg_free(s, reg, allocated_regs);
3658 return reg;
3664 tcg_abort();
3667 /* Make sure the temporary is in a register. If needed, allocate the register
3668 from DESIRED while avoiding ALLOCATED. */
3669 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3670 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3672 TCGReg reg;
3674 switch (ts->val_type) {
3675 case TEMP_VAL_REG:
3676 return;
3677 case TEMP_VAL_CONST:
3678 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3679 preferred_regs, ts->indirect_base);
3680 if (ts->type <= TCG_TYPE_I64) {
3681 tcg_out_movi(s, ts->type, reg, ts->val);
3682 } else {
3683 uint64_t val = ts->val;
3684 MemOp vece = MO_64;
3687 * Find the minimal vector element that matches the constant.
3688 * The targets will, in general, have to do this search anyway,
3689 * do this generically.
3691 if (val == dup_const(MO_8, val)) {
3692 vece = MO_8;
3693 } else if (val == dup_const(MO_16, val)) {
3694 vece = MO_16;
3695 } else if (val == dup_const(MO_32, val)) {
3696 vece = MO_32;
3699 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3701 ts->mem_coherent = 0;
3702 break;
3703 case TEMP_VAL_MEM:
3704 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3705 preferred_regs, ts->indirect_base);
3706 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3707 ts->mem_coherent = 1;
3708 break;
3709 case TEMP_VAL_DEAD:
3710 default:
3711 tcg_abort();
3713 ts->reg = reg;
3714 ts->val_type = TEMP_VAL_REG;
3715 s->reg_to_temp[reg] = ts;
3718 /* Save a temporary to memory. 'allocated_regs' is used in case a
3719 temporary registers needs to be allocated to store a constant. */
3720 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3722 /* The liveness analysis already ensures that globals are back
3723 in memory. Keep an tcg_debug_assert for safety. */
3724 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3727 /* save globals to their canonical location and assume they can be
3728 modified be the following code. 'allocated_regs' is used in case a
3729 temporary registers needs to be allocated to store a constant. */
3730 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3732 int i, n;
3734 for (i = 0, n = s->nb_globals; i < n; i++) {
3735 temp_save(s, &s->temps[i], allocated_regs);
3739 /* sync globals to their canonical location and assume they can be
3740 read by the following code. 'allocated_regs' is used in case a
3741 temporary registers needs to be allocated to store a constant. */
3742 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3744 int i, n;
3746 for (i = 0, n = s->nb_globals; i < n; i++) {
3747 TCGTemp *ts = &s->temps[i];
3748 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3749 || ts->kind == TEMP_FIXED
3750 || ts->mem_coherent);
3754 /* at the end of a basic block, we assume all temporaries are dead and
3755 all globals are stored at their canonical location. */
3756 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3758 int i;
3760 for (i = s->nb_globals; i < s->nb_temps; i++) {
3761 TCGTemp *ts = &s->temps[i];
3763 switch (ts->kind) {
3764 case TEMP_LOCAL:
3765 temp_save(s, ts, allocated_regs);
3766 break;
3767 case TEMP_NORMAL:
3768 /* The liveness analysis already ensures that temps are dead.
3769 Keep an tcg_debug_assert for safety. */
3770 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3771 break;
3772 case TEMP_CONST:
3773 /* Similarly, we should have freed any allocated register. */
3774 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3775 break;
3776 default:
3777 g_assert_not_reached();
3781 save_globals(s, allocated_regs);
3785 * At a conditional branch, we assume all temporaries are dead and
3786 * all globals and local temps are synced to their location.
3788 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3790 sync_globals(s, allocated_regs);
3792 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3793 TCGTemp *ts = &s->temps[i];
3795 * The liveness analysis already ensures that temps are dead.
3796 * Keep tcg_debug_asserts for safety.
3798 switch (ts->kind) {
3799 case TEMP_LOCAL:
3800 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3801 break;
3802 case TEMP_NORMAL:
3803 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3804 break;
3805 case TEMP_CONST:
3806 break;
3807 default:
3808 g_assert_not_reached();
3814 * Specialized code generation for INDEX_op_mov_* with a constant.
3816 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3817 tcg_target_ulong val, TCGLifeData arg_life,
3818 TCGRegSet preferred_regs)
3820 /* ENV should not be modified. */
3821 tcg_debug_assert(!temp_readonly(ots));
3823 /* The movi is not explicitly generated here. */
3824 if (ots->val_type == TEMP_VAL_REG) {
3825 s->reg_to_temp[ots->reg] = NULL;
3827 ots->val_type = TEMP_VAL_CONST;
3828 ots->val = val;
3829 ots->mem_coherent = 0;
3830 if (NEED_SYNC_ARG(0)) {
3831 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3832 } else if (IS_DEAD_ARG(0)) {
3833 temp_dead(s, ots);
3838 * Specialized code generation for INDEX_op_mov_*.
3840 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3842 const TCGLifeData arg_life = op->life;
3843 TCGRegSet allocated_regs, preferred_regs;
3844 TCGTemp *ts, *ots;
3845 TCGType otype, itype;
3847 allocated_regs = s->reserved_regs;
3848 preferred_regs = op->output_pref[0];
3849 ots = arg_temp(op->args[0]);
3850 ts = arg_temp(op->args[1]);
3852 /* ENV should not be modified. */
3853 tcg_debug_assert(!temp_readonly(ots));
3855 /* Note that otype != itype for no-op truncation. */
3856 otype = ots->type;
3857 itype = ts->type;
3859 if (ts->val_type == TEMP_VAL_CONST) {
3860 /* propagate constant or generate sti */
3861 tcg_target_ulong val = ts->val;
3862 if (IS_DEAD_ARG(1)) {
3863 temp_dead(s, ts);
3865 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3866 return;
3869 /* If the source value is in memory we're going to be forced
3870 to have it in a register in order to perform the copy. Copy
3871 the SOURCE value into its own register first, that way we
3872 don't have to reload SOURCE the next time it is used. */
3873 if (ts->val_type == TEMP_VAL_MEM) {
3874 temp_load(s, ts, tcg_target_available_regs[itype],
3875 allocated_regs, preferred_regs);
3878 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3879 if (IS_DEAD_ARG(0)) {
3880 /* mov to a non-saved dead register makes no sense (even with
3881 liveness analysis disabled). */
3882 tcg_debug_assert(NEED_SYNC_ARG(0));
3883 if (!ots->mem_allocated) {
3884 temp_allocate_frame(s, ots);
3886 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3887 if (IS_DEAD_ARG(1)) {
3888 temp_dead(s, ts);
3890 temp_dead(s, ots);
3891 } else {
3892 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3893 /* the mov can be suppressed */
3894 if (ots->val_type == TEMP_VAL_REG) {
3895 s->reg_to_temp[ots->reg] = NULL;
3897 ots->reg = ts->reg;
3898 temp_dead(s, ts);
3899 } else {
3900 if (ots->val_type != TEMP_VAL_REG) {
3901 /* When allocating a new register, make sure to not spill the
3902 input one. */
3903 tcg_regset_set_reg(allocated_regs, ts->reg);
3904 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3905 allocated_regs, preferred_regs,
3906 ots->indirect_base);
3908 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3910 * Cross register class move not supported.
3911 * Store the source register into the destination slot
3912 * and leave the destination temp as TEMP_VAL_MEM.
3914 assert(!temp_readonly(ots));
3915 if (!ts->mem_allocated) {
3916 temp_allocate_frame(s, ots);
3918 tcg_out_st(s, ts->type, ts->reg,
3919 ots->mem_base->reg, ots->mem_offset);
3920 ots->mem_coherent = 1;
3921 temp_free_or_dead(s, ots, -1);
3922 return;
3925 ots->val_type = TEMP_VAL_REG;
3926 ots->mem_coherent = 0;
3927 s->reg_to_temp[ots->reg] = ots;
3928 if (NEED_SYNC_ARG(0)) {
3929 temp_sync(s, ots, allocated_regs, 0, 0);
3935 * Specialized code generation for INDEX_op_dup_vec.
/*
 * Register allocation and code emission for one INDEX_op_dup_vec op.
 *
 * op->args[0] is the output (vector) temp, op->args[1] the input temp.
 * A constant input is handed to tcg_reg_alloc_do_movi().  Otherwise the
 * output register is allocated first and the duplication is attempted in
 * this order:
 *   1. tcg_out_dup_vec() straight from the input register, when that
 *      register is within the op's input constraint set;
 *   2. if the input's memory copy is stale, tcg_out_mov() from the input
 *      register into the output register;
 *   3. tcg_out_dupm_vec() from the input's memory slot (the input is
 *      synced first if step 2 failed);
 *   4. tcg_out_ld() of the input into the output register.
 * After step 2 or 4 the value sits in the output register and a final
 * tcg_out_dup_vec() from the output register onto itself is emitted; the
 * code asserts (debug builds) that this one succeeds.
 */
3937 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3939 const TCGLifeData arg_life = op->life;
3940 TCGRegSet dup_out_regs, dup_in_regs;
3941 TCGTemp *its, *ots;
3942 TCGType itype, vtype;
3943 intptr_t endian_fixup;
3944 unsigned vece;
3945 bool ok;
3947 ots = arg_temp(op->args[0]);
3948 its = arg_temp(op->args[1]);
3950 /* ENV should not be modified. */
3951 tcg_debug_assert(!temp_readonly(ots));
/* Element size and vector type are both encoded in the op itself. */
3953 itype = its->type;
3954 vece = TCGOP_VECE(op);
3955 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3957 if (its->val_type == TEMP_VAL_CONST) {
3958 /* Propagate constant via movi -> dupi. */
3959 tcg_target_ulong val = its->val;
3960 if (IS_DEAD_ARG(1)) {
3961 temp_dead(s, its);
3963 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3964 return;
/* Register constraint sets of the dup_vec output (0) and input (1). */
3967 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3968 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3970 /* Allocate the output register now. */
3971 if (ots->val_type != TEMP_VAL_REG) {
3972 TCGRegSet allocated_regs = s->reserved_regs;
3974 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3975 /* Make sure to not spill the input register. */
3976 tcg_regset_set_reg(allocated_regs, its->reg);
3978 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3979 op->output_pref[0], ots->indirect_base);
3980 ots->val_type = TEMP_VAL_REG;
3981 ots->mem_coherent = 0;
3982 s->reg_to_temp[ots->reg] = ots;
3985 switch (its->val_type) {
3986 case TEMP_VAL_REG:
3988 * The dup constriaints must be broad, covering all possible VECE.
3989 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3990 * to fail, indicating that extra moves are required for that case.
3992 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3993 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3994 goto done;
3996 /* Try again from memory or a vector input register. */
3998 if (!its->mem_coherent) {
4000 * The input register is not synced, and so an extra store
4001 * would be required to use memory. Attempt an integer-vector
4002 * register move first. We do not have a TCGRegSet for this.
4004 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4005 break;
4007 /* Sync the temp back to its slot and load from there. */
4008 temp_sync(s, its, s->reserved_regs, 0, 0);
4010 /* fall through */
4012 case TEMP_VAL_MEM:
/*
 * On big-endian hosts the memory operand is offset by the input width
 * (4 bytes for TCG_TYPE_I32, otherwise 8) minus the element size
 * (1 << vece); on little-endian hosts no adjustment is applied.
 */
4013 #ifdef HOST_WORDS_BIGENDIAN
4014 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4015 endian_fixup -= 1 << vece;
4016 #else
4017 endian_fixup = 0;
4018 #endif
4019 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4020 its->mem_offset + endian_fixup)) {
4021 goto done;
/* dupm was refused: load the plain value into the output register. */
4023 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4024 break;
4026 default:
4027 g_assert_not_reached();
4030 /* We now have a vector input register, so dup must succeed. */
4031 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4032 tcg_debug_assert(ok);
/* Common exit: apply the liveness information to input and output. */
4034 done:
4035 if (IS_DEAD_ARG(1)) {
4036 temp_dead(s, its);
4038 if (NEED_SYNC_ARG(0)) {
4039 temp_sync(s, ots, s->reserved_regs, 0, 0);
4041 if (IS_DEAD_ARG(0)) {
4042 temp_dead(s, ots);
/*
 * Generic register allocation and code emission for one op, driven by the
 * op's definition in tcg_op_defs[].
 *
 * Phases, in order:
 *   1. copy the op's constant arguments into new_args[];
 *   2. satisfy the input constraints, visiting inputs in the order given
 *      by each constraint's sort_index; a constant input accepted by
 *      tcg_target_const_match() is passed as an immediate, anything else
 *      ends up in a register recorded in new_args[];
 *   3. mark dead inputs and free their registers;
 *   4. act on the op flags: conditional branch, end of basic block, or
 *      otherwise call-clobber handling, global syncing for ops with side
 *      effects, and output register allocation;
 *   5. emit the instruction through tcg_out_vec_op() or tcg_out_op();
 *   6. sync and/or kill the outputs as op->life requests.
 *
 * i_allocated_regs / o_allocated_regs accumulate the registers already
 * handed to inputs / outputs of this op; both start from
 * s->reserved_regs.
 */
4046 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4048 const TCGLifeData arg_life = op->life;
4049 const TCGOpDef * const def = &tcg_op_defs[op->opc];
4050 TCGRegSet i_allocated_regs;
4051 TCGRegSet o_allocated_regs;
4052 int i, k, nb_iargs, nb_oargs;
4053 TCGReg reg;
4054 TCGArg arg;
4055 const TCGArgConstraint *arg_ct;
4056 TCGTemp *ts;
4057 TCGArg new_args[TCG_MAX_OP_ARGS];
4058 int const_args[TCG_MAX_OP_ARGS];
4060 nb_oargs = def->nb_oargs;
4061 nb_iargs = def->nb_iargs;
4063 /* copy constants */
4064 memcpy(new_args + nb_oargs + nb_iargs,
4065 op->args + nb_oargs + nb_iargs,
4066 sizeof(TCGArg) * def->nb_cargs);
4068 i_allocated_regs = s->reserved_regs;
4069 o_allocated_regs = s->reserved_regs;
4071 /* satisfy input constraints */
4072 for (k = 0; k < nb_iargs; k++) {
4073 TCGRegSet i_preferred_regs, o_preferred_regs;
/* k counts inputs in allocation order; i is the actual argument index. */
4075 i = def->args_ct[nb_oargs + k].sort_index;
4076 arg = op->args[i];
4077 arg_ct = &def->args_ct[i];
4078 ts = arg_temp(arg);
4080 if (ts->val_type == TEMP_VAL_CONST
4081 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4082 /* constant is OK for instruction */
4083 const_args[i] = 1;
4084 new_args[i] = ts->val;
4085 continue;
4088 i_preferred_regs = o_preferred_regs = 0;
/* Input aliased to an output: start from that output's preferences. */
4089 if (arg_ct->ialias) {
4090 o_preferred_regs = op->output_pref[arg_ct->alias_index];
4093 * If the input is readonly, then it cannot also be an
4094 * output and aliased to itself. If the input is not
4095 * dead after the instruction, we must allocate a new
4096 * register and move it.
4098 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4099 goto allocate_in_reg;
4103 * Check if the current register has already been allocated
4104 * for another input aliased to an output.
4106 if (ts->val_type == TEMP_VAL_REG) {
4107 reg = ts->reg;
4108 for (int k2 = 0; k2 < k; k2++) {
4109 int i2 = def->args_ct[nb_oargs + k2].sort_index;
4110 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4111 goto allocate_in_reg;
4115 i_preferred_regs = o_preferred_regs;
4118 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4119 reg = ts->reg;
/*
 * The temp may already have been in a register outside the constraint
 * set; in that case (or when jumped to from above) copy it to a fresh
 * register that does satisfy the constraint.
 */
4121 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4122 allocate_in_reg:
4124 * Allocate a new register matching the constraint
4125 * and move the temporary register into it.
4127 temp_load(s, ts, tcg_target_available_regs[ts->type],
4128 i_allocated_regs, 0);
4129 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4130 o_preferred_regs, ts->indirect_base);
4131 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4133 * Cross register class move not supported. Sync the
4134 * temp back to its slot and load from there.
4136 temp_sync(s, ts, i_allocated_regs, 0, 0);
4137 tcg_out_ld(s, ts->type, reg,
4138 ts->mem_base->reg, ts->mem_offset);
/* Record the register and keep later inputs of this op away from it. */
4141 new_args[i] = reg;
4142 const_args[i] = 0;
4143 tcg_regset_set_reg(i_allocated_regs, reg);
4146 /* mark dead temporaries and free the associated registers */
4147 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4148 if (IS_DEAD_ARG(i)) {
4149 temp_dead(s, arg_temp(op->args[i]));
4153 if (def->flags & TCG_OPF_COND_BRANCH) {
4154 tcg_reg_alloc_cbranch(s, i_allocated_regs);
4155 } else if (def->flags & TCG_OPF_BB_END) {
4156 tcg_reg_alloc_bb_end(s, i_allocated_regs);
4157 } else {
4158 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4159 /* XXX: permit generic clobber register list ? */
4160 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4161 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4162 tcg_reg_free(s, i, i_allocated_regs);
4166 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4167 /* sync globals if the op has side effects and might trigger
4168 an exception. */
4169 sync_globals(s, i_allocated_regs);
4172 /* satisfy the output constraints */
4173 for(k = 0; k < nb_oargs; k++) {
4174 i = def->args_ct[k].sort_index;
4175 arg = op->args[i];
4176 arg_ct = &def->args_ct[i];
4177 ts = arg_temp(arg);
4179 /* ENV should not be modified. */
4180 tcg_debug_assert(!temp_readonly(ts));
/*
 * Three ways to pick the output register: reuse the register of the
 * aliased input (unless that input was passed as a constant), take a
 * register distinct from every input and earlier output (newreg), or
 * take one distinct from earlier outputs only.
 */
4182 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4183 reg = new_args[arg_ct->alias_index];
4184 } else if (arg_ct->newreg) {
4185 reg = tcg_reg_alloc(s, arg_ct->regs,
4186 i_allocated_regs | o_allocated_regs,
4187 op->output_pref[k], ts->indirect_base);
4188 } else {
4189 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4190 op->output_pref[k], ts->indirect_base);
4192 tcg_regset_set_reg(o_allocated_regs, reg);
/* Rebind the output temp from its old register, if any, to the new one. */
4193 if (ts->val_type == TEMP_VAL_REG) {
4194 s->reg_to_temp[ts->reg] = NULL;
4196 ts->val_type = TEMP_VAL_REG;
4197 ts->reg = reg;
4199 * Temp value is modified, so the value kept in memory is
4200 * potentially not the same.
4202 ts->mem_coherent = 0;
4203 s->reg_to_temp[reg] = ts;
4204 new_args[i] = reg;
4208 /* emit instruction */
4209 if (def->flags & TCG_OPF_VECTOR) {
4210 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4211 new_args, const_args);
4212 } else {
4213 tcg_out_op(s, op->opc, new_args, const_args);
4216 /* move the outputs in the correct register if needed */
4217 for(i = 0; i < nb_oargs; i++) {
4218 ts = arg_temp(op->args[i]);
4220 /* ENV should not be modified. */
4221 tcg_debug_assert(!temp_readonly(ts));
4223 if (NEED_SYNC_ARG(i)) {
4224 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4225 } else if (IS_DEAD_ARG(i)) {
4226 temp_dead(s, ts);
/*
 * Try to emit a dup2_vec op (op->args[0] = output vector temp,
 * op->args[1] / op->args[2] = low / high input temps) with a specialised
 * sequence.
 *
 * Two shortcuts are attempted after the output register is allocated:
 *   - both inputs constant: the two halves are combined into one 64-bit
 *     immediate and emitted with tcg_out_dupi_vec();
 *   - the inputs are adjacent temps whose base type is TCG_TYPE_I64: both
 *     are synced to memory and tcg_out_dupm_vec() is tried on the slot.
 *
 * Returns true when code was emitted and the liveness updates were
 * applied.  Returns false when neither shortcut applied, in which case the
 * caller is expected to fall back to the generic expansion.  Note that on
 * the false path the output register may already have been allocated.
 */
4231 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4233 const TCGLifeData arg_life = op->life;
4234 TCGTemp *ots, *itsl, *itsh;
4235 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4237 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4238 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4239 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4241 ots = arg_temp(op->args[0]);
4242 itsl = arg_temp(op->args[1]);
4243 itsh = arg_temp(op->args[2]);
4245 /* ENV should not be modified. */
4246 tcg_debug_assert(!temp_readonly(ots));
4248 /* Allocate the output register now. */
4249 if (ots->val_type != TEMP_VAL_REG) {
4250 TCGRegSet allocated_regs = s->reserved_regs;
4251 TCGRegSet dup_out_regs =
4252 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4254 /* Make sure to not spill the input registers. */
4255 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4256 tcg_regset_set_reg(allocated_regs, itsl->reg);
4258 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4259 tcg_regset_set_reg(allocated_regs, itsh->reg);
4262 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4263 op->output_pref[0], ots->indirect_base);
4264 ots->val_type = TEMP_VAL_REG;
4265 ots->mem_coherent = 0;
4266 s->reg_to_temp[ots->reg] = ots;
4269 /* Promote dup2 of immediates to dupi_vec. */
4270 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
/* Low half in bits 0..31, high half deposited into bits 32..63. */
4271 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4272 MemOp vece = MO_64;
/* Use the smallest element size whose replication reproduces val. */
4274 if (val == dup_const(MO_8, val)) {
4275 vece = MO_8;
4276 } else if (val == dup_const(MO_16, val)) {
4277 vece = MO_16;
4278 } else if (val == dup_const(MO_32, val)) {
4279 vece = MO_32;
4282 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4283 goto done;
4286 /* If the two inputs form one 64-bit value, try dupm_vec. */
4287 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4288 if (!itsl->mem_coherent) {
4289 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4291 if (!itsh->mem_coherent) {
4292 temp_sync(s, itsh, s->reserved_regs, 0, 0);
/* The memory operand is taken from the high temp on big-endian hosts,
   from the low temp otherwise. */
4294 #ifdef HOST_WORDS_BIGENDIAN
4295 TCGTemp *its = itsh;
4296 #else
4297 TCGTemp *its = itsl;
4298 #endif
4299 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4300 its->mem_base->reg, its->mem_offset)) {
4301 goto done;
4305 /* Fall back to generic expansion. */
4306 return false;
/* Code was emitted: apply liveness to both inputs and the output. */
4308 done:
4309 if (IS_DEAD_ARG(1)) {
4310 temp_dead(s, itsl);
4312 if (IS_DEAD_ARG(2)) {
4313 temp_dead(s, itsh);
4315 if (NEED_SYNC_ARG(0)) {
4316 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4317 } else if (IS_DEAD_ARG(0)) {
4318 temp_dead(s, ots);
4320 return true;
/*
 * STACK_DIR(x): x negated when TCG_TARGET_STACK_GROWSUP is defined,
 * x unchanged otherwise.
 * NOTE(review): no use of this macro is visible in the nearby code;
 * tcg_reg_alloc_call() tests TCG_TARGET_STACK_GROWSUP directly instead.
 */
4323 #ifdef TCG_TARGET_STACK_GROWSUP
4324 #define STACK_DIR(x) (-(x))
4325 #else
4326 #define STACK_DIR(x) (x)
4327 #endif
/*
 * Register allocation and code emission for a helper call op.
 *
 * The op's arguments are laid out as: TCGOP_CALLO(op) outputs,
 * TCGOP_CALLI(op) inputs, then the function address and the call flags.
 *
 * Steps, in order:
 *   1. inputs that do not fit in tcg_target_call_iarg_regs[] are stored to
 *      the call stack, starting at TCG_TARGET_CALL_STACK_OFFSET;
 *   2. the remaining inputs are placed in their argument registers;
 *   3. dead inputs are released;
 *   4. every register in tcg_target_call_clobber_regs is freed;
 *   5. globals are left alone, synced, or saved according to the
 *      TCG_CALL_NO_READ_GLOBALS / TCG_CALL_NO_WRITE_GLOBALS flags;
 *   6. the call is emitted with tcg_out_call();
 *   7. outputs are bound to tcg_target_call_oarg_regs[] and then synced
 *      and/or killed as op->life requests.
 *
 * Arguments equal to TCG_CALL_DUMMY_ARG are skipped, but still occupy
 * their stack slot or argument-register position.
 */
4329 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4331 const int nb_oargs = TCGOP_CALLO(op);
4332 const int nb_iargs = TCGOP_CALLI(op);
4333 const TCGLifeData arg_life = op->life;
4334 int flags, nb_regs, i;
4335 TCGReg reg;
4336 TCGArg arg;
4337 TCGTemp *ts;
4338 intptr_t stack_offset;
4339 size_t call_stack_size;
4340 tcg_insn_unit *func_addr;
4341 int allocate_args;
4342 TCGRegSet allocated_regs;
/* Function address and call flags follow the output and input args. */
4344 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4345 flags = op->args[nb_oargs + nb_iargs + 1];
/* Number of inputs passed in registers: capped by the register count. */
4347 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4348 if (nb_regs > nb_iargs) {
4349 nb_regs = nb_iargs;
4352 /* assign stack slots first */
/* Stack area size, rounded up to a multiple of TCG_TARGET_STACK_ALIGN. */
4353 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4354 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4355 ~(TCG_TARGET_STACK_ALIGN - 1);
4356 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4357 if (allocate_args) {
4358 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4359 preallocate call stack */
4360 tcg_abort();
4363 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4364 for (i = nb_regs; i < nb_iargs; i++) {
4365 arg = op->args[nb_oargs + i];
/* The offset moves down before the store when the stack grows up, and
   up after the store otherwise. */
4366 #ifdef TCG_TARGET_STACK_GROWSUP
4367 stack_offset -= sizeof(tcg_target_long);
4368 #endif
4369 if (arg != TCG_CALL_DUMMY_ARG) {
4370 ts = arg_temp(arg);
4371 temp_load(s, ts, tcg_target_available_regs[ts->type],
4372 s->reserved_regs, 0);
4373 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4375 #ifndef TCG_TARGET_STACK_GROWSUP
4376 stack_offset += sizeof(tcg_target_long);
4377 #endif
4380 /* assign input registers */
4381 allocated_regs = s->reserved_regs;
4382 for (i = 0; i < nb_regs; i++) {
4383 arg = op->args[nb_oargs + i];
4384 if (arg != TCG_CALL_DUMMY_ARG) {
4385 ts = arg_temp(arg);
4386 reg = tcg_target_call_iarg_regs[i];
/* Value already in some register: evict the target register's current
   occupant and copy, unless the value is already in the right place. */
4388 if (ts->val_type == TEMP_VAL_REG) {
4389 if (ts->reg != reg) {
4390 tcg_reg_free(s, reg, allocated_regs);
4391 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4393 * Cross register class move not supported. Sync the
4394 * temp back to its slot and load from there.
4396 temp_sync(s, ts, allocated_regs, 0, 0);
4397 tcg_out_ld(s, ts->type, reg,
4398 ts->mem_base->reg, ts->mem_offset);
4401 } else {
/* Value not in a register: load it with the argument register as the
   only candidate. */
4402 TCGRegSet arg_set = 0;
4404 tcg_reg_free(s, reg, allocated_regs);
4405 tcg_regset_set_reg(arg_set, reg);
4406 temp_load(s, ts, arg_set, allocated_regs, 0);
4409 tcg_regset_set_reg(allocated_regs, reg);
4413 /* mark dead temporaries and free the associated registers */
4414 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4415 if (IS_DEAD_ARG(i)) {
4416 temp_dead(s, arg_temp(op->args[i]));
4420 /* clobber call registers */
4421 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4422 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4423 tcg_reg_free(s, i, allocated_regs);
4427 /* Save globals if they might be written by the helper, sync them if
4428 they might be read. */
4429 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4430 /* Nothing to do */
4431 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4432 sync_globals(s, allocated_regs);
4433 } else {
4434 save_globals(s, allocated_regs);
4437 tcg_out_call(s, func_addr);
4439 /* assign output registers and emit moves if needed */
4440 for(i = 0; i < nb_oargs; i++) {
4441 arg = op->args[i];
4442 ts = arg_temp(arg);
4444 /* ENV should not be modified. */
4445 tcg_debug_assert(!temp_readonly(ts));
/* The i-th output arrives in the i-th call output register, which is
   asserted (debug builds) to be unoccupied at this point. */
4447 reg = tcg_target_call_oarg_regs[i];
4448 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4449 if (ts->val_type == TEMP_VAL_REG) {
4450 s->reg_to_temp[ts->reg] = NULL;
4452 ts->val_type = TEMP_VAL_REG;
4453 ts->reg = reg;
4454 ts->mem_coherent = 0;
4455 s->reg_to_temp[reg] = ts;
4456 if (NEED_SYNC_ARG(i)) {
4457 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4458 } else if (IS_DEAD_ARG(i)) {
4459 temp_dead(s, ts);
4464 #ifdef CONFIG_PROFILER
4466 /* avoid copy/paste errors */
4467 #define PROF_ADD(to, from, field) \
4468 do { \
4469 (to)->field += qatomic_read(&((from)->field)); \
4470 } while (0)
4472 #define PROF_MAX(to, from, field) \
4473 do { \
4474 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4475 if (val__ > (to)->field) { \
4476 (to)->field = val__; \
4478 } while (0)
4480 /* Pass in a zero'ed @prof */
4481 static inline
4482 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4484 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4485 unsigned int i;
4487 for (i = 0; i < n_ctxs; i++) {
4488 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4489 const TCGProfile *orig = &s->prof;
4491 if (counters) {
4492 PROF_ADD(prof, orig, cpu_exec_time);
4493 PROF_ADD(prof, orig, tb_count1);
4494 PROF_ADD(prof, orig, tb_count);
4495 PROF_ADD(prof, orig, op_count);
4496 PROF_MAX(prof, orig, op_count_max);
4497 PROF_ADD(prof, orig, temp_count);
4498 PROF_MAX(prof, orig, temp_count_max);
4499 PROF_ADD(prof, orig, del_op_count);
4500 PROF_ADD(prof, orig, code_in_len);
4501 PROF_ADD(prof, orig, code_out_len);
4502 PROF_ADD(prof, orig, search_out_len);
4503 PROF_ADD(prof, orig, interm_time);
4504 PROF_ADD(prof, orig, code_time);
4505 PROF_ADD(prof, orig, la_time);
4506 PROF_ADD(prof, orig, opt_time);
4507 PROF_ADD(prof, orig, restore_count);
4508 PROF_ADD(prof, orig, restore_time);
4510 if (table) {
4511 int i;
4513 for (i = 0; i < NB_OPS; i++) {
4514 PROF_ADD(prof, orig, table_op_count[i]);
4520 #undef PROF_ADD
4521 #undef PROF_MAX
4523 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4525 tcg_profile_snapshot(prof, true, false);
4528 static void tcg_profile_snapshot_table(TCGProfile *prof)
4530 tcg_profile_snapshot(prof, false, true);
4533 void tcg_dump_op_count(void)
4535 TCGProfile prof = {};
4536 int i;
4538 tcg_profile_snapshot_table(&prof);
4539 for (i = 0; i < NB_OPS; i++) {
4540 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4541 prof.table_op_count[i]);
4545 int64_t tcg_cpu_exec_time(void)
4547 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4548 unsigned int i;
4549 int64_t ret = 0;
4551 for (i = 0; i < n_ctxs; i++) {
4552 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4553 const TCGProfile *prof = &s->prof;
4555 ret += qatomic_read(&prof->cpu_exec_time);
4557 return ret;
4559 #else
/* Build without CONFIG_PROFILER: only report that no data is available. */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
/*
 * Build without CONFIG_PROFILER: there is no execution time to report, so
 * this logs an error and terminates the process with EXIT_FAILURE.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4570 #endif
4573 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4575 #ifdef CONFIG_PROFILER
4576 TCGProfile *prof = &s->prof;
4577 #endif
4578 int i, num_insns;
4579 TCGOp *op;
4581 #ifdef CONFIG_PROFILER
4583 int n = 0;
4585 QTAILQ_FOREACH(op, &s->ops, link) {
4586 n++;
4588 qatomic_set(&prof->op_count, prof->op_count + n);
4589 if (n > prof->op_count_max) {
4590 qatomic_set(&prof->op_count_max, n);
4593 n = s->nb_temps;
4594 qatomic_set(&prof->temp_count, prof->temp_count + n);
4595 if (n > prof->temp_count_max) {
4596 qatomic_set(&prof->temp_count_max, n);
4599 #endif
4601 #ifdef DEBUG_DISAS
4602 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4603 && qemu_log_in_addr_range(tb->pc))) {
4604 FILE *logfile = qemu_log_lock();
4605 qemu_log("OP:\n");
4606 tcg_dump_ops(s, false);
4607 qemu_log("\n");
4608 qemu_log_unlock(logfile);
4610 #endif
4612 #ifdef CONFIG_DEBUG_TCG
4613 /* Ensure all labels referenced have been emitted. */
4615 TCGLabel *l;
4616 bool error = false;
4618 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4619 if (unlikely(!l->present) && l->refs) {
4620 qemu_log_mask(CPU_LOG_TB_OP,
4621 "$L%d referenced but not present.\n", l->id);
4622 error = true;
4625 assert(!error);
4627 #endif
4629 #ifdef CONFIG_PROFILER
4630 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4631 #endif
4633 #ifdef USE_TCG_OPTIMIZATIONS
4634 tcg_optimize(s);
4635 #endif
4637 #ifdef CONFIG_PROFILER
4638 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4639 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4640 #endif
4642 reachable_code_pass(s);
4643 liveness_pass_1(s);
4645 if (s->nb_indirects > 0) {
4646 #ifdef DEBUG_DISAS
4647 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4648 && qemu_log_in_addr_range(tb->pc))) {
4649 FILE *logfile = qemu_log_lock();
4650 qemu_log("OP before indirect lowering:\n");
4651 tcg_dump_ops(s, false);
4652 qemu_log("\n");
4653 qemu_log_unlock(logfile);
4655 #endif
4656 /* Replace indirect temps with direct temps. */
4657 if (liveness_pass_2(s)) {
4658 /* If changes were made, re-run liveness. */
4659 liveness_pass_1(s);
4663 #ifdef CONFIG_PROFILER
4664 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4665 #endif
4667 #ifdef DEBUG_DISAS
4668 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4669 && qemu_log_in_addr_range(tb->pc))) {
4670 FILE *logfile = qemu_log_lock();
4671 qemu_log("OP after optimization and liveness analysis:\n");
4672 tcg_dump_ops(s, true);
4673 qemu_log("\n");
4674 qemu_log_unlock(logfile);
4676 #endif
4678 tcg_reg_alloc_start(s);
4681 * Reset the buffer pointers when restarting after overflow.
4682 * TODO: Move this into translate-all.c with the rest of the
4683 * buffer management. Having only this done here is confusing.
4685 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4686 s->code_ptr = s->code_buf;
4688 #ifdef TCG_TARGET_NEED_LDST_LABELS
4689 QSIMPLEQ_INIT(&s->ldst_labels);
4690 #endif
4691 #ifdef TCG_TARGET_NEED_POOL_LABELS
4692 s->pool_labels = NULL;
4693 #endif
4695 num_insns = -1;
4696 QTAILQ_FOREACH(op, &s->ops, link) {
4697 TCGOpcode opc = op->opc;
4699 #ifdef CONFIG_PROFILER
4700 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4701 #endif
4703 switch (opc) {
4704 case INDEX_op_mov_i32:
4705 case INDEX_op_mov_i64:
4706 case INDEX_op_mov_vec:
4707 tcg_reg_alloc_mov(s, op);
4708 break;
4709 case INDEX_op_dup_vec:
4710 tcg_reg_alloc_dup(s, op);
4711 break;
4712 case INDEX_op_insn_start:
4713 if (num_insns >= 0) {
4714 size_t off = tcg_current_code_size(s);
4715 s->gen_insn_end_off[num_insns] = off;
4716 /* Assert that we do not overflow our stored offset. */
4717 assert(s->gen_insn_end_off[num_insns] == off);
4719 num_insns++;
4720 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4721 target_ulong a;
4722 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4723 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4724 #else
4725 a = op->args[i];
4726 #endif
4727 s->gen_insn_data[num_insns][i] = a;
4729 break;
4730 case INDEX_op_discard:
4731 temp_dead(s, arg_temp(op->args[0]));
4732 break;
4733 case INDEX_op_set_label:
4734 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4735 tcg_out_label(s, arg_label(op->args[0]));
4736 break;
4737 case INDEX_op_call:
4738 tcg_reg_alloc_call(s, op);
4739 break;
4740 case INDEX_op_dup2_vec:
4741 if (tcg_reg_alloc_dup2(s, op)) {
4742 break;
4744 /* fall through */
4745 default:
4746 /* Sanity check that we've not introduced any unhandled opcodes. */
4747 tcg_debug_assert(tcg_op_supported(opc));
4748 /* Note: in order to speed up the code, it would be much
4749 faster to have specialized register allocator functions for
4750 some common argument patterns */
4751 tcg_reg_alloc_op(s, op);
4752 break;
4754 #ifdef CONFIG_DEBUG_TCG
4755 check_regs(s);
4756 #endif
4757 /* Test for (pending) buffer overflow. The assumption is that any
4758 one operation beginning below the high water mark cannot overrun
4759 the buffer completely. Thus we can test for overflow after
4760 generating code without having to check during generation. */
4761 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4762 return -1;
4764 /* Test for TB overflow, as seen by gen_insn_end_off. */
4765 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4766 return -2;
4769 tcg_debug_assert(num_insns >= 0);
4770 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4772 /* Generate TB finalization at the end of block */
4773 #ifdef TCG_TARGET_NEED_LDST_LABELS
4774 i = tcg_out_ldst_finalize(s);
4775 if (i < 0) {
4776 return i;
4778 #endif
4779 #ifdef TCG_TARGET_NEED_POOL_LABELS
4780 i = tcg_out_pool_finalize(s);
4781 if (i < 0) {
4782 return i;
4784 #endif
4785 if (!tcg_resolve_relocs(s)) {
4786 return -2;
4789 #ifndef CONFIG_TCG_INTERPRETER
4790 /* flush instruction cache */
4791 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4792 (uintptr_t)s->code_buf,
4793 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4794 #endif
4796 return tcg_current_code_size(s);
4799 #ifdef CONFIG_PROFILER
4800 void tcg_dump_info(void)
4802 TCGProfile prof = {};
4803 const TCGProfile *s;
4804 int64_t tb_count;
4805 int64_t tb_div_count;
4806 int64_t tot;
4808 tcg_profile_snapshot_counters(&prof);
4809 s = &prof;
4810 tb_count = s->tb_count;
4811 tb_div_count = tb_count ? tb_count : 1;
4812 tot = s->interm_time + s->code_time;
4814 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4815 tot, tot / 2.4e9);
4816 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4817 " %0.1f%%)\n",
4818 tb_count, s->tb_count1 - tb_count,
4819 (double)(s->tb_count1 - s->tb_count)
4820 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4821 qemu_printf("avg ops/TB %0.1f max=%d\n",
4822 (double)s->op_count / tb_div_count, s->op_count_max);
4823 qemu_printf("deleted ops/TB %0.2f\n",
4824 (double)s->del_op_count / tb_div_count);
4825 qemu_printf("avg temps/TB %0.2f max=%d\n",
4826 (double)s->temp_count / tb_div_count, s->temp_count_max);
4827 qemu_printf("avg host code/TB %0.1f\n",
4828 (double)s->code_out_len / tb_div_count);
4829 qemu_printf("avg search data/TB %0.1f\n",
4830 (double)s->search_out_len / tb_div_count);
4832 qemu_printf("cycles/op %0.1f\n",
4833 s->op_count ? (double)tot / s->op_count : 0);
4834 qemu_printf("cycles/in byte %0.1f\n",
4835 s->code_in_len ? (double)tot / s->code_in_len : 0);
4836 qemu_printf("cycles/out byte %0.1f\n",
4837 s->code_out_len ? (double)tot / s->code_out_len : 0);
4838 qemu_printf("cycles/search byte %0.1f\n",
4839 s->search_out_len ? (double)tot / s->search_out_len : 0);
4840 if (tot == 0) {
4841 tot = 1;
4843 qemu_printf(" gen_interm time %0.1f%%\n",
4844 (double)s->interm_time / tot * 100.0);
4845 qemu_printf(" gen_code time %0.1f%%\n",
4846 (double)s->code_time / tot * 100.0);
4847 qemu_printf("optim./code time %0.1f%%\n",
4848 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4849 * 100.0);
4850 qemu_printf("liveness/code time %0.1f%%\n",
4851 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4852 qemu_printf("cpu_restore count %" PRId64 "\n",
4853 s->restore_count);
4854 qemu_printf(" avg cycles %0.1f\n",
4855 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4857 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no
 * counters to report, so only print a notice saying so.
 */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4862 #endif
4864 #ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
4877 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/*
 * Action codes stored in jit_descriptor.action_flag.
 * The numeric values are part of the interface shared with GDB.
 */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
/*
 * One registered symbol file.  The layout is part of the interface
 * shared with GDB and must not be changed.
 */
struct jit_code_entry {
    /* Links to neighbouring entries; this file never sets them. */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Start address and size in bytes of the in-memory symbol file. */
    const void *symfile_addr;
    uint64_t symfile_size;
};
/*
 * Descriptor through which symbol files are announced.  The layout is
 * part of the interface shared with GDB and must not be changed.
 */
struct jit_descriptor {
    /* Interface version; statically initialized to 1 in this file. */
    uint32_t version;
    /* One of the jit_actions_t values. */
    uint32_t action_flag;
    /* Entry the current action refers to. */
    struct jit_code_entry *relevant_entry;
    /* First registered entry. */
    struct jit_code_entry *first_entry;
};
/*
 * Hook called after __jit_debug_descriptor has been updated.  It is kept
 * out of line (noinline) and contains an empty asm statement so that the
 * call is not optimized away.
 */
void __attribute__((noinline)) __jit_debug_register_code(void);
void __jit_debug_register_code(void)
{
    __asm__("");
}
4904 /* Must statically initialize the version, because GDB may check
4905 the version before we can set it. */
4906 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4908 /* End GDB interface. */
/*
 * Return the byte offset of @str inside the string table @strtab.
 *
 * @strtab is a sequence of NUL-terminated strings whose first byte
 * (offset 0) is the empty string, so the search starts at offset 1.
 * The table must be followed by at least one extra NUL byte, as the
 * zero-padded str[] array in this file is; that empty string marks the
 * end of the table.
 *
 * Returns the offset of the first match.  If @str is not present,
 * returns 0 (the offset of the empty name) instead of scanning past the
 * end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return (int)(p - strtab);
        }
        if (*p == '\0') {
            /* Hit the terminating padding without finding a match. */
            return 0;
        }
        /* Skip this string and its terminator. */
        p += strlen(p) + 1;
    }
}
4922 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4923 const void *debug_frame,
4924 size_t debug_frame_size)
4926 struct __attribute__((packed)) DebugInfo {
4927 uint32_t len;
4928 uint16_t version;
4929 uint32_t abbrev;
4930 uint8_t ptr_size;
4931 uint8_t cu_die;
4932 uint16_t cu_lang;
4933 uintptr_t cu_low_pc;
4934 uintptr_t cu_high_pc;
4935 uint8_t fn_die;
4936 char fn_name[16];
4937 uintptr_t fn_low_pc;
4938 uintptr_t fn_high_pc;
4939 uint8_t cu_eoc;
4942 struct ElfImage {
4943 ElfW(Ehdr) ehdr;
4944 ElfW(Phdr) phdr;
4945 ElfW(Shdr) shdr[7];
4946 ElfW(Sym) sym[2];
4947 struct DebugInfo di;
4948 uint8_t da[24];
4949 char str[80];
4952 struct ElfImage *img;
4954 static const struct ElfImage img_template = {
4955 .ehdr = {
4956 .e_ident[EI_MAG0] = ELFMAG0,
4957 .e_ident[EI_MAG1] = ELFMAG1,
4958 .e_ident[EI_MAG2] = ELFMAG2,
4959 .e_ident[EI_MAG3] = ELFMAG3,
4960 .e_ident[EI_CLASS] = ELF_CLASS,
4961 .e_ident[EI_DATA] = ELF_DATA,
4962 .e_ident[EI_VERSION] = EV_CURRENT,
4963 .e_type = ET_EXEC,
4964 .e_machine = ELF_HOST_MACHINE,
4965 .e_version = EV_CURRENT,
4966 .e_phoff = offsetof(struct ElfImage, phdr),
4967 .e_shoff = offsetof(struct ElfImage, shdr),
4968 .e_ehsize = sizeof(ElfW(Shdr)),
4969 .e_phentsize = sizeof(ElfW(Phdr)),
4970 .e_phnum = 1,
4971 .e_shentsize = sizeof(ElfW(Shdr)),
4972 .e_shnum = ARRAY_SIZE(img->shdr),
4973 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4974 #ifdef ELF_HOST_FLAGS
4975 .e_flags = ELF_HOST_FLAGS,
4976 #endif
4977 #ifdef ELF_OSABI
4978 .e_ident[EI_OSABI] = ELF_OSABI,
4979 #endif
4981 .phdr = {
4982 .p_type = PT_LOAD,
4983 .p_flags = PF_X,
4985 .shdr = {
4986 [0] = { .sh_type = SHT_NULL },
4987 /* Trick: The contents of code_gen_buffer are not present in
4988 this fake ELF file; that got allocated elsewhere. Therefore
4989 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4990 will not look for contents. We can record any address. */
4991 [1] = { /* .text */
4992 .sh_type = SHT_NOBITS,
4993 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4995 [2] = { /* .debug_info */
4996 .sh_type = SHT_PROGBITS,
4997 .sh_offset = offsetof(struct ElfImage, di),
4998 .sh_size = sizeof(struct DebugInfo),
5000 [3] = { /* .debug_abbrev */
5001 .sh_type = SHT_PROGBITS,
5002 .sh_offset = offsetof(struct ElfImage, da),
5003 .sh_size = sizeof(img->da),
5005 [4] = { /* .debug_frame */
5006 .sh_type = SHT_PROGBITS,
5007 .sh_offset = sizeof(struct ElfImage),
5009 [5] = { /* .symtab */
5010 .sh_type = SHT_SYMTAB,
5011 .sh_offset = offsetof(struct ElfImage, sym),
5012 .sh_size = sizeof(img->sym),
5013 .sh_info = 1,
5014 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5015 .sh_entsize = sizeof(ElfW(Sym)),
5017 [6] = { /* .strtab */
5018 .sh_type = SHT_STRTAB,
5019 .sh_offset = offsetof(struct ElfImage, str),
5020 .sh_size = sizeof(img->str),
5023 .sym = {
5024 [1] = { /* code_gen_buffer */
5025 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5026 .st_shndx = 1,
5029 .di = {
5030 .len = sizeof(struct DebugInfo) - 4,
5031 .version = 2,
5032 .ptr_size = sizeof(void *),
5033 .cu_die = 1,
5034 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
5035 .fn_die = 2,
5036 .fn_name = "code_gen_buffer"
5038 .da = {
5039 1, /* abbrev number (the cu) */
5040 0x11, 1, /* DW_TAG_compile_unit, has children */
5041 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
5042 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5043 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5044 0, 0, /* end of abbrev */
5045 2, /* abbrev number (the fn) */
5046 0x2e, 0, /* DW_TAG_subprogram, no children */
5047 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
5048 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5049 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5050 0, 0, /* end of abbrev */
5051 0 /* no more abbrev */
5053 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5054 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5057 /* We only need a single jit entry; statically allocate it. */
5058 static struct jit_code_entry one_entry;
5060 uintptr_t buf = (uintptr_t)buf_ptr;
5061 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5062 DebugFrameHeader *dfh;
5064 img = g_malloc(img_size);
5065 *img = img_template;
5067 img->phdr.p_vaddr = buf;
5068 img->phdr.p_paddr = buf;
5069 img->phdr.p_memsz = buf_size;
5071 img->shdr[1].sh_name = find_string(img->str, ".text");
5072 img->shdr[1].sh_addr = buf;
5073 img->shdr[1].sh_size = buf_size;
5075 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5076 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5078 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5079 img->shdr[4].sh_size = debug_frame_size;
5081 img->shdr[5].sh_name = find_string(img->str, ".symtab");
5082 img->shdr[6].sh_name = find_string(img->str, ".strtab");
5084 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5085 img->sym[1].st_value = buf;
5086 img->sym[1].st_size = buf_size;
5088 img->di.cu_low_pc = buf;
5089 img->di.cu_high_pc = buf + buf_size;
5090 img->di.fn_low_pc = buf;
5091 img->di.fn_high_pc = buf + buf_size;
5093 dfh = (DebugFrameHeader *)(img + 1);
5094 memcpy(dfh, debug_frame, debug_frame_size);
5095 dfh->fde.func_start = buf;
5096 dfh->fde.func_len = buf_size;
5098 #ifdef DEBUG_JIT
5099 /* Enable this block to be able to debug the ELF image file creation.
5100 One can use readelf, objdump, or other inspection utilities. */
5102 FILE *f = fopen("/tmp/qemu.jit", "w+b");
5103 if (f) {
5104 if (fwrite(img, img_size, 1, f) != img_size) {
5105 /* Avoid stupid unused return value warning for fwrite. */
5107 fclose(f);
5110 #endif
5112 one_entry.symfile_addr = img;
5113 one_entry.symfile_size = img_size;
5115 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5116 __jit_debug_descriptor.relevant_entry = &one_entry;
5117 __jit_debug_descriptor.first_entry = &one_entry;
5118 __jit_debug_register_code();
5120 #else
5121 /* No support for the feature. Provide the entry point expected by exec.c,
5122 and implement the internal function we declared earlier. */
/* Variant used when ELF_HOST_MACHINE is not defined: does nothing. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
/* Entry point used when ELF_HOST_MACHINE is not defined: does nothing. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5133 #endif /* ELF_HOST_MACHINE */
5135 #if !TCG_TARGET_MAYBE_vec
5136 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5138 g_assert_not_reached();
5140 #endif