hw/timer/sse-counter: Model the SSE Subsystem System Counter
[qemu/ar7.git] / tcg / tcg.c
blob63a12b197bff179789410966c433b1da47c3e241
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to jump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
42 instructions */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
56 #else
57 # define ELF_CLASS ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
61 #else
62 # define ELF_DATA ELFDATA2LSB
63 #endif
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74 intptr_t value, intptr_t addend);
76 /* The CIE and FDE header definitions will be common to all hosts. */
/*
 * DWARF .debug_frame CIE (Common Information Entry) header, emitted as
 * part of the in-memory ELF image handed to GDB's JIT interface.
 * NOTE(review): field layout must match the DWARF frame format — do not
 * reorder.  The 'len' field is pointer-aligned so the whole record can be
 * emitted with host-native alignment.
 */
77 typedef struct {
78 uint32_t len __attribute__((aligned((sizeof(void *)))));
79 uint32_t id;
80 uint8_t version;
81 char augmentation[1];
82 uint8_t code_align;
83 uint8_t data_align;
84 uint8_t return_column;
85 } DebugFrameCIE;
/*
 * DWARF .debug_frame FDE (Frame Description Entry) header.  Packed so the
 * uintptr_t address/length fields follow the 32-bit fields with no padding,
 * as the DWARF frame format requires.
 */
87 typedef struct QEMU_PACKED {
88 uint32_t len __attribute__((aligned((sizeof(void *)))));
89 uint32_t cie_offset;
90 uintptr_t func_start;
91 uintptr_t func_len;
92 } DebugFrameFDEHeader;
/* A CIE immediately followed by its FDE header: the common per-host shape
 * of the debug-frame data registered with GDB. */
94 typedef struct QEMU_PACKED {
95 DebugFrameCIE cie;
96 DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
99 static void tcg_register_jit_int(const void *buf, size_t size,
100 const void *debug_frame,
101 size_t debug_frame_size)
102 __attribute__((unused));
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106 intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109 TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
111 const int *const_args);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114 TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116 TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118 TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
120 unsigned vece, const TCGArg *args,
121 const int *const_args);
122 #else
123 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
124 TCGReg dst, TCGReg src)
126 g_assert_not_reached();
128 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
129 TCGReg dst, TCGReg base, intptr_t offset)
131 g_assert_not_reached();
133 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
134 TCGReg dst, int64_t arg)
136 g_assert_not_reached();
138 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
139 unsigned vece, const TCGArg *args,
140 const int *const_args)
142 g_assert_not_reached();
144 #endif
145 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
146 intptr_t arg2);
147 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
148 TCGReg base, intptr_t ofs);
149 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
150 static int tcg_target_const_match(tcg_target_long val, TCGType type,
151 const TCGArgConstraint *arg_ct);
152 #ifdef TCG_TARGET_NEED_LDST_LABELS
153 static int tcg_out_ldst_finalize(TCGContext *s);
154 #endif
156 #define TCG_HIGHWATER 1024
158 static TCGContext **tcg_ctxs;
159 static unsigned int n_tcg_ctxs;
160 TCGv_env cpu_env = 0;
161 const void *tcg_code_gen_epilogue;
162 uintptr_t tcg_splitwx_diff;
164 #ifndef CONFIG_TCG_INTERPRETER
165 tcg_prologue_fn *tcg_qemu_tb_exec;
166 #endif
168 struct tcg_region_tree {
169 QemuMutex lock;
170 GTree *tree;
171 /* padding to avoid false sharing is computed at run-time */
175 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
176 * dynamically allocate from as demand dictates. Given appropriate region
177 * sizing, this minimizes flushes even when some TCG threads generate a lot
178 * more code than others.
180 struct tcg_region_state {
181 QemuMutex lock;
183 /* fields set at init time */
184 void *start;
185 void *start_aligned;
186 void *end;
187 size_t n;
188 size_t size; /* size of one region */
189 size_t stride; /* .size + guard size */
191 /* fields protected by the lock */
192 size_t current; /* current region index */
193 size_t agg_size_full; /* aggregate size of full regions */
196 static struct tcg_region_state region;
198 * This is an array of struct tcg_region_tree's, with padding.
199 * We use void * to simplify the computation of region_trees[i]; each
200 * struct is found every tree_size bytes.
202 static void *region_trees;
203 static size_t tree_size;
204 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
205 static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte of host code at the current output position. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    s->code_ptr[0] = v;
    s->code_ptr += 1;
}

/* Overwrite one already-emitted byte at @p (relocation back-patching). */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
220 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
221 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
223 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
224 *s->code_ptr++ = v;
225 } else {
226 tcg_insn_unit *p = s->code_ptr;
227 memcpy(p, &v, sizeof(v));
228 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
232 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
233 uint16_t v)
235 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
236 *p = v;
237 } else {
238 memcpy(p, &v, sizeof(v));
241 #endif
243 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
244 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
246 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
247 *s->code_ptr++ = v;
248 } else {
249 tcg_insn_unit *p = s->code_ptr;
250 memcpy(p, &v, sizeof(v));
251 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
255 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
256 uint32_t v)
258 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
259 *p = v;
260 } else {
261 memcpy(p, &v, sizeof(v));
264 #endif
266 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
267 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
269 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
270 *s->code_ptr++ = v;
271 } else {
272 tcg_insn_unit *p = s->code_ptr;
273 memcpy(p, &v, sizeof(v));
274 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
278 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
279 uint64_t v)
281 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
282 *p = v;
283 } else {
284 memcpy(p, &v, sizeof(v));
287 #endif
289 /* label relocation processing */
291 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
292 TCGLabel *l, intptr_t addend)
294 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
296 r->type = type;
297 r->ptr = code_ptr;
298 r->addend = addend;
299 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
302 static void tcg_out_label(TCGContext *s, TCGLabel *l)
304 tcg_debug_assert(!l->has_value);
305 l->has_value = 1;
306 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
309 TCGLabel *gen_new_label(void)
311 TCGContext *s = tcg_ctx;
312 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
314 memset(l, 0, sizeof(TCGLabel));
315 l->id = s->nb_labels++;
316 QSIMPLEQ_INIT(&l->relocs);
318 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
320 return l;
323 static bool tcg_resolve_relocs(TCGContext *s)
325 TCGLabel *l;
327 QSIMPLEQ_FOREACH(l, &s->labels, next) {
328 TCGRelocation *r;
329 uintptr_t value = l->u.value;
331 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
332 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
333 return false;
337 return true;
340 static void set_jmp_reset_offset(TCGContext *s, int which)
343 * We will check for overflow at the end of the opcode loop in
344 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
346 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
/*
 * Constraint-set machinery: tcg-target-con-set.h is included three times
 * with the C_On_Im macros redefined each time, producing (1) an enum of
 * all constraint combinations, (2) a parallel table of constraint strings,
 * and (3) expansion of the enumerators returned by tcg_target_op_def().
 */
349 #define C_PFX1(P, A) P##A
350 #define C_PFX2(P, A, B) P##A##_##B
351 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
352 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
353 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
354 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
356 /* Define an enumeration for the various combinations. */
358 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
359 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
360 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
361 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
363 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
364 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
365 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
366 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
368 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
370 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
371 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
372 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
373 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
375 typedef enum {
376 #include "tcg-target-con-set.h"
377 } TCGConstraintSetIndex;
379 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
381 #undef C_O0_I1
382 #undef C_O0_I2
383 #undef C_O0_I3
384 #undef C_O0_I4
385 #undef C_O1_I1
386 #undef C_O1_I2
387 #undef C_O1_I3
388 #undef C_O1_I4
389 #undef C_N1_I2
390 #undef C_O2_I1
391 #undef C_O2_I2
392 #undef C_O2_I3
393 #undef C_O2_I4
395 /* Put all of the constraint sets into an array, indexed by the enum. */
397 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
398 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
399 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
400 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
402 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
403 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
404 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
405 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
407 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
409 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
410 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
411 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
412 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
414 static const TCGTargetOpDef constraint_sets[] = {
415 #include "tcg-target-con-set.h"
419 #undef C_O0_I1
420 #undef C_O0_I2
421 #undef C_O0_I3
422 #undef C_O0_I4
423 #undef C_O1_I1
424 #undef C_O1_I2
425 #undef C_O1_I3
426 #undef C_O1_I4
427 #undef C_N1_I2
428 #undef C_O2_I1
429 #undef C_O2_I2
430 #undef C_O2_I3
431 #undef C_O2_I4
433 /* Expand the enumerator to be returned from tcg_target_op_def(). */
435 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
436 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
437 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
438 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
440 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
441 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
442 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
443 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
445 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
447 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
448 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
449 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
450 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
452 #include "tcg-target.c.inc"
454 /* compare a pointer @ptr and a tb_tc @s */
455 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
457 if (ptr >= s->ptr + s->size) {
458 return 1;
459 } else if (ptr < s->ptr) {
460 return -1;
462 return 0;
465 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
467 const struct tb_tc *a = ap;
468 const struct tb_tc *b = bp;
471 * When both sizes are set, we know this isn't a lookup.
472 * This is the most likely case: every TB must be inserted; lookups
473 * are a lot less frequent.
475 if (likely(a->size && b->size)) {
476 if (a->ptr > b->ptr) {
477 return 1;
478 } else if (a->ptr < b->ptr) {
479 return -1;
481 /* a->ptr == b->ptr should happen only on deletions */
482 g_assert(a->size == b->size);
483 return 0;
486 * All lookups have either .size field set to 0.
487 * From the glib sources we see that @ap is always the lookup key. However
488 * the docs provide no guarantee, so we just mark this case as likely.
490 if (likely(a->size == 0)) {
491 return ptr_cmp_tb_tc(a->ptr, b);
493 return ptr_cmp_tb_tc(b->ptr, a);
496 static void tcg_region_trees_init(void)
498 size_t i;
500 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
501 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
502 for (i = 0; i < region.n; i++) {
503 struct tcg_region_tree *rt = region_trees + i * tree_size;
505 qemu_mutex_init(&rt->lock);
506 rt->tree = g_tree_new(tb_tc_cmp);
510 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
512 void *p = tcg_splitwx_to_rw(cp);
513 size_t region_idx;
515 if (p < region.start_aligned) {
516 region_idx = 0;
517 } else {
518 ptrdiff_t offset = p - region.start_aligned;
520 if (offset > region.stride * (region.n - 1)) {
521 region_idx = region.n - 1;
522 } else {
523 region_idx = offset / region.stride;
526 return region_trees + region_idx * tree_size;
529 void tcg_tb_insert(TranslationBlock *tb)
531 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
533 qemu_mutex_lock(&rt->lock);
534 g_tree_insert(rt->tree, &tb->tc, tb);
535 qemu_mutex_unlock(&rt->lock);
538 void tcg_tb_remove(TranslationBlock *tb)
540 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
542 qemu_mutex_lock(&rt->lock);
543 g_tree_remove(rt->tree, &tb->tc);
544 qemu_mutex_unlock(&rt->lock);
548 * Find the TB 'tb' such that
549 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
550 * Return NULL if not found.
552 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
554 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
555 TranslationBlock *tb;
556 struct tb_tc s = { .ptr = (void *)tc_ptr };
558 qemu_mutex_lock(&rt->lock);
559 tb = g_tree_lookup(rt->tree, &s);
560 qemu_mutex_unlock(&rt->lock);
561 return tb;
564 static void tcg_region_tree_lock_all(void)
566 size_t i;
568 for (i = 0; i < region.n; i++) {
569 struct tcg_region_tree *rt = region_trees + i * tree_size;
571 qemu_mutex_lock(&rt->lock);
575 static void tcg_region_tree_unlock_all(void)
577 size_t i;
579 for (i = 0; i < region.n; i++) {
580 struct tcg_region_tree *rt = region_trees + i * tree_size;
582 qemu_mutex_unlock(&rt->lock);
586 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
588 size_t i;
590 tcg_region_tree_lock_all();
591 for (i = 0; i < region.n; i++) {
592 struct tcg_region_tree *rt = region_trees + i * tree_size;
594 g_tree_foreach(rt->tree, func, user_data);
596 tcg_region_tree_unlock_all();
599 size_t tcg_nb_tbs(void)
601 size_t nb_tbs = 0;
602 size_t i;
604 tcg_region_tree_lock_all();
605 for (i = 0; i < region.n; i++) {
606 struct tcg_region_tree *rt = region_trees + i * tree_size;
608 nb_tbs += g_tree_nnodes(rt->tree);
610 tcg_region_tree_unlock_all();
611 return nb_tbs;
614 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
616 TranslationBlock *tb = v;
618 tb_destroy(tb);
619 return FALSE;
622 static void tcg_region_tree_reset_all(void)
624 size_t i;
626 tcg_region_tree_lock_all();
627 for (i = 0; i < region.n; i++) {
628 struct tcg_region_tree *rt = region_trees + i * tree_size;
630 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
631 /* Increment the refcount first so that destroy acts as a reset */
632 g_tree_ref(rt->tree);
633 g_tree_destroy(rt->tree);
635 tcg_region_tree_unlock_all();
638 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
640 void *start, *end;
642 start = region.start_aligned + curr_region * region.stride;
643 end = start + region.size;
645 if (curr_region == 0) {
646 start = region.start;
648 if (curr_region == region.n - 1) {
649 end = region.end;
652 *pstart = start;
653 *pend = end;
656 static void tcg_region_assign(TCGContext *s, size_t curr_region)
658 void *start, *end;
660 tcg_region_bounds(curr_region, &start, &end);
662 s->code_gen_buffer = start;
663 s->code_gen_ptr = start;
664 s->code_gen_buffer_size = end - start;
665 s->code_gen_highwater = end - TCG_HIGHWATER;
668 static bool tcg_region_alloc__locked(TCGContext *s)
670 if (region.current == region.n) {
671 return true;
673 tcg_region_assign(s, region.current);
674 region.current++;
675 return false;
679 * Request a new region once the one in use has filled up.
680 * Returns true on error.
682 static bool tcg_region_alloc(TCGContext *s)
684 bool err;
685 /* read the region size now; alloc__locked will overwrite it on success */
686 size_t size_full = s->code_gen_buffer_size;
688 qemu_mutex_lock(&region.lock);
689 err = tcg_region_alloc__locked(s);
690 if (!err) {
691 region.agg_size_full += size_full - TCG_HIGHWATER;
693 qemu_mutex_unlock(&region.lock);
694 return err;
698 * Perform a context's first region allocation.
699 * This function does _not_ increment region.agg_size_full.
701 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
703 return tcg_region_alloc__locked(s);
706 /* Call from a safe-work context */
707 void tcg_region_reset_all(void)
709 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
710 unsigned int i;
712 qemu_mutex_lock(&region.lock);
713 region.current = 0;
714 region.agg_size_full = 0;
716 for (i = 0; i < n_ctxs; i++) {
717 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
718 bool err = tcg_region_initial_alloc__locked(s);
720 g_assert(!err);
722 qemu_mutex_unlock(&region.lock);
724 tcg_region_tree_reset_all();
727 #ifdef CONFIG_USER_ONLY
728 static size_t tcg_n_regions(void)
730 return 1;
732 #else
734 * It is likely that some vCPUs will translate more code than others, so we
735 * first try to set more regions than max_cpus, with those regions being of
736 * reasonable size. If that's not possible we make do by evenly dividing
737 * the code_gen_buffer among the vCPUs.
739 static size_t tcg_n_regions(void)
741 size_t i;
743 /* Use a single region if all we have is one vCPU thread */
744 #if !defined(CONFIG_USER_ONLY)
745 MachineState *ms = MACHINE(qdev_get_machine());
746 unsigned int max_cpus = ms->smp.max_cpus;
747 #endif
748 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
749 return 1;
752 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
753 for (i = 8; i > 0; i--) {
754 size_t regions_per_thread = i;
755 size_t region_size;
757 region_size = tcg_init_ctx.code_gen_buffer_size;
758 region_size /= max_cpus * regions_per_thread;
760 if (region_size >= 2 * 1024u * 1024) {
761 return max_cpus * regions_per_thread;
764 /* If we can't, then just allocate one region per vCPU thread */
765 return max_cpus;
767 #endif
770 * Initializes region partitioning.
772 * Called at init time from the parent thread (i.e. the one calling
773 * tcg_context_init), after the target's TCG globals have been set.
775 * Region partitioning works by splitting code_gen_buffer into separate regions,
776 * and then assigning regions to TCG threads so that the threads can translate
777 * code in parallel without synchronization.
779 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
780 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
781 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
782 * must have been parsed before calling this function, since it calls
783 * qemu_tcg_mttcg_enabled().
785 * In user-mode we use a single region. Having multiple regions in user-mode
786 * is not supported, because the number of vCPU threads (recall that each thread
787 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
788 * OS, and usually this number is huge (tens of thousands is not uncommon).
789 * Thus, given this large bound on the number of vCPU threads and the fact
790 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
791 * the availability of at least one region per vCPU thread.
793 * However, this user-mode limitation is unlikely to be a significant problem
794 * in practice. Multi-threaded guests share most if not all of their translated
795 * code, which makes parallel code generation less appealing than in softmmu.
797 void tcg_region_init(void)
799 void *buf = tcg_init_ctx.code_gen_buffer;
800 void *aligned;
801 size_t size = tcg_init_ctx.code_gen_buffer_size;
802 size_t page_size = qemu_real_host_page_size;
803 size_t region_size;
804 size_t n_regions;
805 size_t i;
806 uintptr_t splitwx_diff;
808 n_regions = tcg_n_regions();
810 /* The first region will be 'aligned - buf' bytes larger than the others */
811 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
812 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
814 * Make region_size a multiple of page_size, using aligned as the start.
815 * As a result of this we might end up with a few extra pages at the end of
816 * the buffer; we will assign those to the last region.
818 region_size = (size - (aligned - buf)) / n_regions;
819 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
821 /* A region must have at least 2 pages; one code, one guard */
822 g_assert(region_size >= 2 * page_size);
824 /* init the region struct */
825 qemu_mutex_init(&region.lock);
826 region.n = n_regions;
827 region.size = region_size - page_size;
828 region.stride = region_size;
829 region.start = buf;
830 region.start_aligned = aligned;
831 /* page-align the end, since its last page will be a guard page */
832 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
833 /* account for that last guard page */
834 region.end -= page_size;
836 /* set guard pages */
837 splitwx_diff = tcg_splitwx_diff;
838 for (i = 0; i < region.n; i++) {
839 void *start, *end;
840 int rc;
842 tcg_region_bounds(i, &start, &end);
843 rc = qemu_mprotect_none(end, page_size);
844 g_assert(!rc);
845 if (splitwx_diff) {
846 rc = qemu_mprotect_none(end + splitwx_diff, page_size);
847 g_assert(!rc);
851 tcg_region_trees_init();
853 /* In user-mode we support only one ctx, so do the initial allocation now */
854 #ifdef CONFIG_USER_ONLY
856 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
858 g_assert(!err);
860 #endif
#ifdef CONFIG_DEBUG_TCG
/*
 * Debug builds check the split-W^X pointer conversions: the input must
 * lie inside the code_gen_buffer, and NULL passes through unchanged.
 */
const void *tcg_splitwx_to_rx(void *rw)
{
    if (rw == NULL) {
        return NULL;
    }
    g_assert(in_code_gen_buffer(rw));
    return rw + tcg_splitwx_diff;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    if (rx == NULL) {
        return NULL;
    }
    rx -= tcg_splitwx_diff;
    /* Assert that we end with a pointer in the rw region. */
    g_assert(in_code_gen_buffer(rx));
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
886 static void alloc_tcg_plugin_context(TCGContext *s)
888 #ifdef CONFIG_PLUGIN
889 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
890 s->plugin_tb->insns =
891 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
892 #endif
896 * All TCG threads except the parent (i.e. the one that called tcg_context_init
897 * and registered the target's TCG globals) must register with this function
898 * before initiating translation.
900 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
901 * of tcg_region_init() for the reasoning behind this.
903 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
904 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
905 * is not used anymore for translation once this function is called.
907 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
908 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
910 #ifdef CONFIG_USER_ONLY
911 void tcg_register_thread(void)
913 tcg_ctx = &tcg_init_ctx;
915 #else
916 void tcg_register_thread(void)
918 MachineState *ms = MACHINE(qdev_get_machine());
919 TCGContext *s = g_malloc(sizeof(*s));
920 unsigned int i, n;
921 bool err;
923 *s = tcg_init_ctx;
925 /* Relink mem_base. */
926 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
927 if (tcg_init_ctx.temps[i].mem_base) {
928 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
929 tcg_debug_assert(b >= 0 && b < n);
930 s->temps[i].mem_base = &s->temps[b];
934 /* Claim an entry in tcg_ctxs */
935 n = qatomic_fetch_inc(&n_tcg_ctxs);
936 g_assert(n < ms->smp.max_cpus);
937 qatomic_set(&tcg_ctxs[n], s);
939 if (n > 0) {
940 alloc_tcg_plugin_context(s);
943 tcg_ctx = s;
944 qemu_mutex_lock(&region.lock);
945 err = tcg_region_initial_alloc__locked(tcg_ctx);
946 g_assert(!err);
947 qemu_mutex_unlock(&region.lock);
949 #endif /* !CONFIG_USER_ONLY */
952 * Returns the size (in bytes) of all translated code (i.e. from all regions)
953 * currently in the cache.
954 * See also: tcg_code_capacity()
955 * Do not confuse with tcg_current_code_size(); that one applies to a single
956 * TCG context.
958 size_t tcg_code_size(void)
960 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
961 unsigned int i;
962 size_t total;
964 qemu_mutex_lock(&region.lock);
965 total = region.agg_size_full;
966 for (i = 0; i < n_ctxs; i++) {
967 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
968 size_t size;
970 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
971 g_assert(size <= s->code_gen_buffer_size);
972 total += size;
974 qemu_mutex_unlock(&region.lock);
975 return total;
979 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
980 * regions.
981 * See also: tcg_code_size()
983 size_t tcg_code_capacity(void)
985 size_t guard_size, capacity;
987 /* no need for synchronization; these variables are set at init time */
988 guard_size = region.stride - region.size;
989 capacity = region.end + guard_size - region.start;
990 capacity -= region.n * (guard_size + TCG_HIGHWATER);
991 return capacity;
994 size_t tcg_tb_phys_invalidate_count(void)
996 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
997 unsigned int i;
998 size_t total = 0;
1000 for (i = 0; i < n_ctxs; i++) {
1001 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1003 total += qatomic_read(&s->tb_phys_invalidate_count);
1005 return total;
1008 /* pool based memory allocation */
1009 void *tcg_malloc_internal(TCGContext *s, int size)
1011 TCGPool *p;
1012 int pool_size;
1014 if (size > TCG_POOL_CHUNK_SIZE) {
1015 /* big malloc: insert a new pool (XXX: could optimize) */
1016 p = g_malloc(sizeof(TCGPool) + size);
1017 p->size = size;
1018 p->next = s->pool_first_large;
1019 s->pool_first_large = p;
1020 return p->data;
1021 } else {
1022 p = s->pool_current;
1023 if (!p) {
1024 p = s->pool_first;
1025 if (!p)
1026 goto new_pool;
1027 } else {
1028 if (!p->next) {
1029 new_pool:
1030 pool_size = TCG_POOL_CHUNK_SIZE;
1031 p = g_malloc(sizeof(TCGPool) + pool_size);
1032 p->size = pool_size;
1033 p->next = NULL;
1034 if (s->pool_current)
1035 s->pool_current->next = p;
1036 else
1037 s->pool_first = p;
1038 } else {
1039 p = p->next;
1043 s->pool_current = p;
1044 s->pool_cur = p->data + size;
1045 s->pool_end = p->data + p->size;
1046 return p->data;
1049 void tcg_pool_reset(TCGContext *s)
1051 TCGPool *p, *t;
1052 for (p = s->pool_first_large; p; p = t) {
1053 t = p->next;
1054 g_free(p);
1056 s->pool_first_large = NULL;
1057 s->pool_cur = s->pool_end = NULL;
1058 s->pool_current = NULL;
/*
 * Per-helper metadata generated from exec/helper-tcg.h; 'func' doubles as
 * the key in helper_table (see tcg_context_init).
 * NOTE(review): 'flags' and 'sizemask' encodings come from the helper
 * macro expansion — confirm against include/exec/helper-head.h.
 */
1061 typedef struct TCGHelperInfo {
1062 void *func;
1063 const char *name;
1064 unsigned flags;
1065 unsigned sizemask;
1066 } TCGHelperInfo;
1068 #include "exec/helper-proto.h"
1070 static const TCGHelperInfo all_helpers[] = {
1071 #include "exec/helper-tcg.h"
1073 static GHashTable *helper_table;
1075 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1076 static void process_op_defs(TCGContext *s);
1077 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1078 TCGReg reg, const char *name);
/*
 * One-time initialization of the TCG context S: allocates the argument
 * constraint arrays for every opcode, registers all helpers, initializes
 * the target backend, computes the indirect register allocation order,
 * and creates the fixed "env" global.  Must run before any translation.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* One shared allocation, carved up per opcode below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    /* AREG0 (env) must not already be reserved by the backend. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 *
 * Returns NULL when the current region is exhausted and no further
 * region can be allocated; otherwise returns a TB whose code will be
 * generated immediately after it (at the cache-aligned 'next' pointer).
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    /* Align both the TB itself and the code that follows it. */
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Region full: grab a new one and retry, or give up. */
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
/*
 * Generate the prologue/epilogue at the start of code_gen_buffer, then
 * shrink the buffer so later TB generation starts after the prologue.
 * Also flushes the icache for the generated code and registers the
 * buffer with the JIT debug interface.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer. */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    /* Entry point is the executable (RX) alias of the buffer start. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require. */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer. */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Disassemble code up to the constant pool, then dump the pool. */
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely. */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}
/*
 * Reset the per-TB translation state in S, keeping only the globals:
 * frees pooled memory, drops all non-global temps and interned constants,
 * and reinitializes the op/label lists for a fresh translation.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}
1308 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1310 int n = s->nb_temps++;
1312 if (n >= TCG_MAX_TEMPS) {
1313 /* Signal overflow, starting over with fewer guest insns. */
1314 siglongjmp(s->jmp_trans, -2);
1316 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1319 static TCGTemp *tcg_global_alloc(TCGContext *s)
1321 TCGTemp *ts;
1323 tcg_debug_assert(s->nb_globals == s->nb_temps);
1324 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1325 s->nb_globals++;
1326 ts = tcg_temp_alloc(s);
1327 ts->kind = TEMP_GLOBAL;
1329 return ts;
1332 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1333 TCGReg reg, const char *name)
1335 TCGTemp *ts;
1337 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1338 tcg_abort();
1341 ts = tcg_global_alloc(s);
1342 ts->base_type = type;
1343 ts->type = type;
1344 ts->kind = TEMP_FIXED;
1345 ts->reg = reg;
1346 ts->name = name;
1347 tcg_regset_set_reg(s->reserved_regs, reg);
1349 return ts;
/* Record the TCG stack frame [start, start+size) based at host register
   REG, and create the "_frame" fixed global pointing at it. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1360 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1361 intptr_t offset, const char *name)
1363 TCGContext *s = tcg_ctx;
1364 TCGTemp *base_ts = tcgv_ptr_temp(base);
1365 TCGTemp *ts = tcg_global_alloc(s);
1366 int indirect_reg = 0, bigendian = 0;
1367 #ifdef HOST_WORDS_BIGENDIAN
1368 bigendian = 1;
1369 #endif
1371 switch (base_ts->kind) {
1372 case TEMP_FIXED:
1373 break;
1374 case TEMP_GLOBAL:
1375 /* We do not support double-indirect registers. */
1376 tcg_debug_assert(!base_ts->indirect_reg);
1377 base_ts->indirect_base = 1;
1378 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1379 ? 2 : 1);
1380 indirect_reg = 1;
1381 break;
1382 default:
1383 g_assert_not_reached();
1386 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1387 TCGTemp *ts2 = tcg_global_alloc(s);
1388 char buf[64];
1390 ts->base_type = TCG_TYPE_I64;
1391 ts->type = TCG_TYPE_I32;
1392 ts->indirect_reg = indirect_reg;
1393 ts->mem_allocated = 1;
1394 ts->mem_base = base_ts;
1395 ts->mem_offset = offset + bigendian * 4;
1396 pstrcpy(buf, sizeof(buf), name);
1397 pstrcat(buf, sizeof(buf), "_0");
1398 ts->name = strdup(buf);
1400 tcg_debug_assert(ts2 == ts + 1);
1401 ts2->base_type = TCG_TYPE_I64;
1402 ts2->type = TCG_TYPE_I32;
1403 ts2->indirect_reg = indirect_reg;
1404 ts2->mem_allocated = 1;
1405 ts2->mem_base = base_ts;
1406 ts2->mem_offset = offset + (1 - bigendian) * 4;
1407 pstrcpy(buf, sizeof(buf), name);
1408 pstrcat(buf, sizeof(buf), "_1");
1409 ts2->name = strdup(buf);
1410 } else {
1411 ts->base_type = type;
1412 ts->type = type;
1413 ts->indirect_reg = indirect_reg;
1414 ts->mem_allocated = 1;
1415 ts->mem_base = base_ts;
1416 ts->mem_offset = offset;
1417 ts->name = name;
1419 return ts;
/*
 * Allocate a temp of TYPE; TEMP_LOCAL temps survive across basic blocks,
 * TEMP_NORMAL temps do not.  Reuses a previously freed temp of the same
 * type/kind when one is available in the free_temps bitmap; otherwise
 * allocates fresh slots (an adjacent pair of I32s for I64 on 32-bit hosts).
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    /* Free lists are indexed by type, with locals in the upper half. */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type. */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit value on a 32-bit host: adjacent pair of I32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
/* Allocate a new vector temp of TYPE.  In debug builds, verify the host
   backend actually supports vectors of that width. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}
1492 /* Create a new temp of the same type as an existing temp. */
1493 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1495 TCGTemp *t = tcgv_vec_temp(match);
1497 tcg_debug_assert(t->temp_allocated != 0);
1499 t = tcg_temp_new_internal(t->base_type, 0);
1500 return temp_tcgv_vec(t);
/*
 * Return TS to the appropriate free list so a later tcg_temp_new_internal
 * of the same type/kind can reuse it.  Frees of interned constants are
 * silently ignored; globals and fixed temps must never be freed.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    /* Mirror the free-list indexing used in tcg_temp_new_internal. */
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}
/*
 * Return the interned TEMP_CONST temp for VAL of TYPE, creating it on
 * first use.  Constants are deduplicated per type via a hash table whose
 * key is a pointer to the temp's own 'val' field, so equal values always
 * map to the same temp within one translation.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily created; emptied (not destroyed) by tcg_func_start. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        /* Key points at ts->val itself, which outlives the table entry. */
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
1577 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1579 val = dup_const(vece, val);
1580 return temp_tcgv_vec(tcg_constant_internal(type, val));
1583 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1585 TCGTemp *t = tcgv_vec_temp(match);
1587 tcg_debug_assert(t->temp_allocated != 0);
1588 return tcg_constant_vec(t->base_type, vece, val);
1591 TCGv_i32 tcg_const_i32(int32_t val)
1593 TCGv_i32 t0;
1594 t0 = tcg_temp_new_i32();
1595 tcg_gen_movi_i32(t0, val);
1596 return t0;
1599 TCGv_i64 tcg_const_i64(int64_t val)
1601 TCGv_i64 t0;
1602 t0 = tcg_temp_new_i64();
1603 tcg_gen_movi_i64(t0, val);
1604 return t0;
1607 TCGv_i32 tcg_const_local_i32(int32_t val)
1609 TCGv_i32 t0;
1610 t0 = tcg_temp_local_new_i32();
1611 tcg_gen_movi_i32(t0, val);
1612 return t0;
1615 TCGv_i64 tcg_const_local_i64(int64_t val)
1617 TCGv_i64 t0;
1618 t0 = tcg_temp_local_new_i64();
1619 tcg_gen_movi_i64(t0, val);
1620 return t0;
#if defined(CONFIG_DEBUG_TCG)
/* Reset the debug counter of live temporaries. */
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

/* Return 1 (and reset the counter) if any temporaries were leaked since
   the last check, 0 otherwise. */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Ops every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-backend capability macros. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops: mandatory on 64-bit hosts, absent on 32-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: require any vector support, plus per-op capability. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes beyond the generic set are supported. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call to helper FUNC with NARGS arguments from ARGS,
 * writing the result (if any) to RET.  FUNC must have been registered
 * in helper_table (tcg_context_init); its flags and sizemask drive the
 * host-ABI-specific argument marshalling below.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Backends that require it: widen 32-bit args to 64 bits up front. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: two halves, host-endian order. */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the widened copies created before the call. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
/*
 * Initialize register-allocator state for a new TB: give every temp its
 * starting location kind (constant, fixed register, memory, or dead) and
 * clear the register->temp map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            /* Globals start out in their canonical memory slot. */
            break;
        case TEMP_NORMAL:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            /* Frame slots are assigned lazily, per TB. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
/*
 * Format a human-readable name for temp TS into BUF (for op dumps):
 * globals print their name, locals "locN", normals "tmpN", and constants
 * their value (with a width prefix for vector constants).  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2186 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2187 int buf_size, TCGArg arg)
2189 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2192 /* Find helper name. */
2193 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2195 const char *ret = NULL;
2196 if (helper_table) {
2197 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2198 if (info) {
2199 ret = info->name;
2202 return ret;
/* Printable names for TCGCond values, used by the op dumper. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
/* Printable names for MemOp size/sign/endianness combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
/* Printable prefixes for MemOp alignment bits; the unadorned case differs
   depending on whether the target requires natural alignment. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2253 static inline bool tcg_regset_single(TCGRegSet d)
2255 return (d & (d - 1)) == 0;
2258 static inline TCGReg tcg_regset_first(TCGRegSet d)
2260 if (TCG_TARGET_NB_REGS <= 32) {
2261 return ctz32(d);
2262 } else {
2263 return ctz64(d);
/*
 * Dump every op of the current TB to the log, one per line.  When
 * @have_prefs is true, or an op carries liveness data (op->life), also
 * pad to column 40 and append the sync/dead annotations and the output
 * register preferences computed by the liveness passes.
 *
 * Throughout, 'k' is the cursor into op->args[] while 'i' counts how
 * many constant args have already been printed by the special cases.
 */
static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;     /* running output column, for later padding */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            /* Print the guest PC (and other start words) for this insn. */
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each target word is split across two host-sized args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                /* Vector width in bits, element size in bits. */
                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
                                8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            /* Special-case the first constant arg for some opcodes. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* Print the condition symbolically when we know its name. */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    TCGMemOpIdx oi = op->args[k++];
                    /* NB: this MemOp 'op' shadows the outer TCGOp *op. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unknown bits present: dump the raw value. */
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch targets print as $L<id>. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "",
                                arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Remaining constant args, printed raw. */
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
            }
        }

        if (have_prefs || op->life) {
            QemuLogFile *logfile;

            /* Pad to a fixed column so annotations line up. */
            rcu_read_lock();
            logfile = qatomic_rcu_read(&qemu_logfile);
            if (logfile) {
                for (; col < 40; ++col) {
                    putc(' ', logfile->fd);
                }
            }
            rcu_read_unlock();
        }

        if (op->life) {
            unsigned life = op->life;

            /* Low bits: outputs needing a sync to memory. */
            if (life & (SYNC_ARG * 3)) {
                qemu_log("  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            /* Remaining bits: arguments that die at this op. */
            life /= DEAD_ARG;
            if (life) {
                qemu_log("  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = op->output_pref[i];

                if (i == 0) {
                    qemu_log("  pref=");
                } else {
                    qemu_log(",");
                }
                if (set == 0) {
                    qemu_log("none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    qemu_log("all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    qemu_log("%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    qemu_log("%#x", (uint32_t)set);
                } else {
                    qemu_log("%#" PRIx64, (uint64_t)set);
                }
            }
        }

        qemu_log("\n");
    }
}
2471 /* we give more priority to constraints with less registers */
2472 static int get_constraint_priority(const TCGOpDef *def, int k)
2474 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2475 int n;
2477 if (arg_ct->oalias) {
2478 /* an alias is equivalent to a single register */
2479 n = 1;
2480 } else {
2481 n = ctpop64(arg_ct->regs);
2483 return TCG_TARGET_NB_REGS - n + 1;
2486 /* sort from highest priority to lowest */
2487 static void sort_constraints(TCGOpDef *def, int start, int n)
2489 int i, j;
2490 TCGArgConstraint *a = def->args_ct;
2492 for (i = 0; i < n; i++) {
2493 a[start + i].sort_index = start + i;
2495 if (n <= 1) {
2496 return;
2498 for (i = 0; i < n - 1; i++) {
2499 for (j = i + 1; j < n; j++) {
2500 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2501 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2502 if (p1 < p2) {
2503 int tmp = a[start + i].sort_index;
2504 a[start + i].sort_index = a[start + j].sort_index;
2505 a[start + j].sort_index = tmp;
/*
 * Fill in the register/constant constraints (args_ct) for every opcode
 * from the target backend's constraint-set table, resolve output-alias
 * digits, and sort each op's constraints by priority.  Called once at
 * context init time.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* Digit N: this input aliases output arg N. */
                        int oarg = *ct_str - '0';
                        /* An alias must be the only constraint letter. */
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias. */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must not overlap any input register. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Argument may be an immediate constant. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2598 void tcg_op_remove(TCGContext *s, TCGOp *op)
2600 TCGLabel *label;
2602 switch (op->opc) {
2603 case INDEX_op_br:
2604 label = arg_label(op->args[0]);
2605 label->refs--;
2606 break;
2607 case INDEX_op_brcond_i32:
2608 case INDEX_op_brcond_i64:
2609 label = arg_label(op->args[3]);
2610 label->refs--;
2611 break;
2612 case INDEX_op_brcond2_i32:
2613 label = arg_label(op->args[5]);
2614 label->refs--;
2615 break;
2616 default:
2617 break;
2620 QTAILQ_REMOVE(&s->ops, op, link);
2621 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2622 s->nb_ops--;
2624 #ifdef CONFIG_PROFILER
2625 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2626 #endif
2629 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2631 TCGContext *s = tcg_ctx;
2632 TCGOp *op;
2634 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2635 op = tcg_malloc(sizeof(TCGOp));
2636 } else {
2637 op = QTAILQ_FIRST(&s->free_ops);
2638 QTAILQ_REMOVE(&s->free_ops, op, link);
2640 memset(op, 0, offsetof(TCGOp, link));
2641 op->opc = opc;
2642 s->nb_ops++;
2644 return op;
2647 TCGOp *tcg_emit_op(TCGOpcode opc)
2649 TCGOp *op = tcg_op_alloc(opc);
2650 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2651 return op;
2654 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2656 TCGOp *new_op = tcg_op_alloc(opc);
2657 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2658 return new_op;
2661 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2663 TCGOp *new_op = tcg_op_alloc(opc);
2664 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2665 return new_op;
/* Reachable analysis : remove unreachable code.  */
/* Walk the op list forward, tracking whether the current position is
   reachable ('dead' = false).  Ops in a dead region are removed, except
   insn_start markers, which are needed for unwinding. */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;
        int call_flags;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
            if (call_flags & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
/* Liveness state bits stored in TCGTemp.state:
   TS_DEAD -- the value is not needed by any later op;
   TS_MEM  -- an up-to-date copy of the value exists in memory. */
#define TS_DEAD 1
#define TS_MEM 2

/* Query the per-op liveness annotations; 'arg_life' must be in scope. */
#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* During pass 1, state_ptr points into the prefs[] array allocated
       at the top of liveness_pass_1. */
    return ts->state_ptr;
}
/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    /* A dead temp prefers nothing; a live temp may use any register
       valid for its type. */
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}
2764 /* liveness analysis: end of function: all temps are dead, and globals
2765 should be in memory. */
2766 static void la_func_end(TCGContext *s, int ng, int nt)
2768 int i;
2770 for (i = 0; i < ng; ++i) {
2771 s->temps[i].state = TS_DEAD | TS_MEM;
2772 la_reset_pref(&s->temps[i]);
2774 for (i = ng; i < nt; ++i) {
2775 s->temps[i].state = TS_DEAD;
2776 la_reset_pref(&s->temps[i]);
2780 /* liveness analysis: end of basic block: all temps are dead, globals
2781 and local temps should be in memory. */
2782 static void la_bb_end(TCGContext *s, int ng, int nt)
2784 int i;
2786 for (i = 0; i < nt; ++i) {
2787 TCGTemp *ts = &s->temps[i];
2788 int state;
2790 switch (ts->kind) {
2791 case TEMP_FIXED:
2792 case TEMP_GLOBAL:
2793 case TEMP_LOCAL:
2794 state = TS_DEAD | TS_MEM;
2795 break;
2796 case TEMP_NORMAL:
2797 case TEMP_CONST:
2798 state = TS_DEAD;
2799 break;
2800 default:
2801 g_assert_not_reached();
2803 ts->state = state;
2804 la_reset_pref(ts);
2808 /* liveness analysis: sync globals back to memory. */
2809 static void la_global_sync(TCGContext *s, int ng)
2811 int i;
2813 for (i = 0; i < ng; ++i) {
2814 int state = s->temps[i].state;
2815 s->temps[i].state = state | TS_MEM;
2816 if (state == TS_DEAD) {
2817 /* If the global was previously dead, reset prefs. */
2818 la_reset_pref(&s->temps[i]);
/*
 * liveness analysis: conditional branch: all temps are dead,
 * globals and local temps should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_LOCAL:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live: keep its current register preferences. */
                continue;
            }
            break;
        case TEMP_NORMAL:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_CONST:
            /* Constants keep their state and preferences. */
            continue;
        default:
            g_assert_not_reached();
        }
        /* Only reached for temps that just became (or stayed) dead. */
        la_reset_pref(&s->temps[i]);
    }
}
2855 /* liveness analysis: sync globals back to memory and kill. */
2856 static void la_global_kill(TCGContext *s, int ng)
2858 int i;
2860 for (i = 0; i < ng; i++) {
2861 s->temps[i].state = TS_DEAD | TS_MEM;
2862 la_reset_pref(&s->temps[i]);
2866 /* liveness analysis: note live globals crossing calls. */
2867 static void la_cross_call(TCGContext *s, int nt)
2869 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2870 int i;
2872 for (i = 0; i < nt; i++) {
2873 TCGTemp *ts = &s->temps[i];
2874 if (!(ts->state & TS_DEAD)) {
2875 TCGRegSet *pset = la_temp_pref(ts);
2876 TCGRegSet set = *pset;
2878 set &= mask;
2879 /* If the combination is not possible, restart. */
2880 if (set == 0) {
2881 set = tcg_target_available_regs[ts->type] & mask;
2883 *pset = set;
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed. */
/* The op list is walked BACKWARD: a temp's state describes its liveness
   *after* the op under consideration.  Along the way, register
   preferences for each temp are accumulated in the prefs[] array. */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = op->args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
                    op->output_pref[i] = 0;
                }

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes.  */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit.  */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
/* For each indirect global a shadow "direct" temp is created; loads from
   the canonical memory slot are inserted before uses and stores after
   writes, using the op->life annotations from pass 1.  Returns true if
   the op stream was changed (so constant folding can be re-run). */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* The shadow temp is not loaded: insert a load. */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after the sync: store the mov source
                           directly and drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3391 #ifdef CONFIG_DEBUG_TCG
3392 static void dump_regs(TCGContext *s)
3394 TCGTemp *ts;
3395 int i;
3396 char buf[64];
3398 for(i = 0; i < s->nb_temps; i++) {
3399 ts = &s->temps[i];
3400 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3401 switch(ts->val_type) {
3402 case TEMP_VAL_REG:
3403 printf("%s", tcg_target_reg_names[ts->reg]);
3404 break;
3405 case TEMP_VAL_MEM:
3406 printf("%d(%s)", (int)ts->mem_offset,
3407 tcg_target_reg_names[ts->mem_base->reg]);
3408 break;
3409 case TEMP_VAL_CONST:
3410 printf("$0x%" PRIx64, ts->val);
3411 break;
3412 case TEMP_VAL_DEAD:
3413 printf("D");
3414 break;
3415 default:
3416 printf("???");
3417 break;
3419 printf("\n");
3422 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3423 if (s->reg_to_temp[i] != NULL) {
3424 printf("%s: %s\n",
3425 tcg_target_reg_names[i],
3426 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
/* Debug helper: verify that the reg_to_temp[] map and the per-temp
   val_type/reg fields agree in both directions; on any inconsistency,
   dump the register state and abort.  Note the 'fail:' label sits
   inside the second loop body and is entered by goto from the first. */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG
            && ts->kind != TEMP_FIXED
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif
/* Assign @ts a slot in the TB's spill frame, aligned to the host word
   size, and mark it allocated.  Aborts if the frame is exhausted. */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
/* Forward declaration; temp_load is defined later with the allocator. */
static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        /* Fixed temps stay bound to their register forever. */
        return;
    case TEMP_GLOBAL:
    case TEMP_LOCAL:
        /* Canonical home is the memory slot. */
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_NORMAL:
        /* "free" keeps the memory copy; "dead" discards the value. */
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        /* Constants can always be rematerialized. */
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    if (ts->val_type == TEMP_VAL_REG) {
        /* Release the host register the temp occupied. */
        s->reg_to_temp[ts->reg] = NULL;
    }
    ts->val_type = new_type;
}
3512 /* Mark a temporary as dead. */
3513 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3515 temp_free_or_dead(s, ts, 1);
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; neither do coherent ones. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant into a register... */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            /* ...and store that register to the memory slot. */
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3562 /* free register 'reg' by spilling the corresponding temporary if necessary */
3563 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3565 TCGTemp *ts = s->reg_to_temp[reg];
3566 if (ts != NULL) {
3567 temp_sync(s, ts, allocated_regs, 0, -1);
/*
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] = all acceptable registers; reg_ct[0] = preferred subset. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference. */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;

    /* We must spill something. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;

    /* Unreachable if reg_ct[1] was non-empty; fatal otherwise. */
    tcg_abort();
/* Make sure the temporary is in a register. If needed, allocate the register
   from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already register-resident; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Integer constant: plain move-immediate. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        /* Freshly materialized value does not yet match the memory slot. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Loaded straight from the slot: register and memory agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    /* Record the new register residence. */
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3703 /* save globals to their canonical location and assume they can be
3704 modified be the following code. 'allocated_regs' is used in case a
3705 temporary registers needs to be allocated to store a constant. */
3706 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3708 int i, n;
3710 for (i = 0, n = s->nb_globals; i < n; i++) {
3711 temp_save(s, &s->temps[i], allocated_regs);
3715 /* sync globals to their canonical location and assume they can be
3716 read by the following code. 'allocated_regs' is used in case a
3717 temporary registers needs to be allocated to store a constant. */
3718 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3720 int i, n;
3722 for (i = 0, n = s->nb_globals; i < n; i++) {
3723 TCGTemp *ts = &s->temps[i];
3724 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3725 || ts->kind == TEMP_FIXED
3726 || ts->mem_coherent);
/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
    int i;

    /* Only non-global temps here; globals are handled by save_globals(). */
    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_LOCAL:
            /* TB-local temps survive the block; verify they are saved. */
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_NORMAL:
            /* The liveness analysis already ensures that temps are dead.
               Keep an tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();

    save_globals(s, allocated_regs);
/*
 * At a conditional branch, we assume all temporaries are dead and
 * all globals and local temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_LOCAL:
            /* Locals may still sit in a register, but must be coherent. */
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_NORMAL:
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Constants are always re-materializable; nothing to check. */
            break;
        default:
            g_assert_not_reached();
/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here. */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    /* Just record the constant; it is materialized on demand. */
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        /* Output must be in memory: store now, killing it if also dead. */
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
3814 * Specialized code generation for INDEX_op_mov_*.
3816 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3818 const TCGLifeData arg_life = op->life;
3819 TCGRegSet allocated_regs, preferred_regs;
3820 TCGTemp *ts, *ots;
3821 TCGType otype, itype;
3823 allocated_regs = s->reserved_regs;
3824 preferred_regs = op->output_pref[0];
3825 ots = arg_temp(op->args[0]);
3826 ts = arg_temp(op->args[1]);
3828 /* ENV should not be modified. */
3829 tcg_debug_assert(!temp_readonly(ots));
3831 /* Note that otype != itype for no-op truncation. */
3832 otype = ots->type;
3833 itype = ts->type;
3835 if (ts->val_type == TEMP_VAL_CONST) {
3836 /* propagate constant or generate sti */
3837 tcg_target_ulong val = ts->val;
3838 if (IS_DEAD_ARG(1)) {
3839 temp_dead(s, ts);
3841 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3842 return;
3845 /* If the source value is in memory we're going to be forced
3846 to have it in a register in order to perform the copy. Copy
3847 the SOURCE value into its own register first, that way we
3848 don't have to reload SOURCE the next time it is used. */
3849 if (ts->val_type == TEMP_VAL_MEM) {
3850 temp_load(s, ts, tcg_target_available_regs[itype],
3851 allocated_regs, preferred_regs);
3854 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3855 if (IS_DEAD_ARG(0)) {
3856 /* mov to a non-saved dead register makes no sense (even with
3857 liveness analysis disabled). */
3858 tcg_debug_assert(NEED_SYNC_ARG(0));
3859 if (!ots->mem_allocated) {
3860 temp_allocate_frame(s, ots);
3862 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3863 if (IS_DEAD_ARG(1)) {
3864 temp_dead(s, ts);
3866 temp_dead(s, ots);
3867 } else {
3868 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3869 /* the mov can be suppressed */
3870 if (ots->val_type == TEMP_VAL_REG) {
3871 s->reg_to_temp[ots->reg] = NULL;
3873 ots->reg = ts->reg;
3874 temp_dead(s, ts);
3875 } else {
3876 if (ots->val_type != TEMP_VAL_REG) {
3877 /* When allocating a new register, make sure to not spill the
3878 input one. */
3879 tcg_regset_set_reg(allocated_regs, ts->reg);
3880 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3881 allocated_regs, preferred_regs,
3882 ots->indirect_base);
3884 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3886 * Cross register class move not supported.
3887 * Store the source register into the destination slot
3888 * and leave the destination temp as TEMP_VAL_MEM.
3890 assert(!temp_readonly(ots));
3891 if (!ts->mem_allocated) {
3892 temp_allocate_frame(s, ots);
3894 tcg_out_st(s, ts->type, ts->reg,
3895 ots->mem_base->reg, ots->mem_offset);
3896 ots->mem_coherent = 1;
3897 temp_free_or_dead(s, ots, -1);
3898 return;
3901 ots->val_type = TEMP_VAL_REG;
3902 ots->mem_coherent = 0;
3903 s->reg_to_temp[ots->reg] = ots;
3904 if (NEED_SYNC_ARG(0)) {
3905 temp_sync(s, ots, allocated_regs, 0, 0);
/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi. */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            /* Try again from memory or a vector input register. */
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        /* fall through */

    case TEMP_VAL_MEM:
#ifdef HOST_WORDS_BIGENDIAN
        /* Address the low element of the value within its slot. */
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        /* dupm failed: load as integer, then dup register-to-register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
/*
 * Generic register allocation and code emission for one TCGOp, driven by
 * the opcode's constraint description in tcg_op_defs[].
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Process inputs in constraint-defined priority order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* Input aliased to an output: prefer the output's registers. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;

            i_preferred_regs = o_preferred_regs;

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output shares the register of its aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Output must not overlap any input register. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
/*
 * Register allocation for INDEX_op_dup2_vec: duplicate a 64-bit value
 * supplied as two 32-bit halves (low in args[1], high in args[2]).
 * Returns false to request the generic expansion instead.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the constant. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
#ifdef HOST_WORDS_BIGENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;

    /* Fall back to generic expansion. */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    return true;
4299 #ifdef TCG_TARGET_STACK_GROWSUP
4300 #define STACK_DIR(x) (-(x))
4301 #else
4302 #define STACK_DIR(x) (x)
4303 #endif
/*
 * Register allocation and code emission for INDEX_op_call: place
 * arguments in ABI registers / stack slots, clobber call-clobbered
 * registers, emit the call, then bind outputs to the return registers.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* Callee address and call flags are stored after the real arguments. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Only the first nb_regs arguments travel in registers. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever occupies the ABI register, then move. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
            } else {
                TCGRegSet arg_set = 0;

                /* Load the value directly into the ABI register. */
                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);

            tcg_regset_set_reg(allocated_regs, reg);

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        /* Bind the output temp to the ABI return register. */
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
/* Accumulate 'field' from @from into @to, reading @from atomically. */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

/* Keep in @to the maximum 'field' seen, reading @from atomically. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
    } while (0)
/* Pass in a zero'ed @prof */
/* Aggregate the profiling data of every TCG context into @prof:
   scalar counters when @counters, the per-opcode table when @table. */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        if (table) {
            int i;  /* NOTE(review): shadows the outer 'i'; harmless here. */

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
4496 #undef PROF_ADD
4497 #undef PROF_MAX
/* Snapshot only the scalar counters, not the per-opcode table. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
    tcg_profile_snapshot(prof, true, false);
/* Snapshot only the per-opcode table, not the scalar counters. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
    tcg_profile_snapshot(prof, false, true);
4509 void tcg_dump_op_count(void)
4511 TCGProfile prof = {};
4512 int i;
4514 tcg_profile_snapshot_table(&prof);
4515 for (i = 0; i < NB_OPS; i++) {
4516 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4517 prof.table_op_count[i]);
/* Total guest-code execution time, summed over all TCG contexts. */
int64_t tcg_cpu_exec_time(void)
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    return ret;
4535 #else
/* CONFIG_PROFILER disabled: report that the profiler is unavailable. */
void tcg_dump_op_count(void)
    qemu_printf("[TCG profiler not compiled]\n");
/* CONFIG_PROFILER disabled: hard error; this function does not return. */
int64_t tcg_cpu_exec_time(void)
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
4546 #endif
4549 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4551 #ifdef CONFIG_PROFILER
4552 TCGProfile *prof = &s->prof;
4553 #endif
4554 int i, num_insns;
4555 TCGOp *op;
4557 #ifdef CONFIG_PROFILER
4559 int n = 0;
4561 QTAILQ_FOREACH(op, &s->ops, link) {
4562 n++;
4564 qatomic_set(&prof->op_count, prof->op_count + n);
4565 if (n > prof->op_count_max) {
4566 qatomic_set(&prof->op_count_max, n);
4569 n = s->nb_temps;
4570 qatomic_set(&prof->temp_count, prof->temp_count + n);
4571 if (n > prof->temp_count_max) {
4572 qatomic_set(&prof->temp_count_max, n);
4575 #endif
4577 #ifdef DEBUG_DISAS
4578 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4579 && qemu_log_in_addr_range(tb->pc))) {
4580 FILE *logfile = qemu_log_lock();
4581 qemu_log("OP:\n");
4582 tcg_dump_ops(s, false);
4583 qemu_log("\n");
4584 qemu_log_unlock(logfile);
4586 #endif
4588 #ifdef CONFIG_DEBUG_TCG
4589 /* Ensure all labels referenced have been emitted. */
4591 TCGLabel *l;
4592 bool error = false;
4594 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4595 if (unlikely(!l->present) && l->refs) {
4596 qemu_log_mask(CPU_LOG_TB_OP,
4597 "$L%d referenced but not present.\n", l->id);
4598 error = true;
4601 assert(!error);
4603 #endif
4605 #ifdef CONFIG_PROFILER
4606 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4607 #endif
4609 #ifdef USE_TCG_OPTIMIZATIONS
4610 tcg_optimize(s);
4611 #endif
4613 #ifdef CONFIG_PROFILER
4614 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4615 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4616 #endif
4618 reachable_code_pass(s);
4619 liveness_pass_1(s);
4621 if (s->nb_indirects > 0) {
4622 #ifdef DEBUG_DISAS
4623 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4624 && qemu_log_in_addr_range(tb->pc))) {
4625 FILE *logfile = qemu_log_lock();
4626 qemu_log("OP before indirect lowering:\n");
4627 tcg_dump_ops(s, false);
4628 qemu_log("\n");
4629 qemu_log_unlock(logfile);
4631 #endif
4632 /* Replace indirect temps with direct temps. */
4633 if (liveness_pass_2(s)) {
4634 /* If changes were made, re-run liveness. */
4635 liveness_pass_1(s);
4639 #ifdef CONFIG_PROFILER
4640 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4641 #endif
4643 #ifdef DEBUG_DISAS
4644 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4645 && qemu_log_in_addr_range(tb->pc))) {
4646 FILE *logfile = qemu_log_lock();
4647 qemu_log("OP after optimization and liveness analysis:\n");
4648 tcg_dump_ops(s, true);
4649 qemu_log("\n");
4650 qemu_log_unlock(logfile);
4652 #endif
4654 tcg_reg_alloc_start(s);
4657 * Reset the buffer pointers when restarting after overflow.
4658 * TODO: Move this into translate-all.c with the rest of the
4659 * buffer management. Having only this done here is confusing.
4661 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4662 s->code_ptr = s->code_buf;
4664 #ifdef TCG_TARGET_NEED_LDST_LABELS
4665 QSIMPLEQ_INIT(&s->ldst_labels);
4666 #endif
4667 #ifdef TCG_TARGET_NEED_POOL_LABELS
4668 s->pool_labels = NULL;
4669 #endif
4671 num_insns = -1;
4672 QTAILQ_FOREACH(op, &s->ops, link) {
4673 TCGOpcode opc = op->opc;
4675 #ifdef CONFIG_PROFILER
4676 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4677 #endif
4679 switch (opc) {
4680 case INDEX_op_mov_i32:
4681 case INDEX_op_mov_i64:
4682 case INDEX_op_mov_vec:
4683 tcg_reg_alloc_mov(s, op);
4684 break;
4685 case INDEX_op_dup_vec:
4686 tcg_reg_alloc_dup(s, op);
4687 break;
4688 case INDEX_op_insn_start:
4689 if (num_insns >= 0) {
4690 size_t off = tcg_current_code_size(s);
4691 s->gen_insn_end_off[num_insns] = off;
4692 /* Assert that we do not overflow our stored offset. */
4693 assert(s->gen_insn_end_off[num_insns] == off);
4695 num_insns++;
4696 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4697 target_ulong a;
4698 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4699 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4700 #else
4701 a = op->args[i];
4702 #endif
4703 s->gen_insn_data[num_insns][i] = a;
4705 break;
4706 case INDEX_op_discard:
4707 temp_dead(s, arg_temp(op->args[0]));
4708 break;
4709 case INDEX_op_set_label:
4710 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4711 tcg_out_label(s, arg_label(op->args[0]));
4712 break;
4713 case INDEX_op_call:
4714 tcg_reg_alloc_call(s, op);
4715 break;
4716 case INDEX_op_dup2_vec:
4717 if (tcg_reg_alloc_dup2(s, op)) {
4718 break;
4720 /* fall through */
4721 default:
4722 /* Sanity check that we've not introduced any unhandled opcodes. */
4723 tcg_debug_assert(tcg_op_supported(opc));
4724 /* Note: in order to speed up the code, it would be much
4725 faster to have specialized register allocator functions for
4726 some common argument patterns */
4727 tcg_reg_alloc_op(s, op);
4728 break;
4730 #ifdef CONFIG_DEBUG_TCG
4731 check_regs(s);
4732 #endif
4733 /* Test for (pending) buffer overflow. The assumption is that any
4734 one operation beginning below the high water mark cannot overrun
4735 the buffer completely. Thus we can test for overflow after
4736 generating code without having to check during generation. */
4737 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4738 return -1;
4740 /* Test for TB overflow, as seen by gen_insn_end_off. */
4741 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4742 return -2;
4745 tcg_debug_assert(num_insns >= 0);
4746 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4748 /* Generate TB finalization at the end of block */
4749 #ifdef TCG_TARGET_NEED_LDST_LABELS
4750 i = tcg_out_ldst_finalize(s);
4751 if (i < 0) {
4752 return i;
4754 #endif
4755 #ifdef TCG_TARGET_NEED_POOL_LABELS
4756 i = tcg_out_pool_finalize(s);
4757 if (i < 0) {
4758 return i;
4760 #endif
4761 if (!tcg_resolve_relocs(s)) {
4762 return -2;
4765 #ifndef CONFIG_TCG_INTERPRETER
4766 /* flush instruction cache */
4767 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4768 (uintptr_t)s->code_buf,
4769 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4770 #endif
4772 return tcg_current_code_size(s);
#ifdef CONFIG_PROFILER
/*
 * Print the accumulated TCG profiling counters via qemu_printf():
 * translation-block counts, per-TB op/temp averages, and cycle-time
 * breakdowns of the translation pipeline.  A snapshot of the counters
 * is taken first via tcg_profile_snapshot_counters(), so the report is
 * internally consistent even while translation continues.
 */
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;   /* tb_count clamped to >= 1 for safe division */
    int64_t tot;            /* total cycles: intermediate gen + code gen */

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    /* NOTE(review): the seconds figure assumes a fixed 2.4 GHz host
       clock; it is only a rough human-readable scale, not measured.  */
    qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    /* tb_count1 - tb_count = translations that were restarted/aborted
       (presumably buffer-overflow restarts — verify against callers).  */
    qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    /* Per-unit cycle costs; every divisor is guarded against zero.  */
    qemu_printf("cycles/op %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;    /* avoid division by zero in the percentages below */
    }
    qemu_printf(" gen_interm time %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf(" gen_code time %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    /* opt/liveness are reported relative to code-gen time alone.  */
    qemu_printf("optim./code time %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count %" PRId64 "\n",
                s->restore_count);
    qemu_printf(" avg cycles %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
/* Profiler not built in: keep the entry point but just say so.  */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif
4840 #ifdef ELF_HOST_MACHINE
4841 /* In order to use this feature, the backend needs to do three things:
4843 (1) Define ELF_HOST_MACHINE to indicate both what value to
4844 put into the ELF image and to indicate support for the feature.
4846 (2) Define tcg_register_jit. This should create a buffer containing
4847 the contents of a .debug_frame section that describes the post-
4848 prologue unwind info for the tcg machine.
4850 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,       /* no pending action for the debugger */
    JIT_REGISTER_FN,        /* relevant_entry was just added */
    JIT_UNREGISTER_FN       /* relevant_entry is about to be removed */
} jit_actions_t;

/* One registered in-memory symbol file (an ELF image in our case).
   Layout is fixed by the GDB JIT interface; do not reorder fields.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the symbol file image */
    uint64_t symfile_size;      /* its size in bytes */
};

/* Root descriptor that GDB locates by the well-known symbol name
   __jit_debug_descriptor.  Layout likewise fixed by GDB.  */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB puts a breakpoint on this function; we call it after updating
   the descriptor.  The empty asm keeps the call from being elided.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
/*
 * Return the byte offset of @str inside the NUL-separated string table
 * @strtab.  The table starts with a leading '\0' (offset 0 is the empty
 * name, per ELF .strtab convention), so scanning begins at offset 1.
 * The caller must only look up strings known to be present: there is no
 * end-of-table sentinel, and a missing entry would scan past the end.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        continue;
    }
    return p - strtab;
}
/*
 * Build a small in-memory ELF image that describes the generated-code
 * buffer [buf_ptr, buf_ptr + buf_size) — fake .text plus minimal DWARF
 * .debug_info/.debug_abbrev and the backend-supplied .debug_frame —
 * and register it with GDB through the __jit_debug_* interface above,
 * so the debugger can unwind through JITted code.
 *
 * @buf_ptr/@buf_size: the code_gen_buffer region to describe.
 * @debug_frame/@debug_frame_size: backend-built .debug_frame contents,
 *     copied verbatim after the ElfImage; must start with a
 *     DebugFrameHeader whose FDE range we patch below.
 * Ownership: the image is g_malloc'ed and intentionally never freed —
 * GDB keeps referencing it for the process lifetime.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Hand-rolled .debug_info section: one compile unit containing one
       subprogram, both spanning the whole buffer.  The field order must
       stay in sync with the abbreviation table 'da' below.  */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;          /* CU length, excluding this field */
        uint16_t  version;      /* DWARF version */
        uint32_t  abbrev;       /* offset into .debug_abbrev */
        uint8_t   ptr_size;
        uint8_t   cu_die;       /* abbrev code 1: compile unit */
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;       /* abbrev code 2: subprogram */
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;       /* end-of-children terminator */
    };

    /* The whole fake ELF file, laid out contiguously; .debug_frame is
       appended immediately after this struct.  */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): this looks like it should be
               sizeof(ElfW(Ehdr)); Ehdr and Shdr happen to be the same
               size (64 bytes) on ELF64 hosts, but differ on ELF32 —
               verify against consumers before changing.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                /* lives right after the ElfImage; size patched below */
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,       /* first global symbol index */
                .sh_link = ARRAY_SIZE(img->shdr) - 1,   /* -> .strtab */
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,      /* defined in .text */
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* Patch the runtime-dependent fields: addresses, sizes, and the
       string-table offsets of each section/symbol name.  */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Append the backend's .debug_frame and point its FDE at the
       buffer being registered.  */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (0 or 1 here),
               not bytes, so this comparison against img_size is almost
               always true — but the body is empty, so it only serves to
               consume the return value, as the comment says.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Hand the image to GDB: link it in, set the action, and hit the
       debugger's breakpoint function.  */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

/* Stub: GDB JIT registration is compiled out on this host.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

/* Stub counterpart of the backend-provided tcg_register_jit().  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
#if !TCG_TARGET_MAYBE_vec
/* Stub for backends without vector support: generic code must never
   ask to expand a vector opcode on such hosts, so reaching this is a
   programming error.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif