accel/tcg: Pass down max_cpus to tcg_init
[qemu/ar7.git] / tcg / tcg.c
blob: 5cc384e205c6e70de82b3a5e38640072fd28a560
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
42 instructions */
43 #define NO_CPU_IO_DEFS
45 #include "exec/exec-all.h"
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
51 #include "tcg/tcg-op.h"
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS ELFCLASS32
55 #else
56 # define ELF_CLASS ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA ELFDATA2MSB
60 #else
61 # define ELF_DATA ELFDATA2LSB
62 #endif
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "tcg-internal.h"
68 /* Forward declarations for functions declared in tcg-target.c.inc and
69 used here. */
70 static void tcg_target_init(TCGContext *s);
71 static void tcg_target_qemu_prologue(TCGContext *s);
72 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
73 intptr_t value, intptr_t addend);
75 /* The CIE and FDE header definitions will be common to all hosts. */
76 typedef struct {
77 uint32_t len __attribute__((aligned((sizeof(void *)))));
78 uint32_t id;
79 uint8_t version;
80 char augmentation[1];
81 uint8_t code_align;
82 uint8_t data_align;
83 uint8_t return_column;
84 } DebugFrameCIE;
86 typedef struct QEMU_PACKED {
87 uint32_t len __attribute__((aligned((sizeof(void *)))));
88 uint32_t cie_offset;
89 uintptr_t func_start;
90 uintptr_t func_len;
91 } DebugFrameFDEHeader;
93 typedef struct QEMU_PACKED {
94 DebugFrameCIE cie;
95 DebugFrameFDEHeader fde;
96 } DebugFrameHeader;
98 static void tcg_register_jit_int(const void *buf, size_t size,
99 const void *debug_frame,
100 size_t debug_frame_size)
101 __attribute__((unused));
103 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
104 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
105 intptr_t arg2);
106 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
107 static void tcg_out_movi(TCGContext *s, TCGType type,
108 TCGReg ret, tcg_target_long arg);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110 const TCGArg args[TCG_MAX_OP_ARGS],
111 const int const_args[TCG_MAX_OP_ARGS]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114 TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116 TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118 TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
120 unsigned vecl, unsigned vece,
121 const TCGArg args[TCG_MAX_OP_ARGS],
122 const int const_args[TCG_MAX_OP_ARGS]);
123 #else
124 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
125 TCGReg dst, TCGReg src)
127 g_assert_not_reached();
129 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
130 TCGReg dst, TCGReg base, intptr_t offset)
132 g_assert_not_reached();
134 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
135 TCGReg dst, int64_t arg)
137 g_assert_not_reached();
139 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140 unsigned vecl, unsigned vece,
141 const TCGArg args[TCG_MAX_OP_ARGS],
142 const int const_args[TCG_MAX_OP_ARGS])
144 g_assert_not_reached();
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148 intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150 TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
152 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
153 #ifdef TCG_TARGET_NEED_LDST_LABELS
154 static int tcg_out_ldst_finalize(TCGContext *s);
155 #endif
157 TCGContext **tcg_ctxs;
158 unsigned int n_tcg_ctxs;
159 TCGv_env cpu_env = 0;
160 const void *tcg_code_gen_epilogue;
161 uintptr_t tcg_splitwx_diff;
163 #ifndef CONFIG_TCG_INTERPRETER
164 tcg_prologue_fn *tcg_qemu_tb_exec;
165 #endif
167 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
168 static TCGRegSet tcg_target_call_clobber_regs;
170 #if TCG_TARGET_INSN_UNIT_SIZE == 1
171 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
173 *s->code_ptr++ = v;
176 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
177 uint8_t v)
179 *p = v;
181 #endif
183 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
184 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
186 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
187 *s->code_ptr++ = v;
188 } else {
189 tcg_insn_unit *p = s->code_ptr;
190 memcpy(p, &v, sizeof(v));
191 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
195 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
196 uint16_t v)
198 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
199 *p = v;
200 } else {
201 memcpy(p, &v, sizeof(v));
204 #endif
206 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
207 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
209 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
210 *s->code_ptr++ = v;
211 } else {
212 tcg_insn_unit *p = s->code_ptr;
213 memcpy(p, &v, sizeof(v));
214 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
218 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
219 uint32_t v)
221 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
222 *p = v;
223 } else {
224 memcpy(p, &v, sizeof(v));
227 #endif
229 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
230 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
232 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
233 *s->code_ptr++ = v;
234 } else {
235 tcg_insn_unit *p = s->code_ptr;
236 memcpy(p, &v, sizeof(v));
237 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
241 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
242 uint64_t v)
244 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
245 *p = v;
246 } else {
247 memcpy(p, &v, sizeof(v));
250 #endif
252 /* label relocation processing */
254 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
255 TCGLabel *l, intptr_t addend)
257 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
259 r->type = type;
260 r->ptr = code_ptr;
261 r->addend = addend;
262 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
265 static void tcg_out_label(TCGContext *s, TCGLabel *l)
267 tcg_debug_assert(!l->has_value);
268 l->has_value = 1;
269 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
272 TCGLabel *gen_new_label(void)
274 TCGContext *s = tcg_ctx;
275 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
277 memset(l, 0, sizeof(TCGLabel));
278 l->id = s->nb_labels++;
279 QSIMPLEQ_INIT(&l->relocs);
281 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
283 return l;
286 static bool tcg_resolve_relocs(TCGContext *s)
288 TCGLabel *l;
290 QSIMPLEQ_FOREACH(l, &s->labels, next) {
291 TCGRelocation *r;
292 uintptr_t value = l->u.value;
294 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
295 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
296 return false;
300 return true;
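/*
 * For illustration: a branch to a not-yet-emitted label is handled in three
 * steps.  The backend calls tcg_out_reloc() to queue a patch site, a later
 * tcg_out_label() records the label's final address, and tcg_resolve_relocs()
 * walks every queued site and lets the backend's patch_reloc() rewrite the
 * instruction.  A minimal backend-side sketch (tcg_out_branch_imm and
 * R_MY_PCREL32 are hypothetical names, not part of this file):
 *
 *     if (l->has_value) {
 *         tcg_out_branch_imm(s, l->u.value_ptr);            // target known
 *     } else {
 *         tcg_out_reloc(s, s->code_ptr, R_MY_PCREL32, l, 0);
 *         tcg_out_branch_imm(s, NULL);                       // patched later
 *     }
 */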
303 static void set_jmp_reset_offset(TCGContext *s, int which)
305 /*
306 * We will check for overflow at the end of the opcode loop in
307 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
308 */
309 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
312 /* Signal overflow, starting over with fewer guest insns. */
313 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
315 siglongjmp(s->jmp_trans, -2);
318 #define C_PFX1(P, A) P##A
319 #define C_PFX2(P, A, B) P##A##_##B
320 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
321 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
322 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
323 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
325 /* Define an enumeration for the various combinations. */
327 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
328 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
329 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
330 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
332 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
333 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
334 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
335 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
337 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
339 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
340 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
341 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
342 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
344 typedef enum {
345 #include "tcg-target-con-set.h"
346 } TCGConstraintSetIndex;
348 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
350 #undef C_O0_I1
351 #undef C_O0_I2
352 #undef C_O0_I3
353 #undef C_O0_I4
354 #undef C_O1_I1
355 #undef C_O1_I2
356 #undef C_O1_I3
357 #undef C_O1_I4
358 #undef C_N1_I2
359 #undef C_O2_I1
360 #undef C_O2_I2
361 #undef C_O2_I3
362 #undef C_O2_I4
364 /* Put all of the constraint sets into an array, indexed by the enum. */
366 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
367 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
368 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
369 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
371 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
372 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
373 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
374 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
376 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
378 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
379 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
380 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
381 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
383 static const TCGTargetOpDef constraint_sets[] = {
384 #include "tcg-target-con-set.h"
388 #undef C_O0_I1
389 #undef C_O0_I2
390 #undef C_O0_I3
391 #undef C_O0_I4
392 #undef C_O1_I1
393 #undef C_O1_I2
394 #undef C_O1_I3
395 #undef C_O1_I4
396 #undef C_N1_I2
397 #undef C_O2_I1
398 #undef C_O2_I2
399 #undef C_O2_I3
400 #undef C_O2_I4
402 /* Expand the enumerator to be returned from tcg_target_op_def(). */
404 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
405 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
406 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
407 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
409 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
410 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
411 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
412 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
414 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
416 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
417 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
418 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
419 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
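/*
 * For illustration, one constraint set traced through the three expansions
 * above.  If a backend's tcg-target-con-set.h contains
 *
 *     C_O1_I2(r, r, ri)
 *
 * the first expansion yields the enumerator c_o1_i2_r_r_ri in
 * TCGConstraintSetIndex, the second yields the constraint_sets[] entry
 * { .args_ct_str = { "r", "r", "ri" } }, and the third lets the backend's
 * tcg_target_op_def() simply return C_O1_I2(r, r, ri) to name that entry.
 * Enum value and table index stay in sync because both expansions include
 * the same header in the same order.
 */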
421 #include "tcg-target.c.inc"
423 #ifdef CONFIG_DEBUG_TCG
424 const void *tcg_splitwx_to_rx(void *rw)
426 /* Pass NULL pointers unchanged. */
427 if (rw) {
428 g_assert(in_code_gen_buffer(rw));
429 rw += tcg_splitwx_diff;
431 return rw;
434 void *tcg_splitwx_to_rw(const void *rx)
436 /* Pass NULL pointers unchanged. */
437 if (rx) {
438 rx -= tcg_splitwx_diff;
439 /* Assert that we end with a pointer in the rw region. */
440 g_assert(in_code_gen_buffer(rx));
442 return (void *)rx;
444 #endif /* CONFIG_DEBUG_TCG */
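/*
 * Background for the two helpers above: when the split RW/RX code buffer is
 * in use, code_gen_buffer is mapped twice -- a writable (RW) view that
 * translation writes through and an executable (RX) view that generated code
 * runs from -- and tcg_splitwx_diff is the constant byte offset between the
 * two views (zero when the feature is not in use).  Converting a pointer is
 * therefore a single add or subtract plus, in debug builds, a check that the
 * result stays inside the buffer.
 */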
446 static void alloc_tcg_plugin_context(TCGContext *s)
448 #ifdef CONFIG_PLUGIN
449 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
450 s->plugin_tb->insns =
451 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
452 #endif
455 /*
456 * All TCG threads except the parent (i.e. the one that called tcg_context_init
457 * and registered the target's TCG globals) must register with this function
458 * before initiating translation.
460 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
461 * of tcg_region_init() for the reasoning behind this.
463 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
464 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
465 * is not used anymore for translation once this function is called.
467 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
468 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
469 */
470 #ifdef CONFIG_USER_ONLY
471 void tcg_register_thread(void)
473 tcg_ctx = &tcg_init_ctx;
475 #else
476 void tcg_register_thread(void)
478 MachineState *ms = MACHINE(qdev_get_machine());
479 TCGContext *s = g_malloc(sizeof(*s));
480 unsigned int i, n;
482 *s = tcg_init_ctx;
484 /* Relink mem_base. */
485 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
486 if (tcg_init_ctx.temps[i].mem_base) {
487 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
488 tcg_debug_assert(b >= 0 && b < n);
489 s->temps[i].mem_base = &s->temps[b];
493 /* Claim an entry in tcg_ctxs */
494 n = qatomic_fetch_inc(&n_tcg_ctxs);
495 g_assert(n < ms->smp.max_cpus);
496 qatomic_set(&tcg_ctxs[n], s);
498 if (n > 0) {
499 alloc_tcg_plugin_context(s);
500 tcg_region_initial_alloc(s);
503 tcg_ctx = s;
505 #endif /* !CONFIG_USER_ONLY */
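/*
 * Usage sketch (the thread function name is hypothetical): in softmmu each
 * vCPU thread registers itself once, before it translates anything, roughly:
 *
 *     static void *foo_cpu_thread_fn(void *arg)
 *     {
 *         rcu_register_thread();
 *         tcg_register_thread();      // claims a slot in tcg_ctxs[]
 *         ... translation / execution loop ...
 *     }
 *
 * The assertion against ms->smp.max_cpus above is what ties the number of
 * claimed contexts to the tcg_ctxs[] array sized in tcg_context_init().
 */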
507 /* pool based memory allocation */
508 void *tcg_malloc_internal(TCGContext *s, int size)
510 TCGPool *p;
511 int pool_size;
513 if (size > TCG_POOL_CHUNK_SIZE) {
514 /* big malloc: insert a new pool (XXX: could optimize) */
515 p = g_malloc(sizeof(TCGPool) + size);
516 p->size = size;
517 p->next = s->pool_first_large;
518 s->pool_first_large = p;
519 return p->data;
520 } else {
521 p = s->pool_current;
522 if (!p) {
523 p = s->pool_first;
524 if (!p)
525 goto new_pool;
526 } else {
527 if (!p->next) {
528 new_pool:
529 pool_size = TCG_POOL_CHUNK_SIZE;
530 p = g_malloc(sizeof(TCGPool) + pool_size);
531 p->size = pool_size;
532 p->next = NULL;
533 if (s->pool_current)
534 s->pool_current->next = p;
535 else
536 s->pool_first = p;
537 } else {
538 p = p->next;
542 s->pool_current = p;
543 s->pool_cur = p->data + size;
544 s->pool_end = p->data + p->size;
545 return p->data;
548 void tcg_pool_reset(TCGContext *s)
550 TCGPool *p, *t;
551 for (p = s->pool_first_large; p; p = t) {
552 t = p->next;
553 g_free(p);
555 s->pool_first_large = NULL;
556 s->pool_cur = s->pool_end = NULL;
557 s->pool_current = NULL;
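/*
 * The pool above provides per-translation scratch memory (TCGOps, labels,
 * relocations, ...).  Nothing is freed individually: tcg_func_start() calls
 * tcg_pool_reset() at the start of each new translation block, which frees
 * the oversized (> TCG_POOL_CHUNK_SIZE) blocks that were g_malloc'd
 * separately and marks the regular chunks for reuse.
 */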
560 typedef struct TCGHelperInfo {
561 void *func;
562 const char *name;
563 unsigned flags;
564 unsigned sizemask;
565 } TCGHelperInfo;
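/*
 * Note on the encoding (summarised from the dh_* macros in
 * exec/helper-head.h): .sizemask packs two bits per value, the return value
 * in bits [1:0] and argument i in bits 2*(i+1) and 2*(i+1)+1; the low bit of
 * each pair means "64-bit value", the high bit means "sign-extended".
 * tcg_gen_callN() below tests exactly these bits, e.g.
 * sizemask & (1 << (i+1)*2) for "argument i is 64-bit".
 */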
567 #include "exec/helper-proto.h"
569 static const TCGHelperInfo all_helpers[] = {
570 #include "exec/helper-tcg.h"
572 static GHashTable *helper_table;
574 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
575 static void process_op_defs(TCGContext *s);
576 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
577 TCGReg reg, const char *name);
579 static void tcg_context_init(unsigned max_cpus)
581 TCGContext *s = &tcg_init_ctx;
582 int op, total_args, n, i;
583 TCGOpDef *def;
584 TCGArgConstraint *args_ct;
585 TCGTemp *ts;
587 memset(s, 0, sizeof(*s));
588 s->nb_globals = 0;
590 /* Count total number of arguments and allocate the corresponding
591 space */
592 total_args = 0;
593 for(op = 0; op < NB_OPS; op++) {
594 def = &tcg_op_defs[op];
595 n = def->nb_iargs + def->nb_oargs;
596 total_args += n;
599 args_ct = g_new0(TCGArgConstraint, total_args);
601 for(op = 0; op < NB_OPS; op++) {
602 def = &tcg_op_defs[op];
603 def->args_ct = args_ct;
604 n = def->nb_iargs + def->nb_oargs;
605 args_ct += n;
608 /* Register helpers. */
609 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
610 helper_table = g_hash_table_new(NULL, NULL);
612 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
613 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
614 (gpointer)&all_helpers[i]);
617 tcg_target_init(s);
618 process_op_defs(s);
620 /* Reverse the order of the saved registers, assuming they're all at
621 the start of tcg_target_reg_alloc_order. */
622 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
623 int r = tcg_target_reg_alloc_order[n];
624 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
625 break;
628 for (i = 0; i < n; ++i) {
629 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
631 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
632 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
635 alloc_tcg_plugin_context(s);
637 tcg_ctx = s;
638 /*
639 * In user-mode we simply share the init context among threads, since we
640 * use a single region. See the documentation of tcg_region_init() for the
641 * reasoning behind this.
642 * In softmmu we will have at most max_cpus TCG threads.
643 */
644 #ifdef CONFIG_USER_ONLY
645 tcg_ctxs = &tcg_ctx;
646 n_tcg_ctxs = 1;
647 #else
648 tcg_ctxs = g_new(TCGContext *, max_cpus);
649 #endif
651 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
652 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
653 cpu_env = temp_tcgv_ptr(ts);
656 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
658 tcg_context_init(max_cpus);
659 tcg_region_init(tb_size, splitwx, max_cpus);
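/*
 * Context for the max_cpus parameter (the subject of this patch): it only
 * matters for softmmu, where tcg_context_init() sizes tcg_ctxs[] with it,
 * tcg_register_thread() asserts that no more than max_cpus contexts are ever
 * claimed, and tcg_region_init() (not in this file) uses the same bound when
 * deciding how many regions to split the code_gen_buffer into.  User-mode
 * ignores it and shares a single context and region.
 */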
662 /*
663 * Allocate TBs right before their corresponding translated code, making
664 * sure that TBs and code are on different cache lines.
665 */
666 TranslationBlock *tcg_tb_alloc(TCGContext *s)
668 uintptr_t align = qemu_icache_linesize;
669 TranslationBlock *tb;
670 void *next;
672 retry:
673 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
674 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
676 if (unlikely(next > s->code_gen_highwater)) {
677 if (tcg_region_alloc(s)) {
678 return NULL;
680 goto retry;
682 qatomic_set(&s->code_gen_ptr, next);
683 s->data_gen_ptr = NULL;
684 return tb;
687 void tcg_prologue_init(TCGContext *s)
689 size_t prologue_size;
691 s->code_ptr = s->code_gen_ptr;
692 s->code_buf = s->code_gen_ptr;
693 s->data_gen_ptr = NULL;
695 #ifndef CONFIG_TCG_INTERPRETER
696 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
697 #endif
699 #ifdef TCG_TARGET_NEED_POOL_LABELS
700 s->pool_labels = NULL;
701 #endif
703 qemu_thread_jit_write();
704 /* Generate the prologue. */
705 tcg_target_qemu_prologue(s);
707 #ifdef TCG_TARGET_NEED_POOL_LABELS
708 /* Allow the prologue to put e.g. guest_base into a pool entry. */
710 int result = tcg_out_pool_finalize(s);
711 tcg_debug_assert(result == 0);
713 #endif
715 prologue_size = tcg_current_code_size(s);
717 #ifndef CONFIG_TCG_INTERPRETER
718 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
719 (uintptr_t)s->code_buf, prologue_size);
720 #endif
722 tcg_region_prologue_set(s);
724 #ifdef DEBUG_DISAS
725 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
726 FILE *logfile = qemu_log_lock();
727 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
728 if (s->data_gen_ptr) {
729 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
730 size_t data_size = prologue_size - code_size;
731 size_t i;
733 log_disas(s->code_gen_ptr, code_size);
735 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
736 if (sizeof(tcg_target_ulong) == 8) {
737 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
738 (uintptr_t)s->data_gen_ptr + i,
739 *(uint64_t *)(s->data_gen_ptr + i));
740 } else {
741 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
742 (uintptr_t)s->data_gen_ptr + i,
743 *(uint32_t *)(s->data_gen_ptr + i));
746 } else {
747 log_disas(s->code_gen_ptr, prologue_size);
749 qemu_log("\n");
750 qemu_log_flush();
751 qemu_log_unlock(logfile);
753 #endif
755 /* Assert that goto_ptr is implemented completely. */
756 if (TCG_TARGET_HAS_goto_ptr) {
757 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
761 void tcg_func_start(TCGContext *s)
763 tcg_pool_reset(s);
764 s->nb_temps = s->nb_globals;
766 /* No temps have been previously allocated for size or locality. */
767 memset(s->free_temps, 0, sizeof(s->free_temps));
769 /* No constant temps have been previously allocated. */
770 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
771 if (s->const_table[i]) {
772 g_hash_table_remove_all(s->const_table[i]);
776 s->nb_ops = 0;
777 s->nb_labels = 0;
778 s->current_frame_offset = s->frame_start;
780 #ifdef CONFIG_DEBUG_TCG
781 s->goto_tb_issue_mask = 0;
782 #endif
784 QTAILQ_INIT(&s->ops);
785 QTAILQ_INIT(&s->free_ops);
786 QSIMPLEQ_INIT(&s->labels);
789 static TCGTemp *tcg_temp_alloc(TCGContext *s)
791 int n = s->nb_temps++;
793 if (n >= TCG_MAX_TEMPS) {
794 tcg_raise_tb_overflow(s);
796 return memset(&s->temps[n], 0, sizeof(TCGTemp));
799 static TCGTemp *tcg_global_alloc(TCGContext *s)
801 TCGTemp *ts;
803 tcg_debug_assert(s->nb_globals == s->nb_temps);
804 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
805 s->nb_globals++;
806 ts = tcg_temp_alloc(s);
807 ts->kind = TEMP_GLOBAL;
809 return ts;
812 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
813 TCGReg reg, const char *name)
815 TCGTemp *ts;
817 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
818 tcg_abort();
821 ts = tcg_global_alloc(s);
822 ts->base_type = type;
823 ts->type = type;
824 ts->kind = TEMP_FIXED;
825 ts->reg = reg;
826 ts->name = name;
827 tcg_regset_set_reg(s->reserved_regs, reg);
829 return ts;
832 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
834 s->frame_start = start;
835 s->frame_end = start + size;
836 s->frame_temp
837 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
840 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
841 intptr_t offset, const char *name)
843 TCGContext *s = tcg_ctx;
844 TCGTemp *base_ts = tcgv_ptr_temp(base);
845 TCGTemp *ts = tcg_global_alloc(s);
846 int indirect_reg = 0, bigendian = 0;
847 #ifdef HOST_WORDS_BIGENDIAN
848 bigendian = 1;
849 #endif
851 switch (base_ts->kind) {
852 case TEMP_FIXED:
853 break;
854 case TEMP_GLOBAL:
855 /* We do not support double-indirect registers. */
856 tcg_debug_assert(!base_ts->indirect_reg);
857 base_ts->indirect_base = 1;
858 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
859 ? 2 : 1);
860 indirect_reg = 1;
861 break;
862 default:
863 g_assert_not_reached();
866 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
867 TCGTemp *ts2 = tcg_global_alloc(s);
868 char buf[64];
870 ts->base_type = TCG_TYPE_I64;
871 ts->type = TCG_TYPE_I32;
872 ts->indirect_reg = indirect_reg;
873 ts->mem_allocated = 1;
874 ts->mem_base = base_ts;
875 ts->mem_offset = offset + bigendian * 4;
876 pstrcpy(buf, sizeof(buf), name);
877 pstrcat(buf, sizeof(buf), "_0");
878 ts->name = strdup(buf);
880 tcg_debug_assert(ts2 == ts + 1);
881 ts2->base_type = TCG_TYPE_I64;
882 ts2->type = TCG_TYPE_I32;
883 ts2->indirect_reg = indirect_reg;
884 ts2->mem_allocated = 1;
885 ts2->mem_base = base_ts;
886 ts2->mem_offset = offset + (1 - bigendian) * 4;
887 pstrcpy(buf, sizeof(buf), name);
888 pstrcat(buf, sizeof(buf), "_1");
889 ts2->name = strdup(buf);
890 } else {
891 ts->base_type = type;
892 ts->type = type;
893 ts->indirect_reg = indirect_reg;
894 ts->mem_allocated = 1;
895 ts->mem_base = base_ts;
896 ts->mem_offset = offset;
897 ts->name = name;
899 return ts;
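/*
 * Usage sketch (CPUFooState and cpu_pc are hypothetical names): this is the
 * worker behind tcg_global_mem_new_i32/_i64/_ptr(), which front ends use to
 * expose CPU state fields as named TCG globals, e.g.
 *
 *     cpu_pc = tcg_global_mem_new_i32(cpu_env,
 *                                     offsetof(CPUFooState, pc), "pc");
 *
 * On 32-bit hosts a 64-bit global becomes two consecutive I32 temps named
 * "<name>_0"/"<name>_1", with the half offsets swapped on big-endian hosts,
 * which is what the TCG_TYPE_I64 branch above implements.
 */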
902 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
904 TCGContext *s = tcg_ctx;
905 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
906 TCGTemp *ts;
907 int idx, k;
909 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
910 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
911 if (idx < TCG_MAX_TEMPS) {
912 /* There is already an available temp with the right type. */
913 clear_bit(idx, s->free_temps[k].l);
915 ts = &s->temps[idx];
916 ts->temp_allocated = 1;
917 tcg_debug_assert(ts->base_type == type);
918 tcg_debug_assert(ts->kind == kind);
919 } else {
920 ts = tcg_temp_alloc(s);
921 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
922 TCGTemp *ts2 = tcg_temp_alloc(s);
924 ts->base_type = type;
925 ts->type = TCG_TYPE_I32;
926 ts->temp_allocated = 1;
927 ts->kind = kind;
929 tcg_debug_assert(ts2 == ts + 1);
930 ts2->base_type = TCG_TYPE_I64;
931 ts2->type = TCG_TYPE_I32;
932 ts2->temp_allocated = 1;
933 ts2->kind = kind;
934 } else {
935 ts->base_type = type;
936 ts->type = type;
937 ts->temp_allocated = 1;
938 ts->kind = kind;
942 #if defined(CONFIG_DEBUG_TCG)
943 s->temps_in_use++;
944 #endif
945 return ts;
948 TCGv_vec tcg_temp_new_vec(TCGType type)
950 TCGTemp *t;
952 #ifdef CONFIG_DEBUG_TCG
953 switch (type) {
954 case TCG_TYPE_V64:
955 assert(TCG_TARGET_HAS_v64);
956 break;
957 case TCG_TYPE_V128:
958 assert(TCG_TARGET_HAS_v128);
959 break;
960 case TCG_TYPE_V256:
961 assert(TCG_TARGET_HAS_v256);
962 break;
963 default:
964 g_assert_not_reached();
966 #endif
968 t = tcg_temp_new_internal(type, 0);
969 return temp_tcgv_vec(t);
972 /* Create a new temp of the same type as an existing temp. */
973 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
975 TCGTemp *t = tcgv_vec_temp(match);
977 tcg_debug_assert(t->temp_allocated != 0);
979 t = tcg_temp_new_internal(t->base_type, 0);
980 return temp_tcgv_vec(t);
983 void tcg_temp_free_internal(TCGTemp *ts)
985 TCGContext *s = tcg_ctx;
986 int k, idx;
988 /* In order to simplify users of tcg_constant_*, silently ignore free. */
989 if (ts->kind == TEMP_CONST) {
990 return;
993 #if defined(CONFIG_DEBUG_TCG)
994 s->temps_in_use--;
995 if (s->temps_in_use < 0) {
996 fprintf(stderr, "More temporaries freed than allocated!\n");
998 #endif
1000 tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1001 tcg_debug_assert(ts->temp_allocated != 0);
1002 ts->temp_allocated = 0;
1004 idx = temp_idx(ts);
1005 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1006 set_bit(idx, s->free_temps[k].l);
1009 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1011 TCGContext *s = tcg_ctx;
1012 GHashTable *h = s->const_table[type];
1013 TCGTemp *ts;
1015 if (h == NULL) {
1016 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1017 s->const_table[type] = h;
1020 ts = g_hash_table_lookup(h, &val);
1021 if (ts == NULL) {
1022 ts = tcg_temp_alloc(s);
1024 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1025 TCGTemp *ts2 = tcg_temp_alloc(s);
1027 ts->base_type = TCG_TYPE_I64;
1028 ts->type = TCG_TYPE_I32;
1029 ts->kind = TEMP_CONST;
1030 ts->temp_allocated = 1;
1032 * Retain the full value of the 64-bit constant in the low
1033 * part, so that the hash table works. Actual uses will
1034 * truncate the value to the low part.
1036 ts->val = val;
1038 tcg_debug_assert(ts2 == ts + 1);
1039 ts2->base_type = TCG_TYPE_I64;
1040 ts2->type = TCG_TYPE_I32;
1041 ts2->kind = TEMP_CONST;
1042 ts2->temp_allocated = 1;
1043 ts2->val = val >> 32;
1044 } else {
1045 ts->base_type = type;
1046 ts->type = type;
1047 ts->kind = TEMP_CONST;
1048 ts->temp_allocated = 1;
1049 ts->val = val;
1051 g_hash_table_insert(h, &ts->val, ts);
1054 return ts;
1057 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1059 val = dup_const(vece, val);
1060 return temp_tcgv_vec(tcg_constant_internal(type, val));
1063 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1065 TCGTemp *t = tcgv_vec_temp(match);
1067 tcg_debug_assert(t->temp_allocated != 0);
1068 return tcg_constant_vec(t->base_type, vece, val);
1071 TCGv_i32 tcg_const_i32(int32_t val)
1073 TCGv_i32 t0;
1074 t0 = tcg_temp_new_i32();
1075 tcg_gen_movi_i32(t0, val);
1076 return t0;
1079 TCGv_i64 tcg_const_i64(int64_t val)
1081 TCGv_i64 t0;
1082 t0 = tcg_temp_new_i64();
1083 tcg_gen_movi_i64(t0, val);
1084 return t0;
1087 TCGv_i32 tcg_const_local_i32(int32_t val)
1089 TCGv_i32 t0;
1090 t0 = tcg_temp_local_new_i32();
1091 tcg_gen_movi_i32(t0, val);
1092 return t0;
1095 TCGv_i64 tcg_const_local_i64(int64_t val)
1097 TCGv_i64 t0;
1098 t0 = tcg_temp_local_new_i64();
1099 tcg_gen_movi_i64(t0, val);
1100 return t0;
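/*
 * The two families above differ in ownership: tcg_constant_i32/_i64/_vec()
 * return a shared, read-only TEMP_CONST temp interned in const_table[] that
 * must not be written and need not be freed (tcg_temp_free_internal()
 * silently ignores it), while tcg_const_i32/_i64() return an ordinary
 * mutable temp initialised with a movi that the caller still owns, e.g.
 *
 *     TCGv_i32 t = tcg_const_i32(0x1234);    // caller-owned copy
 *     ...
 *     tcg_temp_free_i32(t);
 */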
1103 #if defined(CONFIG_DEBUG_TCG)
1104 void tcg_clear_temp_count(void)
1106 TCGContext *s = tcg_ctx;
1107 s->temps_in_use = 0;
1110 int tcg_check_temp_count(void)
1112 TCGContext *s = tcg_ctx;
1113 if (s->temps_in_use) {
1114 /* Clear the count so that we don't give another
1115 * warning immediately next time around.
1117 s->temps_in_use = 0;
1118 return 1;
1120 return 0;
1122 #endif
1124 /* Return true if OP may appear in the opcode stream.
1125 Test the runtime variable that controls each opcode. */
1126 bool tcg_op_supported(TCGOpcode op)
1128 const bool have_vec
1129 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1131 switch (op) {
1132 case INDEX_op_discard:
1133 case INDEX_op_set_label:
1134 case INDEX_op_call:
1135 case INDEX_op_br:
1136 case INDEX_op_mb:
1137 case INDEX_op_insn_start:
1138 case INDEX_op_exit_tb:
1139 case INDEX_op_goto_tb:
1140 case INDEX_op_qemu_ld_i32:
1141 case INDEX_op_qemu_st_i32:
1142 case INDEX_op_qemu_ld_i64:
1143 case INDEX_op_qemu_st_i64:
1144 return true;
1146 case INDEX_op_qemu_st8_i32:
1147 return TCG_TARGET_HAS_qemu_st8_i32;
1149 case INDEX_op_goto_ptr:
1150 return TCG_TARGET_HAS_goto_ptr;
1152 case INDEX_op_mov_i32:
1153 case INDEX_op_setcond_i32:
1154 case INDEX_op_brcond_i32:
1155 case INDEX_op_ld8u_i32:
1156 case INDEX_op_ld8s_i32:
1157 case INDEX_op_ld16u_i32:
1158 case INDEX_op_ld16s_i32:
1159 case INDEX_op_ld_i32:
1160 case INDEX_op_st8_i32:
1161 case INDEX_op_st16_i32:
1162 case INDEX_op_st_i32:
1163 case INDEX_op_add_i32:
1164 case INDEX_op_sub_i32:
1165 case INDEX_op_mul_i32:
1166 case INDEX_op_and_i32:
1167 case INDEX_op_or_i32:
1168 case INDEX_op_xor_i32:
1169 case INDEX_op_shl_i32:
1170 case INDEX_op_shr_i32:
1171 case INDEX_op_sar_i32:
1172 return true;
1174 case INDEX_op_movcond_i32:
1175 return TCG_TARGET_HAS_movcond_i32;
1176 case INDEX_op_div_i32:
1177 case INDEX_op_divu_i32:
1178 return TCG_TARGET_HAS_div_i32;
1179 case INDEX_op_rem_i32:
1180 case INDEX_op_remu_i32:
1181 return TCG_TARGET_HAS_rem_i32;
1182 case INDEX_op_div2_i32:
1183 case INDEX_op_divu2_i32:
1184 return TCG_TARGET_HAS_div2_i32;
1185 case INDEX_op_rotl_i32:
1186 case INDEX_op_rotr_i32:
1187 return TCG_TARGET_HAS_rot_i32;
1188 case INDEX_op_deposit_i32:
1189 return TCG_TARGET_HAS_deposit_i32;
1190 case INDEX_op_extract_i32:
1191 return TCG_TARGET_HAS_extract_i32;
1192 case INDEX_op_sextract_i32:
1193 return TCG_TARGET_HAS_sextract_i32;
1194 case INDEX_op_extract2_i32:
1195 return TCG_TARGET_HAS_extract2_i32;
1196 case INDEX_op_add2_i32:
1197 return TCG_TARGET_HAS_add2_i32;
1198 case INDEX_op_sub2_i32:
1199 return TCG_TARGET_HAS_sub2_i32;
1200 case INDEX_op_mulu2_i32:
1201 return TCG_TARGET_HAS_mulu2_i32;
1202 case INDEX_op_muls2_i32:
1203 return TCG_TARGET_HAS_muls2_i32;
1204 case INDEX_op_muluh_i32:
1205 return TCG_TARGET_HAS_muluh_i32;
1206 case INDEX_op_mulsh_i32:
1207 return TCG_TARGET_HAS_mulsh_i32;
1208 case INDEX_op_ext8s_i32:
1209 return TCG_TARGET_HAS_ext8s_i32;
1210 case INDEX_op_ext16s_i32:
1211 return TCG_TARGET_HAS_ext16s_i32;
1212 case INDEX_op_ext8u_i32:
1213 return TCG_TARGET_HAS_ext8u_i32;
1214 case INDEX_op_ext16u_i32:
1215 return TCG_TARGET_HAS_ext16u_i32;
1216 case INDEX_op_bswap16_i32:
1217 return TCG_TARGET_HAS_bswap16_i32;
1218 case INDEX_op_bswap32_i32:
1219 return TCG_TARGET_HAS_bswap32_i32;
1220 case INDEX_op_not_i32:
1221 return TCG_TARGET_HAS_not_i32;
1222 case INDEX_op_neg_i32:
1223 return TCG_TARGET_HAS_neg_i32;
1224 case INDEX_op_andc_i32:
1225 return TCG_TARGET_HAS_andc_i32;
1226 case INDEX_op_orc_i32:
1227 return TCG_TARGET_HAS_orc_i32;
1228 case INDEX_op_eqv_i32:
1229 return TCG_TARGET_HAS_eqv_i32;
1230 case INDEX_op_nand_i32:
1231 return TCG_TARGET_HAS_nand_i32;
1232 case INDEX_op_nor_i32:
1233 return TCG_TARGET_HAS_nor_i32;
1234 case INDEX_op_clz_i32:
1235 return TCG_TARGET_HAS_clz_i32;
1236 case INDEX_op_ctz_i32:
1237 return TCG_TARGET_HAS_ctz_i32;
1238 case INDEX_op_ctpop_i32:
1239 return TCG_TARGET_HAS_ctpop_i32;
1241 case INDEX_op_brcond2_i32:
1242 case INDEX_op_setcond2_i32:
1243 return TCG_TARGET_REG_BITS == 32;
1245 case INDEX_op_mov_i64:
1246 case INDEX_op_setcond_i64:
1247 case INDEX_op_brcond_i64:
1248 case INDEX_op_ld8u_i64:
1249 case INDEX_op_ld8s_i64:
1250 case INDEX_op_ld16u_i64:
1251 case INDEX_op_ld16s_i64:
1252 case INDEX_op_ld32u_i64:
1253 case INDEX_op_ld32s_i64:
1254 case INDEX_op_ld_i64:
1255 case INDEX_op_st8_i64:
1256 case INDEX_op_st16_i64:
1257 case INDEX_op_st32_i64:
1258 case INDEX_op_st_i64:
1259 case INDEX_op_add_i64:
1260 case INDEX_op_sub_i64:
1261 case INDEX_op_mul_i64:
1262 case INDEX_op_and_i64:
1263 case INDEX_op_or_i64:
1264 case INDEX_op_xor_i64:
1265 case INDEX_op_shl_i64:
1266 case INDEX_op_shr_i64:
1267 case INDEX_op_sar_i64:
1268 case INDEX_op_ext_i32_i64:
1269 case INDEX_op_extu_i32_i64:
1270 return TCG_TARGET_REG_BITS == 64;
1272 case INDEX_op_movcond_i64:
1273 return TCG_TARGET_HAS_movcond_i64;
1274 case INDEX_op_div_i64:
1275 case INDEX_op_divu_i64:
1276 return TCG_TARGET_HAS_div_i64;
1277 case INDEX_op_rem_i64:
1278 case INDEX_op_remu_i64:
1279 return TCG_TARGET_HAS_rem_i64;
1280 case INDEX_op_div2_i64:
1281 case INDEX_op_divu2_i64:
1282 return TCG_TARGET_HAS_div2_i64;
1283 case INDEX_op_rotl_i64:
1284 case INDEX_op_rotr_i64:
1285 return TCG_TARGET_HAS_rot_i64;
1286 case INDEX_op_deposit_i64:
1287 return TCG_TARGET_HAS_deposit_i64;
1288 case INDEX_op_extract_i64:
1289 return TCG_TARGET_HAS_extract_i64;
1290 case INDEX_op_sextract_i64:
1291 return TCG_TARGET_HAS_sextract_i64;
1292 case INDEX_op_extract2_i64:
1293 return TCG_TARGET_HAS_extract2_i64;
1294 case INDEX_op_extrl_i64_i32:
1295 return TCG_TARGET_HAS_extrl_i64_i32;
1296 case INDEX_op_extrh_i64_i32:
1297 return TCG_TARGET_HAS_extrh_i64_i32;
1298 case INDEX_op_ext8s_i64:
1299 return TCG_TARGET_HAS_ext8s_i64;
1300 case INDEX_op_ext16s_i64:
1301 return TCG_TARGET_HAS_ext16s_i64;
1302 case INDEX_op_ext32s_i64:
1303 return TCG_TARGET_HAS_ext32s_i64;
1304 case INDEX_op_ext8u_i64:
1305 return TCG_TARGET_HAS_ext8u_i64;
1306 case INDEX_op_ext16u_i64:
1307 return TCG_TARGET_HAS_ext16u_i64;
1308 case INDEX_op_ext32u_i64:
1309 return TCG_TARGET_HAS_ext32u_i64;
1310 case INDEX_op_bswap16_i64:
1311 return TCG_TARGET_HAS_bswap16_i64;
1312 case INDEX_op_bswap32_i64:
1313 return TCG_TARGET_HAS_bswap32_i64;
1314 case INDEX_op_bswap64_i64:
1315 return TCG_TARGET_HAS_bswap64_i64;
1316 case INDEX_op_not_i64:
1317 return TCG_TARGET_HAS_not_i64;
1318 case INDEX_op_neg_i64:
1319 return TCG_TARGET_HAS_neg_i64;
1320 case INDEX_op_andc_i64:
1321 return TCG_TARGET_HAS_andc_i64;
1322 case INDEX_op_orc_i64:
1323 return TCG_TARGET_HAS_orc_i64;
1324 case INDEX_op_eqv_i64:
1325 return TCG_TARGET_HAS_eqv_i64;
1326 case INDEX_op_nand_i64:
1327 return TCG_TARGET_HAS_nand_i64;
1328 case INDEX_op_nor_i64:
1329 return TCG_TARGET_HAS_nor_i64;
1330 case INDEX_op_clz_i64:
1331 return TCG_TARGET_HAS_clz_i64;
1332 case INDEX_op_ctz_i64:
1333 return TCG_TARGET_HAS_ctz_i64;
1334 case INDEX_op_ctpop_i64:
1335 return TCG_TARGET_HAS_ctpop_i64;
1336 case INDEX_op_add2_i64:
1337 return TCG_TARGET_HAS_add2_i64;
1338 case INDEX_op_sub2_i64:
1339 return TCG_TARGET_HAS_sub2_i64;
1340 case INDEX_op_mulu2_i64:
1341 return TCG_TARGET_HAS_mulu2_i64;
1342 case INDEX_op_muls2_i64:
1343 return TCG_TARGET_HAS_muls2_i64;
1344 case INDEX_op_muluh_i64:
1345 return TCG_TARGET_HAS_muluh_i64;
1346 case INDEX_op_mulsh_i64:
1347 return TCG_TARGET_HAS_mulsh_i64;
1349 case INDEX_op_mov_vec:
1350 case INDEX_op_dup_vec:
1351 case INDEX_op_dupm_vec:
1352 case INDEX_op_ld_vec:
1353 case INDEX_op_st_vec:
1354 case INDEX_op_add_vec:
1355 case INDEX_op_sub_vec:
1356 case INDEX_op_and_vec:
1357 case INDEX_op_or_vec:
1358 case INDEX_op_xor_vec:
1359 case INDEX_op_cmp_vec:
1360 return have_vec;
1361 case INDEX_op_dup2_vec:
1362 return have_vec && TCG_TARGET_REG_BITS == 32;
1363 case INDEX_op_not_vec:
1364 return have_vec && TCG_TARGET_HAS_not_vec;
1365 case INDEX_op_neg_vec:
1366 return have_vec && TCG_TARGET_HAS_neg_vec;
1367 case INDEX_op_abs_vec:
1368 return have_vec && TCG_TARGET_HAS_abs_vec;
1369 case INDEX_op_andc_vec:
1370 return have_vec && TCG_TARGET_HAS_andc_vec;
1371 case INDEX_op_orc_vec:
1372 return have_vec && TCG_TARGET_HAS_orc_vec;
1373 case INDEX_op_mul_vec:
1374 return have_vec && TCG_TARGET_HAS_mul_vec;
1375 case INDEX_op_shli_vec:
1376 case INDEX_op_shri_vec:
1377 case INDEX_op_sari_vec:
1378 return have_vec && TCG_TARGET_HAS_shi_vec;
1379 case INDEX_op_shls_vec:
1380 case INDEX_op_shrs_vec:
1381 case INDEX_op_sars_vec:
1382 return have_vec && TCG_TARGET_HAS_shs_vec;
1383 case INDEX_op_shlv_vec:
1384 case INDEX_op_shrv_vec:
1385 case INDEX_op_sarv_vec:
1386 return have_vec && TCG_TARGET_HAS_shv_vec;
1387 case INDEX_op_rotli_vec:
1388 return have_vec && TCG_TARGET_HAS_roti_vec;
1389 case INDEX_op_rotls_vec:
1390 return have_vec && TCG_TARGET_HAS_rots_vec;
1391 case INDEX_op_rotlv_vec:
1392 case INDEX_op_rotrv_vec:
1393 return have_vec && TCG_TARGET_HAS_rotv_vec;
1394 case INDEX_op_ssadd_vec:
1395 case INDEX_op_usadd_vec:
1396 case INDEX_op_sssub_vec:
1397 case INDEX_op_ussub_vec:
1398 return have_vec && TCG_TARGET_HAS_sat_vec;
1399 case INDEX_op_smin_vec:
1400 case INDEX_op_umin_vec:
1401 case INDEX_op_smax_vec:
1402 case INDEX_op_umax_vec:
1403 return have_vec && TCG_TARGET_HAS_minmax_vec;
1404 case INDEX_op_bitsel_vec:
1405 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1406 case INDEX_op_cmpsel_vec:
1407 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1409 default:
1410 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1411 return true;
1415 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1416 and endian swap. Maybe it would be better to do the alignment
1417 and endian swap in tcg_reg_alloc_call(). */
1418 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1420 int i, real_args, nb_rets, pi;
1421 unsigned sizemask, flags;
1422 TCGHelperInfo *info;
1423 TCGOp *op;
1425 info = g_hash_table_lookup(helper_table, (gpointer)func);
1426 flags = info->flags;
1427 sizemask = info->sizemask;
1429 #ifdef CONFIG_PLUGIN
1430 /* detect non-plugin helpers */
1431 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1432 tcg_ctx->plugin_insn->calls_helpers = true;
1434 #endif
1436 #if defined(__sparc__) && !defined(__arch64__) \
1437 && !defined(CONFIG_TCG_INTERPRETER)
1438 /* We have 64-bit values in one register, but need to pass as two
1439 separate parameters. Split them. */
1440 int orig_sizemask = sizemask;
1441 int orig_nargs = nargs;
1442 TCGv_i64 retl, reth;
1443 TCGTemp *split_args[MAX_OPC_PARAM];
1445 retl = NULL;
1446 reth = NULL;
1447 if (sizemask != 0) {
1448 for (i = real_args = 0; i < nargs; ++i) {
1449 int is_64bit = sizemask & (1 << (i+1)*2);
1450 if (is_64bit) {
1451 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1452 TCGv_i32 h = tcg_temp_new_i32();
1453 TCGv_i32 l = tcg_temp_new_i32();
1454 tcg_gen_extr_i64_i32(l, h, orig);
1455 split_args[real_args++] = tcgv_i32_temp(h);
1456 split_args[real_args++] = tcgv_i32_temp(l);
1457 } else {
1458 split_args[real_args++] = args[i];
1461 nargs = real_args;
1462 args = split_args;
1463 sizemask = 0;
1465 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1466 for (i = 0; i < nargs; ++i) {
1467 int is_64bit = sizemask & (1 << (i+1)*2);
1468 int is_signed = sizemask & (2 << (i+1)*2);
1469 if (!is_64bit) {
1470 TCGv_i64 temp = tcg_temp_new_i64();
1471 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1472 if (is_signed) {
1473 tcg_gen_ext32s_i64(temp, orig);
1474 } else {
1475 tcg_gen_ext32u_i64(temp, orig);
1477 args[i] = tcgv_i64_temp(temp);
1480 #endif /* TCG_TARGET_EXTEND_ARGS */
1482 op = tcg_emit_op(INDEX_op_call);
1484 pi = 0;
1485 if (ret != NULL) {
1486 #if defined(__sparc__) && !defined(__arch64__) \
1487 && !defined(CONFIG_TCG_INTERPRETER)
1488 if (orig_sizemask & 1) {
1489 /* The 32-bit ABI is going to return the 64-bit value in
1490 the %o0/%o1 register pair. Prepare for this by using
1491 two return temporaries, and reassemble below. */
1492 retl = tcg_temp_new_i64();
1493 reth = tcg_temp_new_i64();
1494 op->args[pi++] = tcgv_i64_arg(reth);
1495 op->args[pi++] = tcgv_i64_arg(retl);
1496 nb_rets = 2;
1497 } else {
1498 op->args[pi++] = temp_arg(ret);
1499 nb_rets = 1;
1501 #else
1502 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1503 #ifdef HOST_WORDS_BIGENDIAN
1504 op->args[pi++] = temp_arg(ret + 1);
1505 op->args[pi++] = temp_arg(ret);
1506 #else
1507 op->args[pi++] = temp_arg(ret);
1508 op->args[pi++] = temp_arg(ret + 1);
1509 #endif
1510 nb_rets = 2;
1511 } else {
1512 op->args[pi++] = temp_arg(ret);
1513 nb_rets = 1;
1515 #endif
1516 } else {
1517 nb_rets = 0;
1519 TCGOP_CALLO(op) = nb_rets;
1521 real_args = 0;
1522 for (i = 0; i < nargs; i++) {
1523 int is_64bit = sizemask & (1 << (i+1)*2);
1524 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1525 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1526 /* some targets want aligned 64 bit args */
1527 if (real_args & 1) {
1528 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1529 real_args++;
1531 #endif
1532 /* If stack grows up, then we will be placing successive
1533 arguments at lower addresses, which means we need to
1534 reverse the order compared to how we would normally
1535 treat either big or little-endian. For those arguments
1536 that will wind up in registers, this still works for
1537 HPPA (the only current STACK_GROWSUP target) since the
1538 argument registers are *also* allocated in decreasing
1539 order. If another such target is added, this logic may
1540 have to get more complicated to differentiate between
1541 stack arguments and register arguments. */
1542 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1543 op->args[pi++] = temp_arg(args[i] + 1);
1544 op->args[pi++] = temp_arg(args[i]);
1545 #else
1546 op->args[pi++] = temp_arg(args[i]);
1547 op->args[pi++] = temp_arg(args[i] + 1);
1548 #endif
1549 real_args += 2;
1550 continue;
1553 op->args[pi++] = temp_arg(args[i]);
1554 real_args++;
1556 op->args[pi++] = (uintptr_t)func;
1557 op->args[pi++] = flags;
1558 TCGOP_CALLI(op) = real_args;
1560 /* Make sure the fields didn't overflow. */
1561 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1562 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1564 #if defined(__sparc__) && !defined(__arch64__) \
1565 && !defined(CONFIG_TCG_INTERPRETER)
1566 /* Free all of the parts we allocated above. */
1567 for (i = real_args = 0; i < orig_nargs; ++i) {
1568 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1569 if (is_64bit) {
1570 tcg_temp_free_internal(args[real_args++]);
1571 tcg_temp_free_internal(args[real_args++]);
1572 } else {
1573 real_args++;
1576 if (orig_sizemask & 1) {
1577 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1578 Note that describing these as TCGv_i64 eliminates an unnecessary
1579 zero-extension that tcg_gen_concat_i32_i64 would create. */
1580 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1581 tcg_temp_free_i64(retl);
1582 tcg_temp_free_i64(reth);
1584 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1585 for (i = 0; i < nargs; ++i) {
1586 int is_64bit = sizemask & (1 << (i+1)*2);
1587 if (!is_64bit) {
1588 tcg_temp_free_internal(args[i]);
1591 #endif /* TCG_TARGET_EXTEND_ARGS */
1594 static void tcg_reg_alloc_start(TCGContext *s)
1596 int i, n;
1598 for (i = 0, n = s->nb_temps; i < n; i++) {
1599 TCGTemp *ts = &s->temps[i];
1600 TCGTempVal val = TEMP_VAL_MEM;
1602 switch (ts->kind) {
1603 case TEMP_CONST:
1604 val = TEMP_VAL_CONST;
1605 break;
1606 case TEMP_FIXED:
1607 val = TEMP_VAL_REG;
1608 break;
1609 case TEMP_GLOBAL:
1610 break;
1611 case TEMP_NORMAL:
1612 val = TEMP_VAL_DEAD;
1613 /* fall through */
1614 case TEMP_LOCAL:
1615 ts->mem_allocated = 0;
1616 break;
1617 default:
1618 g_assert_not_reached();
1620 ts->val_type = val;
1623 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1626 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1627 TCGTemp *ts)
1629 int idx = temp_idx(ts);
1631 switch (ts->kind) {
1632 case TEMP_FIXED:
1633 case TEMP_GLOBAL:
1634 pstrcpy(buf, buf_size, ts->name);
1635 break;
1636 case TEMP_LOCAL:
1637 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1638 break;
1639 case TEMP_NORMAL:
1640 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1641 break;
1642 case TEMP_CONST:
1643 switch (ts->type) {
1644 case TCG_TYPE_I32:
1645 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1646 break;
1647 #if TCG_TARGET_REG_BITS > 32
1648 case TCG_TYPE_I64:
1649 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1650 break;
1651 #endif
1652 case TCG_TYPE_V64:
1653 case TCG_TYPE_V128:
1654 case TCG_TYPE_V256:
1655 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1656 64 << (ts->type - TCG_TYPE_V64), ts->val);
1657 break;
1658 default:
1659 g_assert_not_reached();
1661 break;
1663 return buf;
1666 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1667 int buf_size, TCGArg arg)
1669 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1672 /* Find helper name. */
1673 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1675 const char *ret = NULL;
1676 if (helper_table) {
1677 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1678 if (info) {
1679 ret = info->name;
1682 return ret;
1685 static const char * const cond_name[] =
1687 [TCG_COND_NEVER] = "never",
1688 [TCG_COND_ALWAYS] = "always",
1689 [TCG_COND_EQ] = "eq",
1690 [TCG_COND_NE] = "ne",
1691 [TCG_COND_LT] = "lt",
1692 [TCG_COND_GE] = "ge",
1693 [TCG_COND_LE] = "le",
1694 [TCG_COND_GT] = "gt",
1695 [TCG_COND_LTU] = "ltu",
1696 [TCG_COND_GEU] = "geu",
1697 [TCG_COND_LEU] = "leu",
1698 [TCG_COND_GTU] = "gtu"
1701 static const char * const ldst_name[] =
1703 [MO_UB] = "ub",
1704 [MO_SB] = "sb",
1705 [MO_LEUW] = "leuw",
1706 [MO_LESW] = "lesw",
1707 [MO_LEUL] = "leul",
1708 [MO_LESL] = "lesl",
1709 [MO_LEQ] = "leq",
1710 [MO_BEUW] = "beuw",
1711 [MO_BESW] = "besw",
1712 [MO_BEUL] = "beul",
1713 [MO_BESL] = "besl",
1714 [MO_BEQ] = "beq",
1717 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1718 #ifdef TARGET_ALIGNED_ONLY
1719 [MO_UNALN >> MO_ASHIFT] = "un+",
1720 [MO_ALIGN >> MO_ASHIFT] = "",
1721 #else
1722 [MO_UNALN >> MO_ASHIFT] = "",
1723 [MO_ALIGN >> MO_ASHIFT] = "al+",
1724 #endif
1725 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1726 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1727 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1728 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1729 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1730 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1733 static inline bool tcg_regset_single(TCGRegSet d)
1735 return (d & (d - 1)) == 0;
1738 static inline TCGReg tcg_regset_first(TCGRegSet d)
1740 if (TCG_TARGET_NB_REGS <= 32) {
1741 return ctz32(d);
1742 } else {
1743 return ctz64(d);
1747 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1749 char buf[128];
1750 TCGOp *op;
1752 QTAILQ_FOREACH(op, &s->ops, link) {
1753 int i, k, nb_oargs, nb_iargs, nb_cargs;
1754 const TCGOpDef *def;
1755 TCGOpcode c;
1756 int col = 0;
1758 c = op->opc;
1759 def = &tcg_op_defs[c];
1761 if (c == INDEX_op_insn_start) {
1762 nb_oargs = 0;
1763 col += qemu_log("\n ----");
1765 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1766 target_ulong a;
1767 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1768 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1769 #else
1770 a = op->args[i];
1771 #endif
1772 col += qemu_log(" " TARGET_FMT_lx, a);
1774 } else if (c == INDEX_op_call) {
1775 /* variable number of arguments */
1776 nb_oargs = TCGOP_CALLO(op);
1777 nb_iargs = TCGOP_CALLI(op);
1778 nb_cargs = def->nb_cargs;
1780 /* function name, flags, out args */
1781 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1782 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1783 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1784 for (i = 0; i < nb_oargs; i++) {
1785 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1786 op->args[i]));
1788 for (i = 0; i < nb_iargs; i++) {
1789 TCGArg arg = op->args[nb_oargs + i];
1790 const char *t = "<dummy>";
1791 if (arg != TCG_CALL_DUMMY_ARG) {
1792 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1794 col += qemu_log(",%s", t);
1796 } else {
1797 col += qemu_log(" %s ", def->name);
1799 nb_oargs = def->nb_oargs;
1800 nb_iargs = def->nb_iargs;
1801 nb_cargs = def->nb_cargs;
1803 if (def->flags & TCG_OPF_VECTOR) {
1804 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1805 8 << TCGOP_VECE(op));
1808 k = 0;
1809 for (i = 0; i < nb_oargs; i++) {
1810 if (k != 0) {
1811 col += qemu_log(",");
1813 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1814 op->args[k++]));
1816 for (i = 0; i < nb_iargs; i++) {
1817 if (k != 0) {
1818 col += qemu_log(",");
1820 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1821 op->args[k++]));
1823 switch (c) {
1824 case INDEX_op_brcond_i32:
1825 case INDEX_op_setcond_i32:
1826 case INDEX_op_movcond_i32:
1827 case INDEX_op_brcond2_i32:
1828 case INDEX_op_setcond2_i32:
1829 case INDEX_op_brcond_i64:
1830 case INDEX_op_setcond_i64:
1831 case INDEX_op_movcond_i64:
1832 case INDEX_op_cmp_vec:
1833 case INDEX_op_cmpsel_vec:
1834 if (op->args[k] < ARRAY_SIZE(cond_name)
1835 && cond_name[op->args[k]]) {
1836 col += qemu_log(",%s", cond_name[op->args[k++]]);
1837 } else {
1838 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1840 i = 1;
1841 break;
1842 case INDEX_op_qemu_ld_i32:
1843 case INDEX_op_qemu_st_i32:
1844 case INDEX_op_qemu_st8_i32:
1845 case INDEX_op_qemu_ld_i64:
1846 case INDEX_op_qemu_st_i64:
1848 TCGMemOpIdx oi = op->args[k++];
1849 MemOp op = get_memop(oi);
1850 unsigned ix = get_mmuidx(oi);
1852 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1853 col += qemu_log(",$0x%x,%u", op, ix);
1854 } else {
1855 const char *s_al, *s_op;
1856 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1857 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1858 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1860 i = 1;
1862 break;
1863 default:
1864 i = 0;
1865 break;
1867 switch (c) {
1868 case INDEX_op_set_label:
1869 case INDEX_op_br:
1870 case INDEX_op_brcond_i32:
1871 case INDEX_op_brcond_i64:
1872 case INDEX_op_brcond2_i32:
1873 col += qemu_log("%s$L%d", k ? "," : "",
1874 arg_label(op->args[k])->id);
1875 i++, k++;
1876 break;
1877 default:
1878 break;
1880 for (; i < nb_cargs; i++, k++) {
1881 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1885 if (have_prefs || op->life) {
1887 QemuLogFile *logfile;
1889 rcu_read_lock();
1890 logfile = qatomic_rcu_read(&qemu_logfile);
1891 if (logfile) {
1892 for (; col < 40; ++col) {
1893 putc(' ', logfile->fd);
1896 rcu_read_unlock();
1899 if (op->life) {
1900 unsigned life = op->life;
1902 if (life & (SYNC_ARG * 3)) {
1903 qemu_log(" sync:");
1904 for (i = 0; i < 2; ++i) {
1905 if (life & (SYNC_ARG << i)) {
1906 qemu_log(" %d", i);
1910 life /= DEAD_ARG;
1911 if (life) {
1912 qemu_log(" dead:");
1913 for (i = 0; life; ++i, life >>= 1) {
1914 if (life & 1) {
1915 qemu_log(" %d", i);
1921 if (have_prefs) {
1922 for (i = 0; i < nb_oargs; ++i) {
1923 TCGRegSet set = op->output_pref[i];
1925 if (i == 0) {
1926 qemu_log(" pref=");
1927 } else {
1928 qemu_log(",");
1930 if (set == 0) {
1931 qemu_log("none");
1932 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
1933 qemu_log("all");
1934 #ifdef CONFIG_DEBUG_TCG
1935 } else if (tcg_regset_single(set)) {
1936 TCGReg reg = tcg_regset_first(set);
1937 qemu_log("%s", tcg_target_reg_names[reg]);
1938 #endif
1939 } else if (TCG_TARGET_NB_REGS <= 32) {
1940 qemu_log("%#x", (uint32_t)set);
1941 } else {
1942 qemu_log("%#" PRIx64, (uint64_t)set);
1947 qemu_log("\n");
1951 /* we give more priority to constraints with fewer registers */
1952 static int get_constraint_priority(const TCGOpDef *def, int k)
1954 const TCGArgConstraint *arg_ct = &def->args_ct[k];
1955 int n;
1957 if (arg_ct->oalias) {
1958 /* an alias is equivalent to a single register */
1959 n = 1;
1960 } else {
1961 n = ctpop64(arg_ct->regs);
1963 return TCG_TARGET_NB_REGS - n + 1;
1966 /* sort from highest priority to lowest */
1967 static void sort_constraints(TCGOpDef *def, int start, int n)
1969 int i, j;
1970 TCGArgConstraint *a = def->args_ct;
1972 for (i = 0; i < n; i++) {
1973 a[start + i].sort_index = start + i;
1975 if (n <= 1) {
1976 return;
1978 for (i = 0; i < n - 1; i++) {
1979 for (j = i + 1; j < n; j++) {
1980 int p1 = get_constraint_priority(def, a[start + i].sort_index);
1981 int p2 = get_constraint_priority(def, a[start + j].sort_index);
1982 if (p1 < p2) {
1983 int tmp = a[start + i].sort_index;
1984 a[start + i].sort_index = a[start + j].sort_index;
1985 a[start + j].sort_index = tmp;
1991 static void process_op_defs(TCGContext *s)
1993 TCGOpcode op;
1995 for (op = 0; op < NB_OPS; op++) {
1996 TCGOpDef *def = &tcg_op_defs[op];
1997 const TCGTargetOpDef *tdefs;
1998 int i, nb_args;
2000 if (def->flags & TCG_OPF_NOT_PRESENT) {
2001 continue;
2004 nb_args = def->nb_iargs + def->nb_oargs;
2005 if (nb_args == 0) {
2006 continue;
2009 /*
2010 * Macro magic should make it impossible, but double-check that
2011 * the array index is in range. Since the signedness of an enum
2012 * is implementation defined, force the result to unsigned.
2013 */
2014 unsigned con_set = tcg_target_op_def(op);
2015 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2016 tdefs = &constraint_sets[con_set];
2018 for (i = 0; i < nb_args; i++) {
2019 const char *ct_str = tdefs->args_ct_str[i];
2020 /* Incomplete TCGTargetOpDef entry. */
2021 tcg_debug_assert(ct_str != NULL);
2023 while (*ct_str != '\0') {
2024 switch(*ct_str) {
2025 case '0' ... '9':
2027 int oarg = *ct_str - '0';
2028 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2029 tcg_debug_assert(oarg < def->nb_oargs);
2030 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2031 def->args_ct[i] = def->args_ct[oarg];
2032 /* The output sets oalias. */
2033 def->args_ct[oarg].oalias = true;
2034 def->args_ct[oarg].alias_index = i;
2035 /* The input sets ialias. */
2036 def->args_ct[i].ialias = true;
2037 def->args_ct[i].alias_index = oarg;
2039 ct_str++;
2040 break;
2041 case '&':
2042 def->args_ct[i].newreg = true;
2043 ct_str++;
2044 break;
2045 case 'i':
2046 def->args_ct[i].ct |= TCG_CT_CONST;
2047 ct_str++;
2048 break;
2050 /* Include all of the target-specific constraints. */
2052 #undef CONST
2053 #define CONST(CASE, MASK) \
2054 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2055 #define REGS(CASE, MASK) \
2056 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2058 #include "tcg-target-con-str.h"
2060 #undef REGS
2061 #undef CONST
2062 default:
2063 /* Typo in TCGTargetOpDef constraint. */
2064 g_assert_not_reached();
2069 /* TCGTargetOpDef entry with too much information? */
2070 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2072 /* sort the constraints (XXX: this is just a heuristic) */
2073 sort_constraints(def, 0, def->nb_oargs);
2074 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2078 void tcg_op_remove(TCGContext *s, TCGOp *op)
2080 TCGLabel *label;
2082 switch (op->opc) {
2083 case INDEX_op_br:
2084 label = arg_label(op->args[0]);
2085 label->refs--;
2086 break;
2087 case INDEX_op_brcond_i32:
2088 case INDEX_op_brcond_i64:
2089 label = arg_label(op->args[3]);
2090 label->refs--;
2091 break;
2092 case INDEX_op_brcond2_i32:
2093 label = arg_label(op->args[5]);
2094 label->refs--;
2095 break;
2096 default:
2097 break;
2100 QTAILQ_REMOVE(&s->ops, op, link);
2101 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2102 s->nb_ops--;
2104 #ifdef CONFIG_PROFILER
2105 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2106 #endif
2109 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2111 TCGContext *s = tcg_ctx;
2112 TCGOp *op;
2114 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2115 op = tcg_malloc(sizeof(TCGOp));
2116 } else {
2117 op = QTAILQ_FIRST(&s->free_ops);
2118 QTAILQ_REMOVE(&s->free_ops, op, link);
2120 memset(op, 0, offsetof(TCGOp, link));
2121 op->opc = opc;
2122 s->nb_ops++;
2124 return op;
2127 TCGOp *tcg_emit_op(TCGOpcode opc)
2129 TCGOp *op = tcg_op_alloc(opc);
2130 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2131 return op;
2134 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2136 TCGOp *new_op = tcg_op_alloc(opc);
2137 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2138 return new_op;
2141 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2143 TCGOp *new_op = tcg_op_alloc(opc);
2144 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2145 return new_op;
2148 /* Reachable analysis: remove unreachable code. */
2149 static void reachable_code_pass(TCGContext *s)
2151 TCGOp *op, *op_next;
2152 bool dead = false;
2154 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2155 bool remove = dead;
2156 TCGLabel *label;
2157 int call_flags;
2159 switch (op->opc) {
2160 case INDEX_op_set_label:
2161 label = arg_label(op->args[0]);
2162 if (label->refs == 0) {
2164 * While there is an occasional backward branch, virtually
2165 * all branches generated by the translators are forward.
2166 * This means that by the time we see a label we will generally
2167 * have removed all references to it already, and there is
2168 * little to be gained by iterating.
2170 remove = true;
2171 } else {
2172 /* Once we see a label, insns become live again. */
2173 dead = false;
2174 remove = false;
2177 * Optimization can fold conditional branches to unconditional.
2178 * If we find a label with one reference which is preceded by
2179 * an unconditional branch to it, remove both. This needed to
2180 * wait until the dead code in between them was removed.
2182 if (label->refs == 1) {
2183 TCGOp *op_prev = QTAILQ_PREV(op, link);
2184 if (op_prev->opc == INDEX_op_br &&
2185 label == arg_label(op_prev->args[0])) {
2186 tcg_op_remove(s, op_prev);
2187 remove = true;
2191 break;
2193 case INDEX_op_br:
2194 case INDEX_op_exit_tb:
2195 case INDEX_op_goto_ptr:
2196 /* Unconditional branches; everything following is dead. */
2197 dead = true;
2198 break;
2200 case INDEX_op_call:
2201 /* Notice noreturn helper calls, raising exceptions. */
2202 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2203 if (call_flags & TCG_CALL_NO_RETURN) {
2204 dead = true;
2206 break;
2208 case INDEX_op_insn_start:
2209 /* Never remove -- we need to keep these for unwind. */
2210 remove = false;
2211 break;
2213 default:
2214 break;
2217 if (remove) {
2218 tcg_op_remove(s, op);
2223 #define TS_DEAD 1
2224 #define TS_MEM 2
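/*
 * Temp state bits for the liveness passes: TS_DEAD means the value is
 * not needed by any later op, TS_MEM means the value must (also) be
 * present in its canonical memory slot.
 */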
2226 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2227 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2229 /* For liveness_pass_1, the register preferences for a given temp. */
2230 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2232 return ts->state_ptr;
2235 /* For liveness_pass_1, reset the preferences for a given temp to the
2236 * maximal regset for its type.
2238 static inline void la_reset_pref(TCGTemp *ts)
2240 *la_temp_pref(ts)
2241 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2244 /* liveness analysis: end of function: all temps are dead, and globals
2245 should be in memory. */
2246 static void la_func_end(TCGContext *s, int ng, int nt)
2248 int i;
2250 for (i = 0; i < ng; ++i) {
2251 s->temps[i].state = TS_DEAD | TS_MEM;
2252 la_reset_pref(&s->temps[i]);
2254 for (i = ng; i < nt; ++i) {
2255 s->temps[i].state = TS_DEAD;
2256 la_reset_pref(&s->temps[i]);
2260 /* liveness analysis: end of basic block: all temps are dead, globals
2261 and local temps should be in memory. */
2262 static void la_bb_end(TCGContext *s, int ng, int nt)
2264 int i;
2266 for (i = 0; i < nt; ++i) {
2267 TCGTemp *ts = &s->temps[i];
2268 int state;
2270 switch (ts->kind) {
2271 case TEMP_FIXED:
2272 case TEMP_GLOBAL:
2273 case TEMP_LOCAL:
2274 state = TS_DEAD | TS_MEM;
2275 break;
2276 case TEMP_NORMAL:
2277 case TEMP_CONST:
2278 state = TS_DEAD;
2279 break;
2280 default:
2281 g_assert_not_reached();
2283 ts->state = state;
2284 la_reset_pref(ts);
2288 /* liveness analysis: sync globals back to memory. */
2289 static void la_global_sync(TCGContext *s, int ng)
2291 int i;
2293 for (i = 0; i < ng; ++i) {
2294 int state = s->temps[i].state;
2295 s->temps[i].state = state | TS_MEM;
2296 if (state == TS_DEAD) {
2297 /* If the global was previously dead, reset prefs. */
2298 la_reset_pref(&s->temps[i]);
2304 * liveness analysis: conditional branch: all temps are dead,
2305 * globals and local temps should be synced.
2307 static void la_bb_sync(TCGContext *s, int ng, int nt)
2309 la_global_sync(s, ng);
2311 for (int i = ng; i < nt; ++i) {
2312 TCGTemp *ts = &s->temps[i];
2313 int state;
2315 switch (ts->kind) {
2316 case TEMP_LOCAL:
2317 state = ts->state;
2318 ts->state = state | TS_MEM;
2319 if (state != TS_DEAD) {
2320 continue;
2322 break;
2323 case TEMP_NORMAL:
2324 s->temps[i].state = TS_DEAD;
2325 break;
2326 case TEMP_CONST:
2327 continue;
2328 default:
2329 g_assert_not_reached();
2331 la_reset_pref(&s->temps[i]);
2335 /* liveness analysis: sync globals back to memory and kill. */
2336 static void la_global_kill(TCGContext *s, int ng)
2338 int i;
2340 for (i = 0; i < ng; i++) {
2341 s->temps[i].state = TS_DEAD | TS_MEM;
2342 la_reset_pref(&s->temps[i]);
2346 /* liveness analysis: note live globals crossing calls. */
2347 static void la_cross_call(TCGContext *s, int nt)
2349 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2350 int i;
2352 for (i = 0; i < nt; i++) {
2353 TCGTemp *ts = &s->temps[i];
2354 if (!(ts->state & TS_DEAD)) {
2355 TCGRegSet *pset = la_temp_pref(ts);
2356 TCGRegSet set = *pset;
2358 set &= mask;
2359 /* If the combination is not possible, restart. */
2360 if (set == 0) {
2361 set = tcg_target_available_regs[ts->type] & mask;
2363 *pset = set;
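/*
 * Pass 1 walks the op list backwards, filling in op->life with DEAD_ARG
 * and SYNC_ARG bits, deleting ops whose outputs are all dead, narrowing
 * multiword arithmetic to single-word forms when possible, and tracking
 * per-temp register preferences via state_ptr.
 */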
2368 /* Liveness analysis: update the opc_arg_life array to tell if a
2369 given input argument is dead. Instructions updating dead
2370 temporaries are removed. */
2371 static void liveness_pass_1(TCGContext *s)
2373 int nb_globals = s->nb_globals;
2374 int nb_temps = s->nb_temps;
2375 TCGOp *op, *op_prev;
2376 TCGRegSet *prefs;
2377 int i;
2379 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2380 for (i = 0; i < nb_temps; ++i) {
2381 s->temps[i].state_ptr = prefs + i;
2384 /* ??? Should be redundant with the exit_tb that ends the TB. */
2385 la_func_end(s, nb_globals, nb_temps);
2387 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2388 int nb_iargs, nb_oargs;
2389 TCGOpcode opc_new, opc_new2;
2390 bool have_opc_new2;
2391 TCGLifeData arg_life = 0;
2392 TCGTemp *ts;
2393 TCGOpcode opc = op->opc;
2394 const TCGOpDef *def = &tcg_op_defs[opc];
2396 switch (opc) {
2397 case INDEX_op_call:
2399 int call_flags;
2400 int nb_call_regs;
2402 nb_oargs = TCGOP_CALLO(op);
2403 nb_iargs = TCGOP_CALLI(op);
2404 call_flags = op->args[nb_oargs + nb_iargs + 1];
2406 /* pure functions can be removed if their result is unused */
2407 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2408 for (i = 0; i < nb_oargs; i++) {
2409 ts = arg_temp(op->args[i]);
2410 if (ts->state != TS_DEAD) {
2411 goto do_not_remove_call;
2414 goto do_remove;
2416 do_not_remove_call:
2418 /* Output args are dead. */
2419 for (i = 0; i < nb_oargs; i++) {
2420 ts = arg_temp(op->args[i]);
2421 if (ts->state & TS_DEAD) {
2422 arg_life |= DEAD_ARG << i;
2424 if (ts->state & TS_MEM) {
2425 arg_life |= SYNC_ARG << i;
2427 ts->state = TS_DEAD;
2428 la_reset_pref(ts);
2430 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2431 op->output_pref[i] = 0;
2434 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2435 TCG_CALL_NO_READ_GLOBALS))) {
2436 la_global_kill(s, nb_globals);
2437 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2438 la_global_sync(s, nb_globals);
2441 /* Record arguments that die in this helper. */
2442 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2443 ts = arg_temp(op->args[i]);
2444 if (ts && ts->state & TS_DEAD) {
2445 arg_life |= DEAD_ARG << i;
2449 /* For all live registers, remove call-clobbered prefs. */
2450 la_cross_call(s, nb_temps);
2452 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2454 /* Input arguments are live for preceding opcodes. */
2455 for (i = 0; i < nb_iargs; i++) {
2456 ts = arg_temp(op->args[i + nb_oargs]);
2457 if (ts && ts->state & TS_DEAD) {
2458 /* For those arguments that die, and will be allocated
2459 * in registers, clear the register set for that arg,
2460 * to be filled in below. For args that will be on
2461 * the stack, reset to any available reg.
2463 *la_temp_pref(ts)
2464 = (i < nb_call_regs ? 0 :
2465 tcg_target_available_regs[ts->type]);
2466 ts->state &= ~TS_DEAD;
2470 /* For each input argument, add its input register to prefs.
2471 If a temp is used once, this produces a single set bit. */
2472 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2473 ts = arg_temp(op->args[i + nb_oargs]);
2474 if (ts) {
2475 tcg_regset_set_reg(*la_temp_pref(ts),
2476 tcg_target_call_iarg_regs[i]);
2480 break;
2481 case INDEX_op_insn_start:
2482 break;
2483 case INDEX_op_discard:
2484 /* mark the temporary as dead */
2485 ts = arg_temp(op->args[0]);
2486 ts->state = TS_DEAD;
2487 la_reset_pref(ts);
2488 break;
2490 case INDEX_op_add2_i32:
2491 opc_new = INDEX_op_add_i32;
2492 goto do_addsub2;
2493 case INDEX_op_sub2_i32:
2494 opc_new = INDEX_op_sub_i32;
2495 goto do_addsub2;
2496 case INDEX_op_add2_i64:
2497 opc_new = INDEX_op_add_i64;
2498 goto do_addsub2;
2499 case INDEX_op_sub2_i64:
2500 opc_new = INDEX_op_sub_i64;
2501 do_addsub2:
2502 nb_iargs = 4;
2503 nb_oargs = 2;
2504 /* Test if the high part of the operation is dead, but not
2505 the low part. The result can be optimized to a simple
2506 add or sub. This happens often for an x86_64 guest when the
2507 cpu mode is set to 32-bit. */
2508 if (arg_temp(op->args[1])->state == TS_DEAD) {
2509 if (arg_temp(op->args[0])->state == TS_DEAD) {
2510 goto do_remove;
2512 /* Replace the opcode and adjust the args in place,
2513 leaving 3 unused args at the end. */
2514 op->opc = opc = opc_new;
2515 op->args[1] = op->args[2];
2516 op->args[2] = op->args[4];
2517 /* Fall through and mark the single-word operation live. */
2518 nb_iargs = 2;
2519 nb_oargs = 1;
2521 goto do_not_remove;
2523 case INDEX_op_mulu2_i32:
2524 opc_new = INDEX_op_mul_i32;
2525 opc_new2 = INDEX_op_muluh_i32;
2526 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2527 goto do_mul2;
2528 case INDEX_op_muls2_i32:
2529 opc_new = INDEX_op_mul_i32;
2530 opc_new2 = INDEX_op_mulsh_i32;
2531 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2532 goto do_mul2;
2533 case INDEX_op_mulu2_i64:
2534 opc_new = INDEX_op_mul_i64;
2535 opc_new2 = INDEX_op_muluh_i64;
2536 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2537 goto do_mul2;
2538 case INDEX_op_muls2_i64:
2539 opc_new = INDEX_op_mul_i64;
2540 opc_new2 = INDEX_op_mulsh_i64;
2541 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2542 goto do_mul2;
2543 do_mul2:
2544 nb_iargs = 2;
2545 nb_oargs = 2;
2546 if (arg_temp(op->args[1])->state == TS_DEAD) {
2547 if (arg_temp(op->args[0])->state == TS_DEAD) {
2548 /* Both parts of the operation are dead. */
2549 goto do_remove;
2551 /* The high part of the operation is dead; generate the low. */
2552 op->opc = opc = opc_new;
2553 op->args[1] = op->args[2];
2554 op->args[2] = op->args[3];
2555 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2556 /* The low part of the operation is dead; generate the high. */
2557 op->opc = opc = opc_new2;
2558 op->args[0] = op->args[1];
2559 op->args[1] = op->args[2];
2560 op->args[2] = op->args[3];
2561 } else {
2562 goto do_not_remove;
2564 /* Mark the single-word operation live. */
2565 nb_oargs = 1;
2566 goto do_not_remove;
2568 default:
2569 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2570 nb_iargs = def->nb_iargs;
2571 nb_oargs = def->nb_oargs;
2573 /* Test if the operation can be removed because all
2574 its outputs are dead. We assume that nb_oargs == 0
2575 implies side effects */
2576 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2577 for (i = 0; i < nb_oargs; i++) {
2578 if (arg_temp(op->args[i])->state != TS_DEAD) {
2579 goto do_not_remove;
2582 goto do_remove;
2584 goto do_not_remove;
2586 do_remove:
2587 tcg_op_remove(s, op);
2588 break;
2590 do_not_remove:
2591 for (i = 0; i < nb_oargs; i++) {
2592 ts = arg_temp(op->args[i]);
2594 /* Remember the preference of the uses that followed. */
2595 op->output_pref[i] = *la_temp_pref(ts);
2597 /* Output args are dead. */
2598 if (ts->state & TS_DEAD) {
2599 arg_life |= DEAD_ARG << i;
2601 if (ts->state & TS_MEM) {
2602 arg_life |= SYNC_ARG << i;
2604 ts->state = TS_DEAD;
2605 la_reset_pref(ts);
2608 /* If end of basic block, update. */
2609 if (def->flags & TCG_OPF_BB_EXIT) {
2610 la_func_end(s, nb_globals, nb_temps);
2611 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2612 la_bb_sync(s, nb_globals, nb_temps);
2613 } else if (def->flags & TCG_OPF_BB_END) {
2614 la_bb_end(s, nb_globals, nb_temps);
2615 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2616 la_global_sync(s, nb_globals);
2617 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2618 la_cross_call(s, nb_temps);
2622 /* Record arguments that die in this opcode. */
2623 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2624 ts = arg_temp(op->args[i]);
2625 if (ts->state & TS_DEAD) {
2626 arg_life |= DEAD_ARG << i;
2630 /* Input arguments are live for preceding opcodes. */
2631 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2632 ts = arg_temp(op->args[i]);
2633 if (ts->state & TS_DEAD) {
2634 /* For operands that were dead, initially allow
2635 all regs for the type. */
2636 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2637 ts->state &= ~TS_DEAD;
2641 /* Incorporate constraints for this operand. */
2642 switch (opc) {
2643 case INDEX_op_mov_i32:
2644 case INDEX_op_mov_i64:
2645 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2646 have proper constraints. That said, special case
2647 moves to propagate preferences backward. */
2648 if (IS_DEAD_ARG(1)) {
2649 *la_temp_pref(arg_temp(op->args[0]))
2650 = *la_temp_pref(arg_temp(op->args[1]));
2652 break;
2654 default:
2655 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2656 const TCGArgConstraint *ct = &def->args_ct[i];
2657 TCGRegSet set, *pset;
2659 ts = arg_temp(op->args[i]);
2660 pset = la_temp_pref(ts);
2661 set = *pset;
2663 set &= ct->regs;
2664 if (ct->ialias) {
2665 set &= op->output_pref[ct->alias_index];
2667 /* If the combination is not possible, restart. */
2668 if (set == 0) {
2669 set = ct->regs;
2671 *pset = set;
2673 break;
2675 break;
2677 op->life = arg_life;
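/*
 * Pass 2 gives each indirect global a shadow direct temp and rewrites
 * uses to go through it, inserting explicit ld ops before reads and st
 * ops after writes.  It returns true if anything changed, in which case
 * the caller re-runs pass 1 over the modified op list.
 */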
2681 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2682 static bool liveness_pass_2(TCGContext *s)
2684 int nb_globals = s->nb_globals;
2685 int nb_temps, i;
2686 bool changes = false;
2687 TCGOp *op, *op_next;
2689 /* Create a temporary for each indirect global. */
2690 for (i = 0; i < nb_globals; ++i) {
2691 TCGTemp *its = &s->temps[i];
2692 if (its->indirect_reg) {
2693 TCGTemp *dts = tcg_temp_alloc(s);
2694 dts->type = its->type;
2695 dts->base_type = its->base_type;
2696 its->state_ptr = dts;
2697 } else {
2698 its->state_ptr = NULL;
2700 /* All globals begin dead. */
2701 its->state = TS_DEAD;
2703 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2704 TCGTemp *its = &s->temps[i];
2705 its->state_ptr = NULL;
2706 its->state = TS_DEAD;
2709 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2710 TCGOpcode opc = op->opc;
2711 const TCGOpDef *def = &tcg_op_defs[opc];
2712 TCGLifeData arg_life = op->life;
2713 int nb_iargs, nb_oargs, call_flags;
2714 TCGTemp *arg_ts, *dir_ts;
2716 if (opc == INDEX_op_call) {
2717 nb_oargs = TCGOP_CALLO(op);
2718 nb_iargs = TCGOP_CALLI(op);
2719 call_flags = op->args[nb_oargs + nb_iargs + 1];
2720 } else {
2721 nb_iargs = def->nb_iargs;
2722 nb_oargs = def->nb_oargs;
2724 /* Set flags similar to how calls require. */
2725 if (def->flags & TCG_OPF_COND_BRANCH) {
2726 /* Like reading globals: sync_globals */
2727 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2728 } else if (def->flags & TCG_OPF_BB_END) {
2729 /* Like writing globals: save_globals */
2730 call_flags = 0;
2731 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2732 /* Like reading globals: sync_globals */
2733 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2734 } else {
2735 /* No effect on globals. */
2736 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2737 TCG_CALL_NO_WRITE_GLOBALS);
2741 /* Make sure that input arguments are available. */
2742 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2743 arg_ts = arg_temp(op->args[i]);
2744 if (arg_ts) {
2745 dir_ts = arg_ts->state_ptr;
2746 if (dir_ts && arg_ts->state == TS_DEAD) {
2747 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2748 ? INDEX_op_ld_i32
2749 : INDEX_op_ld_i64);
2750 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2752 lop->args[0] = temp_arg(dir_ts);
2753 lop->args[1] = temp_arg(arg_ts->mem_base);
2754 lop->args[2] = arg_ts->mem_offset;
2756 /* Loaded, but synced with memory. */
2757 arg_ts->state = TS_MEM;
2762 /* Perform input replacement, and mark inputs that became dead.
2763 No action is required except keeping temp_state up to date
2764 so that we reload when needed. */
2765 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2766 arg_ts = arg_temp(op->args[i]);
2767 if (arg_ts) {
2768 dir_ts = arg_ts->state_ptr;
2769 if (dir_ts) {
2770 op->args[i] = temp_arg(dir_ts);
2771 changes = true;
2772 if (IS_DEAD_ARG(i)) {
2773 arg_ts->state = TS_DEAD;
2779 /* Liveness analysis should ensure that the following are
2780 all correct, for call sites and basic block end points. */
2781 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2782 /* Nothing to do */
2783 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2784 for (i = 0; i < nb_globals; ++i) {
2785 /* Liveness should see that globals are synced back,
2786 that is, either TS_DEAD or TS_MEM. */
2787 arg_ts = &s->temps[i];
2788 tcg_debug_assert(arg_ts->state_ptr == 0
2789 || arg_ts->state != 0);
2791 } else {
2792 for (i = 0; i < nb_globals; ++i) {
2793 /* Liveness should see that globals are saved back,
2794 that is, TS_DEAD, waiting to be reloaded. */
2795 arg_ts = &s->temps[i];
2796 tcg_debug_assert(arg_ts->state_ptr == 0
2797 || arg_ts->state == TS_DEAD);
2801 /* Outputs become available. */
2802 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2803 arg_ts = arg_temp(op->args[0]);
2804 dir_ts = arg_ts->state_ptr;
2805 if (dir_ts) {
2806 op->args[0] = temp_arg(dir_ts);
2807 changes = true;
2809 /* The output is now live and modified. */
2810 arg_ts->state = 0;
2812 if (NEED_SYNC_ARG(0)) {
2813 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2814 ? INDEX_op_st_i32
2815 : INDEX_op_st_i64);
2816 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2817 TCGTemp *out_ts = dir_ts;
2819 if (IS_DEAD_ARG(0)) {
2820 out_ts = arg_temp(op->args[1]);
2821 arg_ts->state = TS_DEAD;
2822 tcg_op_remove(s, op);
2823 } else {
2824 arg_ts->state = TS_MEM;
2827 sop->args[0] = temp_arg(out_ts);
2828 sop->args[1] = temp_arg(arg_ts->mem_base);
2829 sop->args[2] = arg_ts->mem_offset;
2830 } else {
2831 tcg_debug_assert(!IS_DEAD_ARG(0));
2834 } else {
2835 for (i = 0; i < nb_oargs; i++) {
2836 arg_ts = arg_temp(op->args[i]);
2837 dir_ts = arg_ts->state_ptr;
2838 if (!dir_ts) {
2839 continue;
2841 op->args[i] = temp_arg(dir_ts);
2842 changes = true;
2844 /* The output is now live and modified. */
2845 arg_ts->state = 0;
2847 /* Sync outputs upon their last write. */
2848 if (NEED_SYNC_ARG(i)) {
2849 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2850 ? INDEX_op_st_i32
2851 : INDEX_op_st_i64);
2852 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2854 sop->args[0] = temp_arg(dir_ts);
2855 sop->args[1] = temp_arg(arg_ts->mem_base);
2856 sop->args[2] = arg_ts->mem_offset;
2858 arg_ts->state = TS_MEM;
2860 /* Drop outputs that are dead. */
2861 if (IS_DEAD_ARG(i)) {
2862 arg_ts->state = TS_DEAD;
2868 return changes;
2871 #ifdef CONFIG_DEBUG_TCG
2872 static void dump_regs(TCGContext *s)
2874 TCGTemp *ts;
2875 int i;
2876 char buf[64];
2878 for(i = 0; i < s->nb_temps; i++) {
2879 ts = &s->temps[i];
2880 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2881 switch(ts->val_type) {
2882 case TEMP_VAL_REG:
2883 printf("%s", tcg_target_reg_names[ts->reg]);
2884 break;
2885 case TEMP_VAL_MEM:
2886 printf("%d(%s)", (int)ts->mem_offset,
2887 tcg_target_reg_names[ts->mem_base->reg]);
2888 break;
2889 case TEMP_VAL_CONST:
2890 printf("$0x%" PRIx64, ts->val);
2891 break;
2892 case TEMP_VAL_DEAD:
2893 printf("D");
2894 break;
2895 default:
2896 printf("???");
2897 break;
2899 printf("\n");
2902 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2903 if (s->reg_to_temp[i] != NULL) {
2904 printf("%s: %s\n",
2905 tcg_target_reg_names[i],
2906 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2911 static void check_regs(TCGContext *s)
2913 int reg;
2914 int k;
2915 TCGTemp *ts;
2916 char buf[64];
2918 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2919 ts = s->reg_to_temp[reg];
2920 if (ts != NULL) {
2921 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2922 printf("Inconsistency for register %s:\n",
2923 tcg_target_reg_names[reg]);
2924 goto fail;
2928 for (k = 0; k < s->nb_temps; k++) {
2929 ts = &s->temps[k];
2930 if (ts->val_type == TEMP_VAL_REG
2931 && ts->kind != TEMP_FIXED
2932 && s->reg_to_temp[ts->reg] != ts) {
2933 printf("Inconsistency for temp %s:\n",
2934 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2935 fail:
2936 printf("reg state:\n");
2937 dump_regs(s);
2938 tcg_abort();
2942 #endif
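/*
 * Allocate a stack-frame slot for TS: round current_frame_offset up to
 * tcg_target_long alignment, abort via tcg_abort() if s->frame_end
 * would be exceeded, and record the slot in ts->mem_base and
 * ts->mem_offset.
 */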
2944 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2946 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2947 /* The Sparc64 stack is accessed with an offset of 2047 */
2948 s->current_frame_offset = (s->current_frame_offset +
2949 (tcg_target_long)sizeof(tcg_target_long) - 1) &
2950 ~(sizeof(tcg_target_long) - 1);
2951 #endif
2952 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2953 s->frame_end) {
2954 tcg_abort();
2956 ts->mem_offset = s->current_frame_offset;
2957 ts->mem_base = s->frame_temp;
2958 ts->mem_allocated = 1;
2959 s->current_frame_offset += sizeof(tcg_target_long);
2962 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
2964 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
2965 mark it free; otherwise mark it dead. */
2966 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2968 TCGTempVal new_type;
2970 switch (ts->kind) {
2971 case TEMP_FIXED:
2972 return;
2973 case TEMP_GLOBAL:
2974 case TEMP_LOCAL:
2975 new_type = TEMP_VAL_MEM;
2976 break;
2977 case TEMP_NORMAL:
2978 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
2979 break;
2980 case TEMP_CONST:
2981 new_type = TEMP_VAL_CONST;
2982 break;
2983 default:
2984 g_assert_not_reached();
2986 if (ts->val_type == TEMP_VAL_REG) {
2987 s->reg_to_temp[ts->reg] = NULL;
2989 ts->val_type = new_type;
2992 /* Mark a temporary as dead. */
2993 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2995 temp_free_or_dead(s, ts, 1);
2998 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2999 register needs to be allocated to store a constant. If 'free_or_dead'
3000 is non-zero, subsequently release the temporary; if it is positive, the
3001 temp is dead; if it is negative, the temp is free. */
3002 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3003 TCGRegSet preferred_regs, int free_or_dead)
3005 if (!temp_readonly(ts) && !ts->mem_coherent) {
3006 if (!ts->mem_allocated) {
3007 temp_allocate_frame(s, ts);
3009 switch (ts->val_type) {
3010 case TEMP_VAL_CONST:
3011 /* If we're going to free the temp immediately, then we won't
3012 require it later in a register, so attempt to store the
3013 constant to memory directly. */
3014 if (free_or_dead
3015 && tcg_out_sti(s, ts->type, ts->val,
3016 ts->mem_base->reg, ts->mem_offset)) {
3017 break;
3019 temp_load(s, ts, tcg_target_available_regs[ts->type],
3020 allocated_regs, preferred_regs);
3021 /* fallthrough */
3023 case TEMP_VAL_REG:
3024 tcg_out_st(s, ts->type, ts->reg,
3025 ts->mem_base->reg, ts->mem_offset);
3026 break;
3028 case TEMP_VAL_MEM:
3029 break;
3031 case TEMP_VAL_DEAD:
3032 default:
3033 tcg_abort();
3035 ts->mem_coherent = 1;
3037 if (free_or_dead) {
3038 temp_free_or_dead(s, ts, free_or_dead);
3042 /* free register 'reg' by spilling the corresponding temporary if necessary */
3043 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3045 TCGTemp *ts = s->reg_to_temp[reg];
3046 if (ts != NULL) {
3047 temp_sync(s, ts, allocated_regs, 0, -1);
3052 * tcg_reg_alloc:
3053 * @required_regs: Set of registers in which we must allocate.
3054 * @allocated_regs: Set of registers which must be avoided.
3055 * @preferred_regs: Set of registers we should prefer.
3056 * @rev: True if we search the registers in "indirect" order.
3058 * The allocated register must be in @required_regs & ~@allocated_regs,
3059 * but if we can put it in @preferred_regs we may save a move later.
3061 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3062 TCGRegSet allocated_regs,
3063 TCGRegSet preferred_regs, bool rev)
3065 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3066 TCGRegSet reg_ct[2];
3067 const int *order;
3069 reg_ct[1] = required_regs & ~allocated_regs;
3070 tcg_debug_assert(reg_ct[1] != 0);
3071 reg_ct[0] = reg_ct[1] & preferred_regs;
3073 /* Skip the preferred_regs option if it cannot be satisfied,
3074 or if the preference made no difference. */
3075 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3077 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3079 /* Try free registers, preferences first. */
3080 for (j = f; j < 2; j++) {
3081 TCGRegSet set = reg_ct[j];
3083 if (tcg_regset_single(set)) {
3084 /* One register in the set. */
3085 TCGReg reg = tcg_regset_first(set);
3086 if (s->reg_to_temp[reg] == NULL) {
3087 return reg;
3089 } else {
3090 for (i = 0; i < n; i++) {
3091 TCGReg reg = order[i];
3092 if (s->reg_to_temp[reg] == NULL &&
3093 tcg_regset_test_reg(set, reg)) {
3094 return reg;
3100 /* We must spill something. */
3101 for (j = f; j < 2; j++) {
3102 TCGRegSet set = reg_ct[j];
3104 if (tcg_regset_single(set)) {
3105 /* One register in the set. */
3106 TCGReg reg = tcg_regset_first(set);
3107 tcg_reg_free(s, reg, allocated_regs);
3108 return reg;
3109 } else {
3110 for (i = 0; i < n; i++) {
3111 TCGReg reg = order[i];
3112 if (tcg_regset_test_reg(set, reg)) {
3113 tcg_reg_free(s, reg, allocated_regs);
3114 return reg;
3120 tcg_abort();
3123 /* Make sure the temporary is in a register. If needed, allocate the register
3124 from DESIRED while avoiding ALLOCATED. */
3125 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3126 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3128 TCGReg reg;
3130 switch (ts->val_type) {
3131 case TEMP_VAL_REG:
3132 return;
3133 case TEMP_VAL_CONST:
3134 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3135 preferred_regs, ts->indirect_base);
3136 if (ts->type <= TCG_TYPE_I64) {
3137 tcg_out_movi(s, ts->type, reg, ts->val);
3138 } else {
3139 uint64_t val = ts->val;
3140 MemOp vece = MO_64;
3143 * Find the minimal vector element that matches the constant.
3144 * The targets will, in general, have to do this search anyway,
3145 * so do it generically here.
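 * For example, 0x0101010101010101 matches dup_const(MO_8, 0x01) and is
 * emitted as a byte dup, while an arbitrary 64-bit pattern falls
 * through with vece == MO_64.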
3147 if (val == dup_const(MO_8, val)) {
3148 vece = MO_8;
3149 } else if (val == dup_const(MO_16, val)) {
3150 vece = MO_16;
3151 } else if (val == dup_const(MO_32, val)) {
3152 vece = MO_32;
3155 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3157 ts->mem_coherent = 0;
3158 break;
3159 case TEMP_VAL_MEM:
3160 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3161 preferred_regs, ts->indirect_base);
3162 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3163 ts->mem_coherent = 1;
3164 break;
3165 case TEMP_VAL_DEAD:
3166 default:
3167 tcg_abort();
3169 ts->reg = reg;
3170 ts->val_type = TEMP_VAL_REG;
3171 s->reg_to_temp[reg] = ts;
3174 /* Save a temporary to memory. 'allocated_regs' is used in case a
3175 temporary register needs to be allocated to store a constant. */
3176 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3178 /* The liveness analysis already ensures that globals are back
3179 in memory. Keep a tcg_debug_assert for safety. */
3180 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3183 /* save globals to their canonical location and assume they can be
3184 modified by the following code. 'allocated_regs' is used in case a
3185 temporary register needs to be allocated to store a constant. */
3186 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3188 int i, n;
3190 for (i = 0, n = s->nb_globals; i < n; i++) {
3191 temp_save(s, &s->temps[i], allocated_regs);
3195 /* sync globals to their canonical location and assume they can be
3196 read by the following code. 'allocated_regs' is used in case a
3197 temporary register needs to be allocated to store a constant. */
3198 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3200 int i, n;
3202 for (i = 0, n = s->nb_globals; i < n; i++) {
3203 TCGTemp *ts = &s->temps[i];
3204 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3205 || ts->kind == TEMP_FIXED
3206 || ts->mem_coherent);
3210 /* at the end of a basic block, we assume all temporaries are dead and
3211 all globals are stored at their canonical location. */
3212 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3214 int i;
3216 for (i = s->nb_globals; i < s->nb_temps; i++) {
3217 TCGTemp *ts = &s->temps[i];
3219 switch (ts->kind) {
3220 case TEMP_LOCAL:
3221 temp_save(s, ts, allocated_regs);
3222 break;
3223 case TEMP_NORMAL:
3224 /* The liveness analysis already ensures that temps are dead.
3225 Keep a tcg_debug_assert for safety. */
3226 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3227 break;
3228 case TEMP_CONST:
3229 /* Similarly, we should have freed any allocated register. */
3230 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3231 break;
3232 default:
3233 g_assert_not_reached();
3237 save_globals(s, allocated_regs);
3241 * At a conditional branch, we assume all temporaries are dead and
3242 * all globals and local temps are synced to their location.
3244 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3246 sync_globals(s, allocated_regs);
3248 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3249 TCGTemp *ts = &s->temps[i];
3251 * The liveness analysis already ensures that temps are dead.
3252 * Keep tcg_debug_asserts for safety.
3254 switch (ts->kind) {
3255 case TEMP_LOCAL:
3256 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3257 break;
3258 case TEMP_NORMAL:
3259 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3260 break;
3261 case TEMP_CONST:
3262 break;
3263 default:
3264 g_assert_not_reached();
3270 * Specialized code generation for INDEX_op_mov_* with a constant.
3272 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3273 tcg_target_ulong val, TCGLifeData arg_life,
3274 TCGRegSet preferred_regs)
3276 /* ENV should not be modified. */
3277 tcg_debug_assert(!temp_readonly(ots));
3279 /* The movi is not explicitly generated here. */
3280 if (ots->val_type == TEMP_VAL_REG) {
3281 s->reg_to_temp[ots->reg] = NULL;
3283 ots->val_type = TEMP_VAL_CONST;
3284 ots->val = val;
3285 ots->mem_coherent = 0;
3286 if (NEED_SYNC_ARG(0)) {
3287 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3288 } else if (IS_DEAD_ARG(0)) {
3289 temp_dead(s, ots);
3294 * Specialized code generation for INDEX_op_mov_*.
3296 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3298 const TCGLifeData arg_life = op->life;
3299 TCGRegSet allocated_regs, preferred_regs;
3300 TCGTemp *ts, *ots;
3301 TCGType otype, itype;
3303 allocated_regs = s->reserved_regs;
3304 preferred_regs = op->output_pref[0];
3305 ots = arg_temp(op->args[0]);
3306 ts = arg_temp(op->args[1]);
3308 /* ENV should not be modified. */
3309 tcg_debug_assert(!temp_readonly(ots));
3311 /* Note that otype != itype for no-op truncation. */
3312 otype = ots->type;
3313 itype = ts->type;
3315 if (ts->val_type == TEMP_VAL_CONST) {
3316 /* propagate constant or generate sti */
3317 tcg_target_ulong val = ts->val;
3318 if (IS_DEAD_ARG(1)) {
3319 temp_dead(s, ts);
3321 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3322 return;
3325 /* If the source value is in memory we're going to be forced
3326 to have it in a register in order to perform the copy. Copy
3327 the SOURCE value into its own register first, that way we
3328 don't have to reload SOURCE the next time it is used. */
3329 if (ts->val_type == TEMP_VAL_MEM) {
3330 temp_load(s, ts, tcg_target_available_regs[itype],
3331 allocated_regs, preferred_regs);
3334 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3335 if (IS_DEAD_ARG(0)) {
3336 /* mov to a non-saved dead register makes no sense (even with
3337 liveness analysis disabled). */
3338 tcg_debug_assert(NEED_SYNC_ARG(0));
3339 if (!ots->mem_allocated) {
3340 temp_allocate_frame(s, ots);
3342 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3343 if (IS_DEAD_ARG(1)) {
3344 temp_dead(s, ts);
3346 temp_dead(s, ots);
3347 } else {
3348 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3349 /* the mov can be suppressed */
3350 if (ots->val_type == TEMP_VAL_REG) {
3351 s->reg_to_temp[ots->reg] = NULL;
3353 ots->reg = ts->reg;
3354 temp_dead(s, ts);
3355 } else {
3356 if (ots->val_type != TEMP_VAL_REG) {
3357 /* When allocating a new register, make sure to not spill the
3358 input one. */
3359 tcg_regset_set_reg(allocated_regs, ts->reg);
3360 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3361 allocated_regs, preferred_regs,
3362 ots->indirect_base);
3364 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3366 * Cross register class move not supported.
3367 * Store the source register into the destination slot
3368 * and leave the destination temp as TEMP_VAL_MEM.
3370 assert(!temp_readonly(ots));
3371 if (!ts->mem_allocated) {
3372 temp_allocate_frame(s, ots);
3374 tcg_out_st(s, ts->type, ts->reg,
3375 ots->mem_base->reg, ots->mem_offset);
3376 ots->mem_coherent = 1;
3377 temp_free_or_dead(s, ots, -1);
3378 return;
3381 ots->val_type = TEMP_VAL_REG;
3382 ots->mem_coherent = 0;
3383 s->reg_to_temp[ots->reg] = ots;
3384 if (NEED_SYNC_ARG(0)) {
3385 temp_sync(s, ots, allocated_regs, 0, 0);
3391 * Specialized code generation for INDEX_op_dup_vec.
3393 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3395 const TCGLifeData arg_life = op->life;
3396 TCGRegSet dup_out_regs, dup_in_regs;
3397 TCGTemp *its, *ots;
3398 TCGType itype, vtype;
3399 intptr_t endian_fixup;
3400 unsigned vece;
3401 bool ok;
3403 ots = arg_temp(op->args[0]);
3404 its = arg_temp(op->args[1]);
3406 /* ENV should not be modified. */
3407 tcg_debug_assert(!temp_readonly(ots));
3409 itype = its->type;
3410 vece = TCGOP_VECE(op);
3411 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3413 if (its->val_type == TEMP_VAL_CONST) {
3414 /* Propagate constant via movi -> dupi. */
3415 tcg_target_ulong val = its->val;
3416 if (IS_DEAD_ARG(1)) {
3417 temp_dead(s, its);
3419 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3420 return;
3423 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3424 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3426 /* Allocate the output register now. */
3427 if (ots->val_type != TEMP_VAL_REG) {
3428 TCGRegSet allocated_regs = s->reserved_regs;
3430 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3431 /* Make sure to not spill the input register. */
3432 tcg_regset_set_reg(allocated_regs, its->reg);
3434 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3435 op->output_pref[0], ots->indirect_base);
3436 ots->val_type = TEMP_VAL_REG;
3437 ots->mem_coherent = 0;
3438 s->reg_to_temp[ots->reg] = ots;
3441 switch (its->val_type) {
3442 case TEMP_VAL_REG:
3444 * The dup constraints must be broad, covering all possible VECE.
3445 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3446 * to fail, indicating that extra moves are required for that case.
3448 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3449 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3450 goto done;
3452 /* Try again from memory or a vector input register. */
3454 if (!its->mem_coherent) {
3456 * The input register is not synced, and so an extra store
3457 * would be required to use memory. Attempt an integer-vector
3458 * register move first. We do not have a TCGRegSet for this.
3460 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3461 break;
3463 /* Sync the temp back to its slot and load from there. */
3464 temp_sync(s, its, s->reserved_regs, 0, 0);
3466 /* fall through */
3468 case TEMP_VAL_MEM:
3469 #ifdef HOST_WORDS_BIGENDIAN
3470 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3471 endian_fixup -= 1 << vece;
3472 #else
3473 endian_fixup = 0;
3474 #endif
3475 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3476 its->mem_offset + endian_fixup)) {
3477 goto done;
3479 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3480 break;
3482 default:
3483 g_assert_not_reached();
3486 /* We now have a vector input register, so dup must succeed. */
3487 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3488 tcg_debug_assert(ok);
3490 done:
3491 if (IS_DEAD_ARG(1)) {
3492 temp_dead(s, its);
3494 if (NEED_SYNC_ARG(0)) {
3495 temp_sync(s, ots, s->reserved_regs, 0, 0);
3497 if (IS_DEAD_ARG(0)) {
3498 temp_dead(s, ots);
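/*
 * Generic per-op register allocation: satisfy input constraints in
 * sort_index order (reusing constants and aliased registers where the
 * constraints permit), free call-clobbered registers and sync globals
 * for ops flagged as clobbering or having side effects, allocate output
 * registers, emit the host instruction, then sync or kill outputs
 * according to the liveness data.
 */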
3502 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3504 const TCGLifeData arg_life = op->life;
3505 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3506 TCGRegSet i_allocated_regs;
3507 TCGRegSet o_allocated_regs;
3508 int i, k, nb_iargs, nb_oargs;
3509 TCGReg reg;
3510 TCGArg arg;
3511 const TCGArgConstraint *arg_ct;
3512 TCGTemp *ts;
3513 TCGArg new_args[TCG_MAX_OP_ARGS];
3514 int const_args[TCG_MAX_OP_ARGS];
3516 nb_oargs = def->nb_oargs;
3517 nb_iargs = def->nb_iargs;
3519 /* copy constants */
3520 memcpy(new_args + nb_oargs + nb_iargs,
3521 op->args + nb_oargs + nb_iargs,
3522 sizeof(TCGArg) * def->nb_cargs);
3524 i_allocated_regs = s->reserved_regs;
3525 o_allocated_regs = s->reserved_regs;
3527 /* satisfy input constraints */
3528 for (k = 0; k < nb_iargs; k++) {
3529 TCGRegSet i_preferred_regs, o_preferred_regs;
3531 i = def->args_ct[nb_oargs + k].sort_index;
3532 arg = op->args[i];
3533 arg_ct = &def->args_ct[i];
3534 ts = arg_temp(arg);
3536 if (ts->val_type == TEMP_VAL_CONST
3537 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3538 /* constant is OK for instruction */
3539 const_args[i] = 1;
3540 new_args[i] = ts->val;
3541 continue;
3544 i_preferred_regs = o_preferred_regs = 0;
3545 if (arg_ct->ialias) {
3546 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3549 * If the input is readonly, then it cannot also be an
3550 * output and aliased to itself. If the input is not
3551 * dead after the instruction, we must allocate a new
3552 * register and move it.
3554 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3555 goto allocate_in_reg;
3559 * Check if the current register has already been allocated
3560 * for another input aliased to an output.
3562 if (ts->val_type == TEMP_VAL_REG) {
3563 reg = ts->reg;
3564 for (int k2 = 0; k2 < k; k2++) {
3565 int i2 = def->args_ct[nb_oargs + k2].sort_index;
3566 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3567 goto allocate_in_reg;
3571 i_preferred_regs = o_preferred_regs;
3574 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3575 reg = ts->reg;
3577 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3578 allocate_in_reg:
3580 * Allocate a new register matching the constraint
3581 * and move the temporary register into it.
3583 temp_load(s, ts, tcg_target_available_regs[ts->type],
3584 i_allocated_regs, 0);
3585 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3586 o_preferred_regs, ts->indirect_base);
3587 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3589 * Cross register class move not supported. Sync the
3590 * temp back to its slot and load from there.
3592 temp_sync(s, ts, i_allocated_regs, 0, 0);
3593 tcg_out_ld(s, ts->type, reg,
3594 ts->mem_base->reg, ts->mem_offset);
3597 new_args[i] = reg;
3598 const_args[i] = 0;
3599 tcg_regset_set_reg(i_allocated_regs, reg);
3602 /* mark dead temporaries and free the associated registers */
3603 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3604 if (IS_DEAD_ARG(i)) {
3605 temp_dead(s, arg_temp(op->args[i]));
3609 if (def->flags & TCG_OPF_COND_BRANCH) {
3610 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3611 } else if (def->flags & TCG_OPF_BB_END) {
3612 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3613 } else {
3614 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3615 /* XXX: permit generic clobber register list ? */
3616 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3617 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3618 tcg_reg_free(s, i, i_allocated_regs);
3622 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3623 /* sync globals if the op has side effects and might trigger
3624 an exception. */
3625 sync_globals(s, i_allocated_regs);
3628 /* satisfy the output constraints */
3629 for(k = 0; k < nb_oargs; k++) {
3630 i = def->args_ct[k].sort_index;
3631 arg = op->args[i];
3632 arg_ct = &def->args_ct[i];
3633 ts = arg_temp(arg);
3635 /* ENV should not be modified. */
3636 tcg_debug_assert(!temp_readonly(ts));
3638 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3639 reg = new_args[arg_ct->alias_index];
3640 } else if (arg_ct->newreg) {
3641 reg = tcg_reg_alloc(s, arg_ct->regs,
3642 i_allocated_regs | o_allocated_regs,
3643 op->output_pref[k], ts->indirect_base);
3644 } else {
3645 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3646 op->output_pref[k], ts->indirect_base);
3648 tcg_regset_set_reg(o_allocated_regs, reg);
3649 if (ts->val_type == TEMP_VAL_REG) {
3650 s->reg_to_temp[ts->reg] = NULL;
3652 ts->val_type = TEMP_VAL_REG;
3653 ts->reg = reg;
3655 * Temp value is modified, so the value kept in memory is
3656 * potentially not the same.
3658 ts->mem_coherent = 0;
3659 s->reg_to_temp[reg] = ts;
3660 new_args[i] = reg;
3664 /* emit instruction */
3665 if (def->flags & TCG_OPF_VECTOR) {
3666 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3667 new_args, const_args);
3668 } else {
3669 tcg_out_op(s, op->opc, new_args, const_args);
3672 /* move the outputs in the correct register if needed */
3673 for(i = 0; i < nb_oargs; i++) {
3674 ts = arg_temp(op->args[i]);
3676 /* ENV should not be modified. */
3677 tcg_debug_assert(!temp_readonly(ts));
3679 if (NEED_SYNC_ARG(i)) {
3680 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3681 } else if (IS_DEAD_ARG(i)) {
3682 temp_dead(s, ts);
3687 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3689 const TCGLifeData arg_life = op->life;
3690 TCGTemp *ots, *itsl, *itsh;
3691 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3693 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3694 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3695 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3697 ots = arg_temp(op->args[0]);
3698 itsl = arg_temp(op->args[1]);
3699 itsh = arg_temp(op->args[2]);
3701 /* ENV should not be modified. */
3702 tcg_debug_assert(!temp_readonly(ots));
3704 /* Allocate the output register now. */
3705 if (ots->val_type != TEMP_VAL_REG) {
3706 TCGRegSet allocated_regs = s->reserved_regs;
3707 TCGRegSet dup_out_regs =
3708 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3710 /* Make sure to not spill the input registers. */
3711 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3712 tcg_regset_set_reg(allocated_regs, itsl->reg);
3714 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3715 tcg_regset_set_reg(allocated_regs, itsh->reg);
3718 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3719 op->output_pref[0], ots->indirect_base);
3720 ots->val_type = TEMP_VAL_REG;
3721 ots->mem_coherent = 0;
3722 s->reg_to_temp[ots->reg] = ots;
3725 /* Promote dup2 of immediates to dupi_vec. */
3726 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3727 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3728 MemOp vece = MO_64;
3730 if (val == dup_const(MO_8, val)) {
3731 vece = MO_8;
3732 } else if (val == dup_const(MO_16, val)) {
3733 vece = MO_16;
3734 } else if (val == dup_const(MO_32, val)) {
3735 vece = MO_32;
3738 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3739 goto done;
3742 /* If the two inputs form one 64-bit value, try dupm_vec. */
3743 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3744 if (!itsl->mem_coherent) {
3745 temp_sync(s, itsl, s->reserved_regs, 0, 0);
3747 if (!itsh->mem_coherent) {
3748 temp_sync(s, itsh, s->reserved_regs, 0, 0);
3750 #ifdef HOST_WORDS_BIGENDIAN
3751 TCGTemp *its = itsh;
3752 #else
3753 TCGTemp *its = itsl;
3754 #endif
3755 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3756 its->mem_base->reg, its->mem_offset)) {
3757 goto done;
3761 /* Fall back to generic expansion. */
3762 return false;
3764 done:
3765 if (IS_DEAD_ARG(1)) {
3766 temp_dead(s, itsl);
3768 if (IS_DEAD_ARG(2)) {
3769 temp_dead(s, itsh);
3771 if (NEED_SYNC_ARG(0)) {
3772 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3773 } else if (IS_DEAD_ARG(0)) {
3774 temp_dead(s, ots);
3776 return true;
3779 #ifdef TCG_TARGET_STACK_GROWSUP
3780 #define STACK_DIR(x) (-(x))
3781 #else
3782 #define STACK_DIR(x) (x)
3783 #endif
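/*
 * Call emission: stack arguments are written into the preallocated
 * TCG_STATIC_CALL_ARGS_SIZE area (anything larger aborts), register
 * arguments are loaded into tcg_target_call_iarg_regs, every
 * call-clobbered register is freed, globals are saved or synced
 * according to the call flags, and the results are claimed from
 * tcg_target_call_oarg_regs.
 */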
3785 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3787 const int nb_oargs = TCGOP_CALLO(op);
3788 const int nb_iargs = TCGOP_CALLI(op);
3789 const TCGLifeData arg_life = op->life;
3790 int flags, nb_regs, i;
3791 TCGReg reg;
3792 TCGArg arg;
3793 TCGTemp *ts;
3794 intptr_t stack_offset;
3795 size_t call_stack_size;
3796 tcg_insn_unit *func_addr;
3797 int allocate_args;
3798 TCGRegSet allocated_regs;
3800 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3801 flags = op->args[nb_oargs + nb_iargs + 1];
3803 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3804 if (nb_regs > nb_iargs) {
3805 nb_regs = nb_iargs;
3808 /* assign stack slots first */
3809 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3810 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3811 ~(TCG_TARGET_STACK_ALIGN - 1);
3812 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3813 if (allocate_args) {
3814 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3815 preallocate call stack */
3816 tcg_abort();
3819 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3820 for (i = nb_regs; i < nb_iargs; i++) {
3821 arg = op->args[nb_oargs + i];
3822 #ifdef TCG_TARGET_STACK_GROWSUP
3823 stack_offset -= sizeof(tcg_target_long);
3824 #endif
3825 if (arg != TCG_CALL_DUMMY_ARG) {
3826 ts = arg_temp(arg);
3827 temp_load(s, ts, tcg_target_available_regs[ts->type],
3828 s->reserved_regs, 0);
3829 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3831 #ifndef TCG_TARGET_STACK_GROWSUP
3832 stack_offset += sizeof(tcg_target_long);
3833 #endif
3836 /* assign input registers */
3837 allocated_regs = s->reserved_regs;
3838 for (i = 0; i < nb_regs; i++) {
3839 arg = op->args[nb_oargs + i];
3840 if (arg != TCG_CALL_DUMMY_ARG) {
3841 ts = arg_temp(arg);
3842 reg = tcg_target_call_iarg_regs[i];
3844 if (ts->val_type == TEMP_VAL_REG) {
3845 if (ts->reg != reg) {
3846 tcg_reg_free(s, reg, allocated_regs);
3847 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3849 * Cross register class move not supported. Sync the
3850 * temp back to its slot and load from there.
3852 temp_sync(s, ts, allocated_regs, 0, 0);
3853 tcg_out_ld(s, ts->type, reg,
3854 ts->mem_base->reg, ts->mem_offset);
3857 } else {
3858 TCGRegSet arg_set = 0;
3860 tcg_reg_free(s, reg, allocated_regs);
3861 tcg_regset_set_reg(arg_set, reg);
3862 temp_load(s, ts, arg_set, allocated_regs, 0);
3865 tcg_regset_set_reg(allocated_regs, reg);
3869 /* mark dead temporaries and free the associated registers */
3870 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3871 if (IS_DEAD_ARG(i)) {
3872 temp_dead(s, arg_temp(op->args[i]));
3876 /* clobber call registers */
3877 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3878 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3879 tcg_reg_free(s, i, allocated_regs);
3883 /* Save globals if they might be written by the helper, sync them if
3884 they might be read. */
3885 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3886 /* Nothing to do */
3887 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3888 sync_globals(s, allocated_regs);
3889 } else {
3890 save_globals(s, allocated_regs);
3893 tcg_out_call(s, func_addr);
3895 /* assign output registers and emit moves if needed */
3896 for(i = 0; i < nb_oargs; i++) {
3897 arg = op->args[i];
3898 ts = arg_temp(arg);
3900 /* ENV should not be modified. */
3901 tcg_debug_assert(!temp_readonly(ts));
3903 reg = tcg_target_call_oarg_regs[i];
3904 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3905 if (ts->val_type == TEMP_VAL_REG) {
3906 s->reg_to_temp[ts->reg] = NULL;
3908 ts->val_type = TEMP_VAL_REG;
3909 ts->reg = reg;
3910 ts->mem_coherent = 0;
3911 s->reg_to_temp[reg] = ts;
3912 if (NEED_SYNC_ARG(i)) {
3913 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3914 } else if (IS_DEAD_ARG(i)) {
3915 temp_dead(s, ts);
3920 #ifdef CONFIG_PROFILER
3922 /* avoid copy/paste errors */
3923 #define PROF_ADD(to, from, field) \
3924 do { \
3925 (to)->field += qatomic_read(&((from)->field)); \
3926 } while (0)
3928 #define PROF_MAX(to, from, field) \
3929 do { \
3930 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
3931 if (val__ > (to)->field) { \
3932 (to)->field = val__; \
3934 } while (0)
3936 /* Pass in a zeroed @prof */
3937 static inline
3938 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3940 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
3941 unsigned int i;
3943 for (i = 0; i < n_ctxs; i++) {
3944 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
3945 const TCGProfile *orig = &s->prof;
3947 if (counters) {
3948 PROF_ADD(prof, orig, cpu_exec_time);
3949 PROF_ADD(prof, orig, tb_count1);
3950 PROF_ADD(prof, orig, tb_count);
3951 PROF_ADD(prof, orig, op_count);
3952 PROF_MAX(prof, orig, op_count_max);
3953 PROF_ADD(prof, orig, temp_count);
3954 PROF_MAX(prof, orig, temp_count_max);
3955 PROF_ADD(prof, orig, del_op_count);
3956 PROF_ADD(prof, orig, code_in_len);
3957 PROF_ADD(prof, orig, code_out_len);
3958 PROF_ADD(prof, orig, search_out_len);
3959 PROF_ADD(prof, orig, interm_time);
3960 PROF_ADD(prof, orig, code_time);
3961 PROF_ADD(prof, orig, la_time);
3962 PROF_ADD(prof, orig, opt_time);
3963 PROF_ADD(prof, orig, restore_count);
3964 PROF_ADD(prof, orig, restore_time);
3966 if (table) {
3967 int i;
3969 for (i = 0; i < NB_OPS; i++) {
3970 PROF_ADD(prof, orig, table_op_count[i]);
3976 #undef PROF_ADD
3977 #undef PROF_MAX
3979 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3981 tcg_profile_snapshot(prof, true, false);
3984 static void tcg_profile_snapshot_table(TCGProfile *prof)
3986 tcg_profile_snapshot(prof, false, true);
3989 void tcg_dump_op_count(void)
3991 TCGProfile prof = {};
3992 int i;
3994 tcg_profile_snapshot_table(&prof);
3995 for (i = 0; i < NB_OPS; i++) {
3996 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3997 prof.table_op_count[i]);
4001 int64_t tcg_cpu_exec_time(void)
4003 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4004 unsigned int i;
4005 int64_t ret = 0;
4007 for (i = 0; i < n_ctxs; i++) {
4008 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4009 const TCGProfile *prof = &s->prof;
4011 ret += qatomic_read(&prof->cpu_exec_time);
4013 return ret;
4015 #else
4016 void tcg_dump_op_count(void)
4018 qemu_printf("[TCG profiler not compiled]\n");
4021 int64_t tcg_cpu_exec_time(void)
4023 error_report("%s: TCG profiler not compiled", __func__);
4024 exit(EXIT_FAILURE);
4026 #endif
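/*
 * tcg_gen_code() is the top-level backend pipeline for one TB: optional
 * optimization, reachable_code_pass(), liveness_pass_1() (re-run after
 * liveness_pass_2() if indirect temps were lowered), then per-op
 * register allocation and code emission.  It returns the generated code
 * size, or a negative value if the code buffer or the TB's 16-bit
 * insn-offset table would overflow and translation must restart.
 */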
4029 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4031 #ifdef CONFIG_PROFILER
4032 TCGProfile *prof = &s->prof;
4033 #endif
4034 int i, num_insns;
4035 TCGOp *op;
4037 #ifdef CONFIG_PROFILER
4039 int n = 0;
4041 QTAILQ_FOREACH(op, &s->ops, link) {
4042 n++;
4044 qatomic_set(&prof->op_count, prof->op_count + n);
4045 if (n > prof->op_count_max) {
4046 qatomic_set(&prof->op_count_max, n);
4049 n = s->nb_temps;
4050 qatomic_set(&prof->temp_count, prof->temp_count + n);
4051 if (n > prof->temp_count_max) {
4052 qatomic_set(&prof->temp_count_max, n);
4055 #endif
4057 #ifdef DEBUG_DISAS
4058 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4059 && qemu_log_in_addr_range(tb->pc))) {
4060 FILE *logfile = qemu_log_lock();
4061 qemu_log("OP:\n");
4062 tcg_dump_ops(s, false);
4063 qemu_log("\n");
4064 qemu_log_unlock(logfile);
4066 #endif
4068 #ifdef CONFIG_DEBUG_TCG
4069 /* Ensure all labels referenced have been emitted. */
4071 TCGLabel *l;
4072 bool error = false;
4074 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4075 if (unlikely(!l->present) && l->refs) {
4076 qemu_log_mask(CPU_LOG_TB_OP,
4077 "$L%d referenced but not present.\n", l->id);
4078 error = true;
4081 assert(!error);
4083 #endif
4085 #ifdef CONFIG_PROFILER
4086 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4087 #endif
4089 #ifdef USE_TCG_OPTIMIZATIONS
4090 tcg_optimize(s);
4091 #endif
4093 #ifdef CONFIG_PROFILER
4094 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4095 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4096 #endif
4098 reachable_code_pass(s);
4099 liveness_pass_1(s);
4101 if (s->nb_indirects > 0) {
4102 #ifdef DEBUG_DISAS
4103 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4104 && qemu_log_in_addr_range(tb->pc))) {
4105 FILE *logfile = qemu_log_lock();
4106 qemu_log("OP before indirect lowering:\n");
4107 tcg_dump_ops(s, false);
4108 qemu_log("\n");
4109 qemu_log_unlock(logfile);
4111 #endif
4112 /* Replace indirect temps with direct temps. */
4113 if (liveness_pass_2(s)) {
4114 /* If changes were made, re-run liveness. */
4115 liveness_pass_1(s);
4119 #ifdef CONFIG_PROFILER
4120 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4121 #endif
4123 #ifdef DEBUG_DISAS
4124 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4125 && qemu_log_in_addr_range(tb->pc))) {
4126 FILE *logfile = qemu_log_lock();
4127 qemu_log("OP after optimization and liveness analysis:\n");
4128 tcg_dump_ops(s, true);
4129 qemu_log("\n");
4130 qemu_log_unlock(logfile);
4132 #endif
4134 tcg_reg_alloc_start(s);
4137 * Reset the buffer pointers when restarting after overflow.
4138 * TODO: Move this into translate-all.c with the rest of the
4139 * buffer management. Having only this done here is confusing.
4141 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4142 s->code_ptr = s->code_buf;
4144 #ifdef TCG_TARGET_NEED_LDST_LABELS
4145 QSIMPLEQ_INIT(&s->ldst_labels);
4146 #endif
4147 #ifdef TCG_TARGET_NEED_POOL_LABELS
4148 s->pool_labels = NULL;
4149 #endif
4151 num_insns = -1;
4152 QTAILQ_FOREACH(op, &s->ops, link) {
4153 TCGOpcode opc = op->opc;
4155 #ifdef CONFIG_PROFILER
4156 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4157 #endif
4159 switch (opc) {
4160 case INDEX_op_mov_i32:
4161 case INDEX_op_mov_i64:
4162 case INDEX_op_mov_vec:
4163 tcg_reg_alloc_mov(s, op);
4164 break;
4165 case INDEX_op_dup_vec:
4166 tcg_reg_alloc_dup(s, op);
4167 break;
4168 case INDEX_op_insn_start:
4169 if (num_insns >= 0) {
4170 size_t off = tcg_current_code_size(s);
4171 s->gen_insn_end_off[num_insns] = off;
4172 /* Assert that we do not overflow our stored offset. */
4173 assert(s->gen_insn_end_off[num_insns] == off);
4175 num_insns++;
4176 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4177 target_ulong a;
4178 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4179 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4180 #else
4181 a = op->args[i];
4182 #endif
4183 s->gen_insn_data[num_insns][i] = a;
4184 }
4185 break;
4186 case INDEX_op_discard:
4187 temp_dead(s, arg_temp(op->args[0]));
4188 break;
4189 case INDEX_op_set_label:
4190 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4191 tcg_out_label(s, arg_label(op->args[0]));
4192 break;
4193 case INDEX_op_call:
4194 tcg_reg_alloc_call(s, op);
4195 break;
4196 case INDEX_op_dup2_vec:
4197 if (tcg_reg_alloc_dup2(s, op)) {
4198 break;
4199 }
4200 /* fall through */
4201 default:
4202 /* Sanity check that we've not introduced any unhandled opcodes. */
4203 tcg_debug_assert(tcg_op_supported(opc));
4204 /* Note: in order to speed up the code, it would be much
4205 faster to have specialized register allocator functions for
4206 some common argument patterns */
4207 tcg_reg_alloc_op(s, op);
4208 break;
4209 }
4210 #ifdef CONFIG_DEBUG_TCG
4211 check_regs(s);
4212 #endif
4213 /* Test for (pending) buffer overflow. The assumption is that any
4214 one operation beginning below the high water mark cannot overrun
4215 the buffer completely. Thus we can test for overflow after
4216 generating code without having to check during generation. */
4217 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4218 return -1;
4219 }
4220 /* Test for TB overflow, as seen by gen_insn_end_off. */
4221 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4222 return -2;
4223 }
4224 }
4225 tcg_debug_assert(num_insns >= 0);
4226 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4228 /* Generate TB finalization at the end of block */
4229 #ifdef TCG_TARGET_NEED_LDST_LABELS
4230 i = tcg_out_ldst_finalize(s);
4231 if (i < 0) {
4232 return i;
4233 }
4234 #endif
4235 #ifdef TCG_TARGET_NEED_POOL_LABELS
4236 i = tcg_out_pool_finalize(s);
4237 if (i < 0) {
4238 return i;
4239 }
4240 #endif
4241 if (!tcg_resolve_relocs(s)) {
4242 return -2;
4243 }
4245 #ifndef CONFIG_TCG_INTERPRETER
4246 /* flush instruction cache */
4247 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4248 (uintptr_t)s->code_buf,
4249 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4250 #endif
4252 return tcg_current_code_size(s);
4253 }
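/*
 * Editorial sketch, not part of this file: the negative returns above are
 * handled by the caller, tb_gen_code() in accel/tcg/translate-all.c.
 * Roughly (exact names and recovery policy belong to that caller):
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb);
 *     if (unlikely(gen_code_size < 0)) {
 *         // -1: code_gen_buffer exhausted -> restart into a fresh buffer.
 *         // -2: this TB's host code outgrew what the uint16_t
 *         //     gen_insn_end_off entries can describe -> retranslate the
 *         //     block with fewer guest instructions.
 *     }
 */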
4255 #ifdef CONFIG_PROFILER
4256 void tcg_dump_info(void)
4257 {
4258 TCGProfile prof = {};
4259 const TCGProfile *s;
4260 int64_t tb_count;
4261 int64_t tb_div_count;
4262 int64_t tot;
4264 tcg_profile_snapshot_counters(&prof);
4265 s = &prof;
4266 tb_count = s->tb_count;
4267 tb_div_count = tb_count ? tb_count : 1;
4268 tot = s->interm_time + s->code_time;
4270 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4271 tot, tot / 2.4e9);
4272 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4273 " %0.1f%%)\n",
4274 tb_count, s->tb_count1 - tb_count,
4275 (double)(s->tb_count1 - s->tb_count)
4276 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4277 qemu_printf("avg ops/TB %0.1f max=%d\n",
4278 (double)s->op_count / tb_div_count, s->op_count_max);
4279 qemu_printf("deleted ops/TB %0.2f\n",
4280 (double)s->del_op_count / tb_div_count);
4281 qemu_printf("avg temps/TB %0.2f max=%d\n",
4282 (double)s->temp_count / tb_div_count, s->temp_count_max);
4283 qemu_printf("avg host code/TB %0.1f\n",
4284 (double)s->code_out_len / tb_div_count);
4285 qemu_printf("avg search data/TB %0.1f\n",
4286 (double)s->search_out_len / tb_div_count);
4288 qemu_printf("cycles/op %0.1f\n",
4289 s->op_count ? (double)tot / s->op_count : 0);
4290 qemu_printf("cycles/in byte %0.1f\n",
4291 s->code_in_len ? (double)tot / s->code_in_len : 0);
4292 qemu_printf("cycles/out byte %0.1f\n",
4293 s->code_out_len ? (double)tot / s->code_out_len : 0);
4294 qemu_printf("cycles/search byte %0.1f\n",
4295 s->search_out_len ? (double)tot / s->search_out_len : 0);
4296 if (tot == 0) {
4297 tot = 1;
4298 }
4299 qemu_printf(" gen_interm time %0.1f%%\n",
4300 (double)s->interm_time / tot * 100.0);
4301 qemu_printf(" gen_code time %0.1f%%\n",
4302 (double)s->code_time / tot * 100.0);
4303 qemu_printf("optim./code time %0.1f%%\n",
4304 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4305 * 100.0);
4306 qemu_printf("liveness/code time %0.1f%%\n",
4307 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4308 qemu_printf("cpu_restore count %" PRId64 "\n",
4309 s->restore_count);
4310 qemu_printf(" avg cycles %0.1f\n",
4311 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4312 }
4313 #else
4314 void tcg_dump_info(void)
4315 {
4316 qemu_printf("[TCG profiler not compiled]\n");
4317 }
4318 #endif
4320 #ifdef ELF_HOST_MACHINE
4321 /* In order to use this feature, the backend needs to do three things:
4323 (1) Define ELF_HOST_MACHINE to indicate both what value to
4324 put into the ELF image and to indicate support for the feature.
4326 (2) Define tcg_register_jit. This should create a buffer containing
4327 the contents of a .debug_frame section that describes the post-
4328 prologue unwind info for the tcg machine.
4330 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4331 */
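/*
 * Illustrative sketch only, not part of this file: a typical backend
 * defines a hypothetical "DebugFrame" wrapper (a DebugFrameHeader followed
 * by host-specific CFA instruction bytes), assembles it statically, and
 * simply forwards it; tcg_register_jit_int() below copies the blob and
 * fills in the FDE's func_start/func_len itself:
 *
 *     static const DebugFrame debug_frame = {
 *         // CIE/FDE fields and host CFA bytes, per the host ABI
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */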
4333 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4334 typedef enum {
4335 JIT_NOACTION = 0,
4336 JIT_REGISTER_FN,
4337 JIT_UNREGISTER_FN
4338 } jit_actions_t;
4340 struct jit_code_entry {
4341 struct jit_code_entry *next_entry;
4342 struct jit_code_entry *prev_entry;
4343 const void *symfile_addr;
4344 uint64_t symfile_size;
4345 };
4347 struct jit_descriptor {
4348 uint32_t version;
4349 uint32_t action_flag;
4350 struct jit_code_entry *relevant_entry;
4351 struct jit_code_entry *first_entry;
4352 };
4354 void __jit_debug_register_code(void) __attribute__((noinline));
4355 void __jit_debug_register_code(void)
4356 {
4357 asm("");
4358 }
4360 /* Must statically initialize the version, because GDB may check
4361 the version before we can set it. */
4362 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4364 /* End GDB interface. */
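/*
 * Helper for building the image below: returns the offset of "str" within
 * a NUL-separated string table. Note it assumes the string is present,
 * which holds here because every name looked up is baked into img->str.
 */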
4366 static int find_string(const char *strtab, const char *str)
4367 {
4368 const char *p = strtab + 1;
4370 while (1) {
4371 if (strcmp(p, str) == 0) {
4372 return p - strtab;
4373 }
4374 p += strlen(p) + 1;
4375 }
4376 }
4378 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4379 const void *debug_frame,
4380 size_t debug_frame_size)
4381 {
4382 struct __attribute__((packed)) DebugInfo {
4383 uint32_t len;
4384 uint16_t version;
4385 uint32_t abbrev;
4386 uint8_t ptr_size;
4387 uint8_t cu_die;
4388 uint16_t cu_lang;
4389 uintptr_t cu_low_pc;
4390 uintptr_t cu_high_pc;
4391 uint8_t fn_die;
4392 char fn_name[16];
4393 uintptr_t fn_low_pc;
4394 uintptr_t fn_high_pc;
4395 uint8_t cu_eoc;
4396 };
4398 struct ElfImage {
4399 ElfW(Ehdr) ehdr;
4400 ElfW(Phdr) phdr;
4401 ElfW(Shdr) shdr[7];
4402 ElfW(Sym) sym[2];
4403 struct DebugInfo di;
4404 uint8_t da[24];
4405 char str[80];
4406 };
4408 struct ElfImage *img;
4410 static const struct ElfImage img_template = {
4411 .ehdr = {
4412 .e_ident[EI_MAG0] = ELFMAG0,
4413 .e_ident[EI_MAG1] = ELFMAG1,
4414 .e_ident[EI_MAG2] = ELFMAG2,
4415 .e_ident[EI_MAG3] = ELFMAG3,
4416 .e_ident[EI_CLASS] = ELF_CLASS,
4417 .e_ident[EI_DATA] = ELF_DATA,
4418 .e_ident[EI_VERSION] = EV_CURRENT,
4419 .e_type = ET_EXEC,
4420 .e_machine = ELF_HOST_MACHINE,
4421 .e_version = EV_CURRENT,
4422 .e_phoff = offsetof(struct ElfImage, phdr),
4423 .e_shoff = offsetof(struct ElfImage, shdr),
4424 .e_ehsize = sizeof(ElfW(Shdr)),
4425 .e_phentsize = sizeof(ElfW(Phdr)),
4426 .e_phnum = 1,
4427 .e_shentsize = sizeof(ElfW(Shdr)),
4428 .e_shnum = ARRAY_SIZE(img->shdr),
4429 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4430 #ifdef ELF_HOST_FLAGS
4431 .e_flags = ELF_HOST_FLAGS,
4432 #endif
4433 #ifdef ELF_OSABI
4434 .e_ident[EI_OSABI] = ELF_OSABI,
4435 #endif
4436 },
4437 .phdr = {
4438 .p_type = PT_LOAD,
4439 .p_flags = PF_X,
4440 },
4441 .shdr = {
4442 [0] = { .sh_type = SHT_NULL },
4443 /* Trick: The contents of code_gen_buffer are not present in
4444 this fake ELF file; that got allocated elsewhere. Therefore
4445 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4446 will not look for contents. We can record any address. */
4447 [1] = { /* .text */
4448 .sh_type = SHT_NOBITS,
4449 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4450 },
4451 [2] = { /* .debug_info */
4452 .sh_type = SHT_PROGBITS,
4453 .sh_offset = offsetof(struct ElfImage, di),
4454 .sh_size = sizeof(struct DebugInfo),
4455 },
4456 [3] = { /* .debug_abbrev */
4457 .sh_type = SHT_PROGBITS,
4458 .sh_offset = offsetof(struct ElfImage, da),
4459 .sh_size = sizeof(img->da),
4460 },
4461 [4] = { /* .debug_frame */
4462 .sh_type = SHT_PROGBITS,
4463 .sh_offset = sizeof(struct ElfImage),
4464 },
4465 [5] = { /* .symtab */
4466 .sh_type = SHT_SYMTAB,
4467 .sh_offset = offsetof(struct ElfImage, sym),
4468 .sh_size = sizeof(img->sym),
4469 .sh_info = 1,
4470 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4471 .sh_entsize = sizeof(ElfW(Sym)),
4472 },
4473 [6] = { /* .strtab */
4474 .sh_type = SHT_STRTAB,
4475 .sh_offset = offsetof(struct ElfImage, str),
4476 .sh_size = sizeof(img->str),
4477 },
4478 },
4479 .sym = {
4480 [1] = { /* code_gen_buffer */
4481 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4482 .st_shndx = 1,
4483 },
4484 },
4485 .di = {
4486 .len = sizeof(struct DebugInfo) - 4,
4487 .version = 2,
4488 .ptr_size = sizeof(void *),
4489 .cu_die = 1,
4490 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4491 .fn_die = 2,
4492 .fn_name = "code_gen_buffer"
4493 },
4494 .da = {
4495 1, /* abbrev number (the cu) */
4496 0x11, 1, /* DW_TAG_compile_unit, has children */
4497 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4498 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4499 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4500 0, 0, /* end of abbrev */
4501 2, /* abbrev number (the fn) */
4502 0x2e, 0, /* DW_TAG_subprogram, no children */
4503 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4504 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4505 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4506 0, 0, /* end of abbrev */
4507 0 /* no more abbrev */
4508 },
4509 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4510 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4513 /* We only need a single jit entry; statically allocate it. */
4514 static struct jit_code_entry one_entry;
4516 uintptr_t buf = (uintptr_t)buf_ptr;
4517 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4518 DebugFrameHeader *dfh;
4520 img = g_malloc(img_size);
4521 *img = img_template;
4523 img->phdr.p_vaddr = buf;
4524 img->phdr.p_paddr = buf;
4525 img->phdr.p_memsz = buf_size;
4527 img->shdr[1].sh_name = find_string(img->str, ".text");
4528 img->shdr[1].sh_addr = buf;
4529 img->shdr[1].sh_size = buf_size;
4531 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4532 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4534 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4535 img->shdr[4].sh_size = debug_frame_size;
4537 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4538 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4540 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4541 img->sym[1].st_value = buf;
4542 img->sym[1].st_size = buf_size;
4544 img->di.cu_low_pc = buf;
4545 img->di.cu_high_pc = buf + buf_size;
4546 img->di.fn_low_pc = buf;
4547 img->di.fn_high_pc = buf + buf_size;
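/*
 * The .debug_frame payload lives right after the ElfImage in the same
 * allocation (img_size above; shdr[4].sh_offset points at it): copy the
 * caller's frame and aim its FDE at the live code buffer.
 */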
4549 dfh = (DebugFrameHeader *)(img + 1);
4550 memcpy(dfh, debug_frame, debug_frame_size);
4551 dfh->fde.func_start = buf;
4552 dfh->fde.func_len = buf_size;
4554 #ifdef DEBUG_JIT
4555 /* Enable this block to be able to debug the ELF image file creation.
4556 One can use readelf, objdump, or other inspection utilities. */
4557 {
4558 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4559 if (f) {
4560 if (fwrite(img, img_size, 1, f) != img_size) {
4561 /* Avoid stupid unused return value warning for fwrite. */
4562 }
4563 fclose(f);
4564 }
4565 }
4566 #endif
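/*
 * Publish the image through the GDB JIT interface: fill in the single
 * statically allocated entry, flag the registration, and call the empty
 * noinline hook that an attached debugger breakpoints on.
 */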
4568 one_entry.symfile_addr = img;
4569 one_entry.symfile_size = img_size;
4571 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4572 __jit_debug_descriptor.relevant_entry = &one_entry;
4573 __jit_debug_descriptor.first_entry = &one_entry;
4574 __jit_debug_register_code();
4575 }
4576 #else
4577 /* No support for the feature. Provide the entry point expected by exec.c,
4578 and implement the internal function we declared earlier. */
4580 static void tcg_register_jit_int(const void *buf, size_t size,
4581 const void *debug_frame,
4582 size_t debug_frame_size)
4583 {
4584 }
4586 void tcg_register_jit(const void *buf, size_t buf_size)
4587 {
4588 }
4589 #endif /* ELF_HOST_MACHINE */
4591 #if !TCG_TARGET_MAYBE_vec
4592 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4593 {
4594 g_assert_not_reached();
4595 }
4596 #endif