/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to jump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
43 #define NO_CPU_IO_DEFS
45 #include "exec/exec-all.h"
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
51 #include "tcg/tcg-op.h"
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS ELFCLASS32
56 # define ELF_CLASS ELFCLASS64
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA ELFDATA2MSB
61 # define ELF_DATA ELFDATA2LSB
66 #include "tcg-internal.h"
/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
70 static void tcg_target_init(TCGContext
*s
);
71 static void tcg_target_qemu_prologue(TCGContext
*s
);
72 static bool patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
73 intptr_t value
, intptr_t addend
);
75 /* The CIE and FDE header definitions will be common to all hosts. */
77 uint32_t len
__attribute__((aligned((sizeof(void *)))));
83 uint8_t return_column
;
86 typedef struct QEMU_PACKED
{
87 uint32_t len
__attribute__((aligned((sizeof(void *)))));
91 } DebugFrameFDEHeader
;
93 typedef struct QEMU_PACKED
{
95 DebugFrameFDEHeader fde
;
98 static void tcg_register_jit_int(const void *buf
, size_t size
,
99 const void *debug_frame
,
100 size_t debug_frame_size
)
101 __attribute__((unused
));
103 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
104 static void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg1
,
106 static bool tcg_out_mov(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg
);
107 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
108 TCGReg ret
, tcg_target_long arg
);
109 static void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
110 const TCGArg args
[TCG_MAX_OP_ARGS
],
111 const int const_args
[TCG_MAX_OP_ARGS
]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
114 TCGReg dst
, TCGReg src
);
115 static bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
116 TCGReg dst
, TCGReg base
, intptr_t offset
);
117 static void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
118 TCGReg dst
, int64_t arg
);
119 static void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
,
120 unsigned vecl
, unsigned vece
,
121 const TCGArg args
[TCG_MAX_OP_ARGS
],
122 const int const_args
[TCG_MAX_OP_ARGS
]);
124 static inline bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
125 TCGReg dst
, TCGReg src
)
127 g_assert_not_reached();
129 static inline bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
130 TCGReg dst
, TCGReg base
, intptr_t offset
)
132 g_assert_not_reached();
134 static inline void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
135 TCGReg dst
, int64_t arg
)
137 g_assert_not_reached();
139 static inline void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
,
140 unsigned vecl
, unsigned vece
,
141 const TCGArg args
[TCG_MAX_OP_ARGS
],
142 const int const_args
[TCG_MAX_OP_ARGS
])
144 g_assert_not_reached();
147 static void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
, TCGReg arg1
,
149 static bool tcg_out_sti(TCGContext
*s
, TCGType type
, TCGArg val
,
150 TCGReg base
, intptr_t ofs
);
151 static void tcg_out_call(TCGContext
*s
, const tcg_insn_unit
*target
);
152 static bool tcg_target_const_match(int64_t val
, TCGType type
, int ct
);
153 #ifdef TCG_TARGET_NEED_LDST_LABELS
154 static int tcg_out_ldst_finalize(TCGContext
*s
);
157 TCGContext
**tcg_ctxs
;
158 unsigned int n_tcg_ctxs
;
159 TCGv_env cpu_env
= 0;
160 const void *tcg_code_gen_epilogue
;
161 uintptr_t tcg_splitwx_diff
;
163 #ifndef CONFIG_TCG_INTERPRETER
164 tcg_prologue_fn
*tcg_qemu_tb_exec
;
167 static TCGRegSet tcg_target_available_regs
[TCG_TYPE_COUNT
];
168 static TCGRegSet tcg_target_call_clobber_regs
;
170 #if TCG_TARGET_INSN_UNIT_SIZE == 1
171 static __attribute__((unused
)) inline void tcg_out8(TCGContext
*s
, uint8_t v
)
176 static __attribute__((unused
)) inline void tcg_patch8(tcg_insn_unit
*p
,
183 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
184 static __attribute__((unused
)) inline void tcg_out16(TCGContext
*s
, uint16_t v
)
186 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
189 tcg_insn_unit
*p
= s
->code_ptr
;
190 memcpy(p
, &v
, sizeof(v
));
191 s
->code_ptr
= p
+ (2 / TCG_TARGET_INSN_UNIT_SIZE
);
195 static __attribute__((unused
)) inline void tcg_patch16(tcg_insn_unit
*p
,
198 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
201 memcpy(p
, &v
, sizeof(v
));
206 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
207 static __attribute__((unused
)) inline void tcg_out32(TCGContext
*s
, uint32_t v
)
209 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
212 tcg_insn_unit
*p
= s
->code_ptr
;
213 memcpy(p
, &v
, sizeof(v
));
214 s
->code_ptr
= p
+ (4 / TCG_TARGET_INSN_UNIT_SIZE
);
218 static __attribute__((unused
)) inline void tcg_patch32(tcg_insn_unit
*p
,
221 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
224 memcpy(p
, &v
, sizeof(v
));
229 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
230 static __attribute__((unused
)) inline void tcg_out64(TCGContext
*s
, uint64_t v
)
232 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
235 tcg_insn_unit
*p
= s
->code_ptr
;
236 memcpy(p
, &v
, sizeof(v
));
237 s
->code_ptr
= p
+ (8 / TCG_TARGET_INSN_UNIT_SIZE
);
241 static __attribute__((unused
)) inline void tcg_patch64(tcg_insn_unit
*p
,
244 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
247 memcpy(p
, &v
, sizeof(v
));
252 /* label relocation processing */
254 static void tcg_out_reloc(TCGContext
*s
, tcg_insn_unit
*code_ptr
, int type
,
255 TCGLabel
*l
, intptr_t addend
)
257 TCGRelocation
*r
= tcg_malloc(sizeof(TCGRelocation
));
262 QSIMPLEQ_INSERT_TAIL(&l
->relocs
, r
, next
);
265 static void tcg_out_label(TCGContext
*s
, TCGLabel
*l
)
267 tcg_debug_assert(!l
->has_value
);
269 l
->u
.value_ptr
= tcg_splitwx_to_rx(s
->code_ptr
);
272 TCGLabel
*gen_new_label(void)
274 TCGContext
*s
= tcg_ctx
;
275 TCGLabel
*l
= tcg_malloc(sizeof(TCGLabel
));
277 memset(l
, 0, sizeof(TCGLabel
));
278 l
->id
= s
->nb_labels
++;
279 QSIMPLEQ_INIT(&l
->relocs
);
281 QSIMPLEQ_INSERT_TAIL(&s
->labels
, l
, next
);
286 static bool tcg_resolve_relocs(TCGContext
*s
)
290 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
292 uintptr_t value
= l
->u
.value
;
294 QSIMPLEQ_FOREACH(r
, &l
->relocs
, next
) {
295 if (!patch_reloc(r
->ptr
, r
->type
, value
, r
->addend
)) {
303 static void set_jmp_reset_offset(TCGContext
*s
, int which
)
306 * We will check for overflow at the end of the opcode loop in
307 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
309 s
->tb_jmp_reset_offset
[which
] = tcg_current_code_size(s
);
312 /* Signal overflow, starting over with fewer guest insns. */
313 static void QEMU_NORETURN
tcg_raise_tb_overflow(TCGContext
*s
)
315 siglongjmp(s
->jmp_trans
, -2);
/* Token-pasting helpers: glue prefix P to 1..6 identifier fragments,
   separating the fragments with underscores. */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
345 #include "tcg-target-con-set.h"
346 } TCGConstraintSetIndex
;
348 static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode
);
/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

/* The "&" marks the first output as early-clobber ("new" register). */
#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
383 static const TCGTargetOpDef constraint_sets
[] = {
384 #include "tcg-target-con-set.h"
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
421 #include "tcg-target.c.inc"
423 #ifdef CONFIG_DEBUG_TCG
424 const void *tcg_splitwx_to_rx(void *rw
)
426 /* Pass NULL pointers unchanged. */
428 g_assert(in_code_gen_buffer(rw
));
429 rw
+= tcg_splitwx_diff
;
434 void *tcg_splitwx_to_rw(const void *rx
)
436 /* Pass NULL pointers unchanged. */
438 rx
-= tcg_splitwx_diff
;
439 /* Assert that we end with a pointer in the rw region. */
440 g_assert(in_code_gen_buffer(rx
));
444 #endif /* CONFIG_DEBUG_TCG */
446 static void alloc_tcg_plugin_context(TCGContext
*s
)
449 s
->plugin_tb
= g_new0(struct qemu_plugin_tb
, 1);
450 s
->plugin_tb
->insns
=
451 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn
);
456 * All TCG threads except the parent (i.e. the one that called tcg_context_init
457 * and registered the target's TCG globals) must register with this function
458 * before initiating translation.
460 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
461 * of tcg_region_init() for the reasoning behind this.
463 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
464 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
465 * is not used anymore for translation once this function is called.
467 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
468 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
470 #ifdef CONFIG_USER_ONLY
471 void tcg_register_thread(void)
473 tcg_ctx
= &tcg_init_ctx
;
476 void tcg_register_thread(void)
478 MachineState
*ms
= MACHINE(qdev_get_machine());
479 TCGContext
*s
= g_malloc(sizeof(*s
));
484 /* Relink mem_base. */
485 for (i
= 0, n
= tcg_init_ctx
.nb_globals
; i
< n
; ++i
) {
486 if (tcg_init_ctx
.temps
[i
].mem_base
) {
487 ptrdiff_t b
= tcg_init_ctx
.temps
[i
].mem_base
- tcg_init_ctx
.temps
;
488 tcg_debug_assert(b
>= 0 && b
< n
);
489 s
->temps
[i
].mem_base
= &s
->temps
[b
];
493 /* Claim an entry in tcg_ctxs */
494 n
= qatomic_fetch_inc(&n_tcg_ctxs
);
495 g_assert(n
< ms
->smp
.max_cpus
);
496 qatomic_set(&tcg_ctxs
[n
], s
);
499 alloc_tcg_plugin_context(s
);
500 tcg_region_initial_alloc(s
);
505 #endif /* !CONFIG_USER_ONLY */
507 /* pool based memory allocation */
508 void *tcg_malloc_internal(TCGContext
*s
, int size
)
513 if (size
> TCG_POOL_CHUNK_SIZE
) {
514 /* big malloc: insert a new pool (XXX: could optimize) */
515 p
= g_malloc(sizeof(TCGPool
) + size
);
517 p
->next
= s
->pool_first_large
;
518 s
->pool_first_large
= p
;
529 pool_size
= TCG_POOL_CHUNK_SIZE
;
530 p
= g_malloc(sizeof(TCGPool
) + pool_size
);
534 s
->pool_current
->next
= p
;
543 s
->pool_cur
= p
->data
+ size
;
544 s
->pool_end
= p
->data
+ p
->size
;
548 void tcg_pool_reset(TCGContext
*s
)
551 for (p
= s
->pool_first_large
; p
; p
= t
) {
555 s
->pool_first_large
= NULL
;
556 s
->pool_cur
= s
->pool_end
= NULL
;
557 s
->pool_current
= NULL
;
560 typedef struct TCGHelperInfo
{
567 #include "exec/helper-proto.h"
569 static const TCGHelperInfo all_helpers
[] = {
570 #include "exec/helper-tcg.h"
572 static GHashTable
*helper_table
;
574 static int indirect_reg_alloc_order
[ARRAY_SIZE(tcg_target_reg_alloc_order
)];
575 static void process_op_defs(TCGContext
*s
);
576 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
577 TCGReg reg
, const char *name
);
579 static void tcg_context_init(unsigned max_cpus
)
581 TCGContext
*s
= &tcg_init_ctx
;
582 int op
, total_args
, n
, i
;
584 TCGArgConstraint
*args_ct
;
587 memset(s
, 0, sizeof(*s
));
590 /* Count total number of arguments and allocate the corresponding
593 for(op
= 0; op
< NB_OPS
; op
++) {
594 def
= &tcg_op_defs
[op
];
595 n
= def
->nb_iargs
+ def
->nb_oargs
;
599 args_ct
= g_new0(TCGArgConstraint
, total_args
);
601 for(op
= 0; op
< NB_OPS
; op
++) {
602 def
= &tcg_op_defs
[op
];
603 def
->args_ct
= args_ct
;
604 n
= def
->nb_iargs
+ def
->nb_oargs
;
608 /* Register helpers. */
609 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
610 helper_table
= g_hash_table_new(NULL
, NULL
);
612 for (i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
613 g_hash_table_insert(helper_table
, (gpointer
)all_helpers
[i
].func
,
614 (gpointer
)&all_helpers
[i
]);
620 /* Reverse the order of the saved registers, assuming they're all at
621 the start of tcg_target_reg_alloc_order. */
622 for (n
= 0; n
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++n
) {
623 int r
= tcg_target_reg_alloc_order
[n
];
624 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, r
)) {
628 for (i
= 0; i
< n
; ++i
) {
629 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[n
- 1 - i
];
631 for (; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++i
) {
632 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[i
];
635 alloc_tcg_plugin_context(s
);
639 * In user-mode we simply share the init context among threads, since we
640 * use a single region. See the documentation tcg_region_init() for the
641 * reasoning behind this.
642 * In softmmu we will have at most max_cpus TCG threads.
644 #ifdef CONFIG_USER_ONLY
648 tcg_ctxs
= g_new(TCGContext
*, max_cpus
);
651 tcg_debug_assert(!tcg_regset_test_reg(s
->reserved_regs
, TCG_AREG0
));
652 ts
= tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, TCG_AREG0
, "env");
653 cpu_env
= temp_tcgv_ptr(ts
);
656 void tcg_init(size_t tb_size
, int splitwx
, unsigned max_cpus
)
658 tcg_context_init(max_cpus
);
659 tcg_region_init(tb_size
, splitwx
, max_cpus
);
663 * Allocate TBs right before their corresponding translated code, making
664 * sure that TBs and code are on different cache lines.
666 TranslationBlock
*tcg_tb_alloc(TCGContext
*s
)
668 uintptr_t align
= qemu_icache_linesize
;
669 TranslationBlock
*tb
;
673 tb
= (void *)ROUND_UP((uintptr_t)s
->code_gen_ptr
, align
);
674 next
= (void *)ROUND_UP((uintptr_t)(tb
+ 1), align
);
676 if (unlikely(next
> s
->code_gen_highwater
)) {
677 if (tcg_region_alloc(s
)) {
682 qatomic_set(&s
->code_gen_ptr
, next
);
683 s
->data_gen_ptr
= NULL
;
687 void tcg_prologue_init(TCGContext
*s
)
689 size_t prologue_size
;
691 s
->code_ptr
= s
->code_gen_ptr
;
692 s
->code_buf
= s
->code_gen_ptr
;
693 s
->data_gen_ptr
= NULL
;
695 #ifndef CONFIG_TCG_INTERPRETER
696 tcg_qemu_tb_exec
= (tcg_prologue_fn
*)tcg_splitwx_to_rx(s
->code_ptr
);
699 #ifdef TCG_TARGET_NEED_POOL_LABELS
700 s
->pool_labels
= NULL
;
703 qemu_thread_jit_write();
704 /* Generate the prologue. */
705 tcg_target_qemu_prologue(s
);
707 #ifdef TCG_TARGET_NEED_POOL_LABELS
708 /* Allow the prologue to put e.g. guest_base into a pool entry. */
710 int result
= tcg_out_pool_finalize(s
);
711 tcg_debug_assert(result
== 0);
715 prologue_size
= tcg_current_code_size(s
);
717 #ifndef CONFIG_TCG_INTERPRETER
718 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
719 (uintptr_t)s
->code_buf
, prologue_size
);
722 tcg_region_prologue_set(s
);
725 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
726 FILE *logfile
= qemu_log_lock();
727 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size
);
728 if (s
->data_gen_ptr
) {
729 size_t code_size
= s
->data_gen_ptr
- s
->code_gen_ptr
;
730 size_t data_size
= prologue_size
- code_size
;
733 log_disas(s
->code_gen_ptr
, code_size
);
735 for (i
= 0; i
< data_size
; i
+= sizeof(tcg_target_ulong
)) {
736 if (sizeof(tcg_target_ulong
) == 8) {
737 qemu_log("0x%08" PRIxPTR
": .quad 0x%016" PRIx64
"\n",
738 (uintptr_t)s
->data_gen_ptr
+ i
,
739 *(uint64_t *)(s
->data_gen_ptr
+ i
));
741 qemu_log("0x%08" PRIxPTR
": .long 0x%08x\n",
742 (uintptr_t)s
->data_gen_ptr
+ i
,
743 *(uint32_t *)(s
->data_gen_ptr
+ i
));
747 log_disas(s
->code_gen_ptr
, prologue_size
);
751 qemu_log_unlock(logfile
);
755 /* Assert that goto_ptr is implemented completely. */
756 if (TCG_TARGET_HAS_goto_ptr
) {
757 tcg_debug_assert(tcg_code_gen_epilogue
!= NULL
);
761 void tcg_func_start(TCGContext
*s
)
764 s
->nb_temps
= s
->nb_globals
;
766 /* No temps have been previously allocated for size or locality. */
767 memset(s
->free_temps
, 0, sizeof(s
->free_temps
));
769 /* No constant temps have been previously allocated. */
770 for (int i
= 0; i
< TCG_TYPE_COUNT
; ++i
) {
771 if (s
->const_table
[i
]) {
772 g_hash_table_remove_all(s
->const_table
[i
]);
778 s
->current_frame_offset
= s
->frame_start
;
780 #ifdef CONFIG_DEBUG_TCG
781 s
->goto_tb_issue_mask
= 0;
784 QTAILQ_INIT(&s
->ops
);
785 QTAILQ_INIT(&s
->free_ops
);
786 QSIMPLEQ_INIT(&s
->labels
);
789 static TCGTemp
*tcg_temp_alloc(TCGContext
*s
)
791 int n
= s
->nb_temps
++;
793 if (n
>= TCG_MAX_TEMPS
) {
794 tcg_raise_tb_overflow(s
);
796 return memset(&s
->temps
[n
], 0, sizeof(TCGTemp
));
799 static TCGTemp
*tcg_global_alloc(TCGContext
*s
)
803 tcg_debug_assert(s
->nb_globals
== s
->nb_temps
);
804 tcg_debug_assert(s
->nb_globals
< TCG_MAX_TEMPS
);
806 ts
= tcg_temp_alloc(s
);
807 ts
->kind
= TEMP_GLOBAL
;
812 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
813 TCGReg reg
, const char *name
)
817 if (TCG_TARGET_REG_BITS
== 32 && type
!= TCG_TYPE_I32
) {
821 ts
= tcg_global_alloc(s
);
822 ts
->base_type
= type
;
824 ts
->kind
= TEMP_FIXED
;
827 tcg_regset_set_reg(s
->reserved_regs
, reg
);
832 void tcg_set_frame(TCGContext
*s
, TCGReg reg
, intptr_t start
, intptr_t size
)
834 s
->frame_start
= start
;
835 s
->frame_end
= start
+ size
;
837 = tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, reg
, "_frame");
840 TCGTemp
*tcg_global_mem_new_internal(TCGType type
, TCGv_ptr base
,
841 intptr_t offset
, const char *name
)
843 TCGContext
*s
= tcg_ctx
;
844 TCGTemp
*base_ts
= tcgv_ptr_temp(base
);
845 TCGTemp
*ts
= tcg_global_alloc(s
);
846 int indirect_reg
= 0, bigendian
= 0;
847 #ifdef HOST_WORDS_BIGENDIAN
851 switch (base_ts
->kind
) {
855 /* We do not support double-indirect registers. */
856 tcg_debug_assert(!base_ts
->indirect_reg
);
857 base_ts
->indirect_base
= 1;
858 s
->nb_indirects
+= (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
863 g_assert_not_reached();
866 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
867 TCGTemp
*ts2
= tcg_global_alloc(s
);
870 ts
->base_type
= TCG_TYPE_I64
;
871 ts
->type
= TCG_TYPE_I32
;
872 ts
->indirect_reg
= indirect_reg
;
873 ts
->mem_allocated
= 1;
874 ts
->mem_base
= base_ts
;
875 ts
->mem_offset
= offset
+ bigendian
* 4;
876 pstrcpy(buf
, sizeof(buf
), name
);
877 pstrcat(buf
, sizeof(buf
), "_0");
878 ts
->name
= strdup(buf
);
880 tcg_debug_assert(ts2
== ts
+ 1);
881 ts2
->base_type
= TCG_TYPE_I64
;
882 ts2
->type
= TCG_TYPE_I32
;
883 ts2
->indirect_reg
= indirect_reg
;
884 ts2
->mem_allocated
= 1;
885 ts2
->mem_base
= base_ts
;
886 ts2
->mem_offset
= offset
+ (1 - bigendian
) * 4;
887 pstrcpy(buf
, sizeof(buf
), name
);
888 pstrcat(buf
, sizeof(buf
), "_1");
889 ts2
->name
= strdup(buf
);
891 ts
->base_type
= type
;
893 ts
->indirect_reg
= indirect_reg
;
894 ts
->mem_allocated
= 1;
895 ts
->mem_base
= base_ts
;
896 ts
->mem_offset
= offset
;
902 TCGTemp
*tcg_temp_new_internal(TCGType type
, bool temp_local
)
904 TCGContext
*s
= tcg_ctx
;
905 TCGTempKind kind
= temp_local
? TEMP_LOCAL
: TEMP_NORMAL
;
909 k
= type
+ (temp_local
? TCG_TYPE_COUNT
: 0);
910 idx
= find_first_bit(s
->free_temps
[k
].l
, TCG_MAX_TEMPS
);
911 if (idx
< TCG_MAX_TEMPS
) {
912 /* There is already an available temp with the right type. */
913 clear_bit(idx
, s
->free_temps
[k
].l
);
916 ts
->temp_allocated
= 1;
917 tcg_debug_assert(ts
->base_type
== type
);
918 tcg_debug_assert(ts
->kind
== kind
);
920 ts
= tcg_temp_alloc(s
);
921 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
922 TCGTemp
*ts2
= tcg_temp_alloc(s
);
924 ts
->base_type
= type
;
925 ts
->type
= TCG_TYPE_I32
;
926 ts
->temp_allocated
= 1;
929 tcg_debug_assert(ts2
== ts
+ 1);
930 ts2
->base_type
= TCG_TYPE_I64
;
931 ts2
->type
= TCG_TYPE_I32
;
932 ts2
->temp_allocated
= 1;
935 ts
->base_type
= type
;
937 ts
->temp_allocated
= 1;
942 #if defined(CONFIG_DEBUG_TCG)
948 TCGv_vec
tcg_temp_new_vec(TCGType type
)
952 #ifdef CONFIG_DEBUG_TCG
955 assert(TCG_TARGET_HAS_v64
);
958 assert(TCG_TARGET_HAS_v128
);
961 assert(TCG_TARGET_HAS_v256
);
964 g_assert_not_reached();
968 t
= tcg_temp_new_internal(type
, 0);
969 return temp_tcgv_vec(t
);
972 /* Create a new temp of the same type as an existing temp. */
973 TCGv_vec
tcg_temp_new_vec_matching(TCGv_vec match
)
975 TCGTemp
*t
= tcgv_vec_temp(match
);
977 tcg_debug_assert(t
->temp_allocated
!= 0);
979 t
= tcg_temp_new_internal(t
->base_type
, 0);
980 return temp_tcgv_vec(t
);
983 void tcg_temp_free_internal(TCGTemp
*ts
)
985 TCGContext
*s
= tcg_ctx
;
988 /* In order to simplify users of tcg_constant_*, silently ignore free. */
989 if (ts
->kind
== TEMP_CONST
) {
993 #if defined(CONFIG_DEBUG_TCG)
995 if (s
->temps_in_use
< 0) {
996 fprintf(stderr
, "More temporaries freed than allocated!\n");
1000 tcg_debug_assert(ts
->kind
< TEMP_GLOBAL
);
1001 tcg_debug_assert(ts
->temp_allocated
!= 0);
1002 ts
->temp_allocated
= 0;
1005 k
= ts
->base_type
+ (ts
->kind
== TEMP_NORMAL
? 0 : TCG_TYPE_COUNT
);
1006 set_bit(idx
, s
->free_temps
[k
].l
);
1009 TCGTemp
*tcg_constant_internal(TCGType type
, int64_t val
)
1011 TCGContext
*s
= tcg_ctx
;
1012 GHashTable
*h
= s
->const_table
[type
];
1016 h
= g_hash_table_new(g_int64_hash
, g_int64_equal
);
1017 s
->const_table
[type
] = h
;
1020 ts
= g_hash_table_lookup(h
, &val
);
1022 ts
= tcg_temp_alloc(s
);
1024 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1025 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1027 ts
->base_type
= TCG_TYPE_I64
;
1028 ts
->type
= TCG_TYPE_I32
;
1029 ts
->kind
= TEMP_CONST
;
1030 ts
->temp_allocated
= 1;
1032 * Retain the full value of the 64-bit constant in the low
1033 * part, so that the hash table works. Actual uses will
1034 * truncate the value to the low part.
1038 tcg_debug_assert(ts2
== ts
+ 1);
1039 ts2
->base_type
= TCG_TYPE_I64
;
1040 ts2
->type
= TCG_TYPE_I32
;
1041 ts2
->kind
= TEMP_CONST
;
1042 ts2
->temp_allocated
= 1;
1043 ts2
->val
= val
>> 32;
1045 ts
->base_type
= type
;
1047 ts
->kind
= TEMP_CONST
;
1048 ts
->temp_allocated
= 1;
1051 g_hash_table_insert(h
, &ts
->val
, ts
);
1057 TCGv_vec
tcg_constant_vec(TCGType type
, unsigned vece
, int64_t val
)
1059 val
= dup_const(vece
, val
);
1060 return temp_tcgv_vec(tcg_constant_internal(type
, val
));
1063 TCGv_vec
tcg_constant_vec_matching(TCGv_vec match
, unsigned vece
, int64_t val
)
1065 TCGTemp
*t
= tcgv_vec_temp(match
);
1067 tcg_debug_assert(t
->temp_allocated
!= 0);
1068 return tcg_constant_vec(t
->base_type
, vece
, val
);
1071 TCGv_i32
tcg_const_i32(int32_t val
)
1074 t0
= tcg_temp_new_i32();
1075 tcg_gen_movi_i32(t0
, val
);
1079 TCGv_i64
tcg_const_i64(int64_t val
)
1082 t0
= tcg_temp_new_i64();
1083 tcg_gen_movi_i64(t0
, val
);
1087 TCGv_i32
tcg_const_local_i32(int32_t val
)
1090 t0
= tcg_temp_local_new_i32();
1091 tcg_gen_movi_i32(t0
, val
);
1095 TCGv_i64
tcg_const_local_i64(int64_t val
)
1098 t0
= tcg_temp_local_new_i64();
1099 tcg_gen_movi_i64(t0
, val
);
1103 #if defined(CONFIG_DEBUG_TCG)
1104 void tcg_clear_temp_count(void)
1106 TCGContext
*s
= tcg_ctx
;
1107 s
->temps_in_use
= 0;
1110 int tcg_check_temp_count(void)
1112 TCGContext
*s
= tcg_ctx
;
1113 if (s
->temps_in_use
) {
1114 /* Clear the count so that we don't give another
1115 * warning immediately next time around.
1117 s
->temps_in_use
= 0;
1124 /* Return true if OP may appear in the opcode stream.
1125 Test the runtime variable that controls each opcode. */
1126 bool tcg_op_supported(TCGOpcode op
)
1129 = TCG_TARGET_HAS_v64
| TCG_TARGET_HAS_v128
| TCG_TARGET_HAS_v256
;
1132 case INDEX_op_discard
:
1133 case INDEX_op_set_label
:
1137 case INDEX_op_insn_start
:
1138 case INDEX_op_exit_tb
:
1139 case INDEX_op_goto_tb
:
1140 case INDEX_op_qemu_ld_i32
:
1141 case INDEX_op_qemu_st_i32
:
1142 case INDEX_op_qemu_ld_i64
:
1143 case INDEX_op_qemu_st_i64
:
1146 case INDEX_op_qemu_st8_i32
:
1147 return TCG_TARGET_HAS_qemu_st8_i32
;
1149 case INDEX_op_goto_ptr
:
1150 return TCG_TARGET_HAS_goto_ptr
;
1152 case INDEX_op_mov_i32
:
1153 case INDEX_op_setcond_i32
:
1154 case INDEX_op_brcond_i32
:
1155 case INDEX_op_ld8u_i32
:
1156 case INDEX_op_ld8s_i32
:
1157 case INDEX_op_ld16u_i32
:
1158 case INDEX_op_ld16s_i32
:
1159 case INDEX_op_ld_i32
:
1160 case INDEX_op_st8_i32
:
1161 case INDEX_op_st16_i32
:
1162 case INDEX_op_st_i32
:
1163 case INDEX_op_add_i32
:
1164 case INDEX_op_sub_i32
:
1165 case INDEX_op_mul_i32
:
1166 case INDEX_op_and_i32
:
1167 case INDEX_op_or_i32
:
1168 case INDEX_op_xor_i32
:
1169 case INDEX_op_shl_i32
:
1170 case INDEX_op_shr_i32
:
1171 case INDEX_op_sar_i32
:
1174 case INDEX_op_movcond_i32
:
1175 return TCG_TARGET_HAS_movcond_i32
;
1176 case INDEX_op_div_i32
:
1177 case INDEX_op_divu_i32
:
1178 return TCG_TARGET_HAS_div_i32
;
1179 case INDEX_op_rem_i32
:
1180 case INDEX_op_remu_i32
:
1181 return TCG_TARGET_HAS_rem_i32
;
1182 case INDEX_op_div2_i32
:
1183 case INDEX_op_divu2_i32
:
1184 return TCG_TARGET_HAS_div2_i32
;
1185 case INDEX_op_rotl_i32
:
1186 case INDEX_op_rotr_i32
:
1187 return TCG_TARGET_HAS_rot_i32
;
1188 case INDEX_op_deposit_i32
:
1189 return TCG_TARGET_HAS_deposit_i32
;
1190 case INDEX_op_extract_i32
:
1191 return TCG_TARGET_HAS_extract_i32
;
1192 case INDEX_op_sextract_i32
:
1193 return TCG_TARGET_HAS_sextract_i32
;
1194 case INDEX_op_extract2_i32
:
1195 return TCG_TARGET_HAS_extract2_i32
;
1196 case INDEX_op_add2_i32
:
1197 return TCG_TARGET_HAS_add2_i32
;
1198 case INDEX_op_sub2_i32
:
1199 return TCG_TARGET_HAS_sub2_i32
;
1200 case INDEX_op_mulu2_i32
:
1201 return TCG_TARGET_HAS_mulu2_i32
;
1202 case INDEX_op_muls2_i32
:
1203 return TCG_TARGET_HAS_muls2_i32
;
1204 case INDEX_op_muluh_i32
:
1205 return TCG_TARGET_HAS_muluh_i32
;
1206 case INDEX_op_mulsh_i32
:
1207 return TCG_TARGET_HAS_mulsh_i32
;
1208 case INDEX_op_ext8s_i32
:
1209 return TCG_TARGET_HAS_ext8s_i32
;
1210 case INDEX_op_ext16s_i32
:
1211 return TCG_TARGET_HAS_ext16s_i32
;
1212 case INDEX_op_ext8u_i32
:
1213 return TCG_TARGET_HAS_ext8u_i32
;
1214 case INDEX_op_ext16u_i32
:
1215 return TCG_TARGET_HAS_ext16u_i32
;
1216 case INDEX_op_bswap16_i32
:
1217 return TCG_TARGET_HAS_bswap16_i32
;
1218 case INDEX_op_bswap32_i32
:
1219 return TCG_TARGET_HAS_bswap32_i32
;
1220 case INDEX_op_not_i32
:
1221 return TCG_TARGET_HAS_not_i32
;
1222 case INDEX_op_neg_i32
:
1223 return TCG_TARGET_HAS_neg_i32
;
1224 case INDEX_op_andc_i32
:
1225 return TCG_TARGET_HAS_andc_i32
;
1226 case INDEX_op_orc_i32
:
1227 return TCG_TARGET_HAS_orc_i32
;
1228 case INDEX_op_eqv_i32
:
1229 return TCG_TARGET_HAS_eqv_i32
;
1230 case INDEX_op_nand_i32
:
1231 return TCG_TARGET_HAS_nand_i32
;
1232 case INDEX_op_nor_i32
:
1233 return TCG_TARGET_HAS_nor_i32
;
1234 case INDEX_op_clz_i32
:
1235 return TCG_TARGET_HAS_clz_i32
;
1236 case INDEX_op_ctz_i32
:
1237 return TCG_TARGET_HAS_ctz_i32
;
1238 case INDEX_op_ctpop_i32
:
1239 return TCG_TARGET_HAS_ctpop_i32
;
1241 case INDEX_op_brcond2_i32
:
1242 case INDEX_op_setcond2_i32
:
1243 return TCG_TARGET_REG_BITS
== 32;
1245 case INDEX_op_mov_i64
:
1246 case INDEX_op_setcond_i64
:
1247 case INDEX_op_brcond_i64
:
1248 case INDEX_op_ld8u_i64
:
1249 case INDEX_op_ld8s_i64
:
1250 case INDEX_op_ld16u_i64
:
1251 case INDEX_op_ld16s_i64
:
1252 case INDEX_op_ld32u_i64
:
1253 case INDEX_op_ld32s_i64
:
1254 case INDEX_op_ld_i64
:
1255 case INDEX_op_st8_i64
:
1256 case INDEX_op_st16_i64
:
1257 case INDEX_op_st32_i64
:
1258 case INDEX_op_st_i64
:
1259 case INDEX_op_add_i64
:
1260 case INDEX_op_sub_i64
:
1261 case INDEX_op_mul_i64
:
1262 case INDEX_op_and_i64
:
1263 case INDEX_op_or_i64
:
1264 case INDEX_op_xor_i64
:
1265 case INDEX_op_shl_i64
:
1266 case INDEX_op_shr_i64
:
1267 case INDEX_op_sar_i64
:
1268 case INDEX_op_ext_i32_i64
:
1269 case INDEX_op_extu_i32_i64
:
1270 return TCG_TARGET_REG_BITS
== 64;
1272 case INDEX_op_movcond_i64
:
1273 return TCG_TARGET_HAS_movcond_i64
;
1274 case INDEX_op_div_i64
:
1275 case INDEX_op_divu_i64
:
1276 return TCG_TARGET_HAS_div_i64
;
1277 case INDEX_op_rem_i64
:
1278 case INDEX_op_remu_i64
:
1279 return TCG_TARGET_HAS_rem_i64
;
1280 case INDEX_op_div2_i64
:
1281 case INDEX_op_divu2_i64
:
1282 return TCG_TARGET_HAS_div2_i64
;
1283 case INDEX_op_rotl_i64
:
1284 case INDEX_op_rotr_i64
:
1285 return TCG_TARGET_HAS_rot_i64
;
1286 case INDEX_op_deposit_i64
:
1287 return TCG_TARGET_HAS_deposit_i64
;
1288 case INDEX_op_extract_i64
:
1289 return TCG_TARGET_HAS_extract_i64
;
1290 case INDEX_op_sextract_i64
:
1291 return TCG_TARGET_HAS_sextract_i64
;
1292 case INDEX_op_extract2_i64
:
1293 return TCG_TARGET_HAS_extract2_i64
;
1294 case INDEX_op_extrl_i64_i32
:
1295 return TCG_TARGET_HAS_extrl_i64_i32
;
1296 case INDEX_op_extrh_i64_i32
:
1297 return TCG_TARGET_HAS_extrh_i64_i32
;
1298 case INDEX_op_ext8s_i64
:
1299 return TCG_TARGET_HAS_ext8s_i64
;
1300 case INDEX_op_ext16s_i64
:
1301 return TCG_TARGET_HAS_ext16s_i64
;
1302 case INDEX_op_ext32s_i64
:
1303 return TCG_TARGET_HAS_ext32s_i64
;
1304 case INDEX_op_ext8u_i64
:
1305 return TCG_TARGET_HAS_ext8u_i64
;
1306 case INDEX_op_ext16u_i64
:
1307 return TCG_TARGET_HAS_ext16u_i64
;
1308 case INDEX_op_ext32u_i64
:
1309 return TCG_TARGET_HAS_ext32u_i64
;
1310 case INDEX_op_bswap16_i64
:
1311 return TCG_TARGET_HAS_bswap16_i64
;
1312 case INDEX_op_bswap32_i64
:
1313 return TCG_TARGET_HAS_bswap32_i64
;
1314 case INDEX_op_bswap64_i64
:
1315 return TCG_TARGET_HAS_bswap64_i64
;
1316 case INDEX_op_not_i64
:
1317 return TCG_TARGET_HAS_not_i64
;
1318 case INDEX_op_neg_i64
:
1319 return TCG_TARGET_HAS_neg_i64
;
1320 case INDEX_op_andc_i64
:
1321 return TCG_TARGET_HAS_andc_i64
;
1322 case INDEX_op_orc_i64
:
1323 return TCG_TARGET_HAS_orc_i64
;
1324 case INDEX_op_eqv_i64
:
1325 return TCG_TARGET_HAS_eqv_i64
;
1326 case INDEX_op_nand_i64
:
1327 return TCG_TARGET_HAS_nand_i64
;
1328 case INDEX_op_nor_i64
:
1329 return TCG_TARGET_HAS_nor_i64
;
1330 case INDEX_op_clz_i64
:
1331 return TCG_TARGET_HAS_clz_i64
;
1332 case INDEX_op_ctz_i64
:
1333 return TCG_TARGET_HAS_ctz_i64
;
1334 case INDEX_op_ctpop_i64
:
1335 return TCG_TARGET_HAS_ctpop_i64
;
1336 case INDEX_op_add2_i64
:
1337 return TCG_TARGET_HAS_add2_i64
;
1338 case INDEX_op_sub2_i64
:
1339 return TCG_TARGET_HAS_sub2_i64
;
1340 case INDEX_op_mulu2_i64
:
1341 return TCG_TARGET_HAS_mulu2_i64
;
1342 case INDEX_op_muls2_i64
:
1343 return TCG_TARGET_HAS_muls2_i64
;
1344 case INDEX_op_muluh_i64
:
1345 return TCG_TARGET_HAS_muluh_i64
;
1346 case INDEX_op_mulsh_i64
:
1347 return TCG_TARGET_HAS_mulsh_i64
;
1349 case INDEX_op_mov_vec
:
1350 case INDEX_op_dup_vec
:
1351 case INDEX_op_dupm_vec
:
1352 case INDEX_op_ld_vec
:
1353 case INDEX_op_st_vec
:
1354 case INDEX_op_add_vec
:
1355 case INDEX_op_sub_vec
:
1356 case INDEX_op_and_vec
:
1357 case INDEX_op_or_vec
:
1358 case INDEX_op_xor_vec
:
1359 case INDEX_op_cmp_vec
:
1361 case INDEX_op_dup2_vec
:
1362 return have_vec
&& TCG_TARGET_REG_BITS
== 32;
1363 case INDEX_op_not_vec
:
1364 return have_vec
&& TCG_TARGET_HAS_not_vec
;
1365 case INDEX_op_neg_vec
:
1366 return have_vec
&& TCG_TARGET_HAS_neg_vec
;
1367 case INDEX_op_abs_vec
:
1368 return have_vec
&& TCG_TARGET_HAS_abs_vec
;
1369 case INDEX_op_andc_vec
:
1370 return have_vec
&& TCG_TARGET_HAS_andc_vec
;
1371 case INDEX_op_orc_vec
:
1372 return have_vec
&& TCG_TARGET_HAS_orc_vec
;
1373 case INDEX_op_mul_vec
:
1374 return have_vec
&& TCG_TARGET_HAS_mul_vec
;
1375 case INDEX_op_shli_vec
:
1376 case INDEX_op_shri_vec
:
1377 case INDEX_op_sari_vec
:
1378 return have_vec
&& TCG_TARGET_HAS_shi_vec
;
1379 case INDEX_op_shls_vec
:
1380 case INDEX_op_shrs_vec
:
1381 case INDEX_op_sars_vec
:
1382 return have_vec
&& TCG_TARGET_HAS_shs_vec
;
1383 case INDEX_op_shlv_vec
:
1384 case INDEX_op_shrv_vec
:
1385 case INDEX_op_sarv_vec
:
1386 return have_vec
&& TCG_TARGET_HAS_shv_vec
;
1387 case INDEX_op_rotli_vec
:
1388 return have_vec
&& TCG_TARGET_HAS_roti_vec
;
1389 case INDEX_op_rotls_vec
:
1390 return have_vec
&& TCG_TARGET_HAS_rots_vec
;
1391 case INDEX_op_rotlv_vec
:
1392 case INDEX_op_rotrv_vec
:
1393 return have_vec
&& TCG_TARGET_HAS_rotv_vec
;
1394 case INDEX_op_ssadd_vec
:
1395 case INDEX_op_usadd_vec
:
1396 case INDEX_op_sssub_vec
:
1397 case INDEX_op_ussub_vec
:
1398 return have_vec
&& TCG_TARGET_HAS_sat_vec
;
1399 case INDEX_op_smin_vec
:
1400 case INDEX_op_umin_vec
:
1401 case INDEX_op_smax_vec
:
1402 case INDEX_op_umax_vec
:
1403 return have_vec
&& TCG_TARGET_HAS_minmax_vec
;
1404 case INDEX_op_bitsel_vec
:
1405 return have_vec
&& TCG_TARGET_HAS_bitsel_vec
;
1406 case INDEX_op_cmpsel_vec
:
1407 return have_vec
&& TCG_TARGET_HAS_cmpsel_vec
;
1410 tcg_debug_assert(op
> INDEX_op_last_generic
&& op
< NB_OPS
);
1415 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1416 and endian swap. Maybe it would be better to do the alignment
1417 and endian swap in tcg_reg_alloc_call(). */
1418 void tcg_gen_callN(void *func
, TCGTemp
*ret
, int nargs
, TCGTemp
**args
)
1420 int i
, real_args
, nb_rets
, pi
;
1421 unsigned sizemask
, flags
;
1422 TCGHelperInfo
*info
;
1425 info
= g_hash_table_lookup(helper_table
, (gpointer
)func
);
1426 flags
= info
->flags
;
1427 sizemask
= info
->sizemask
;
1429 #ifdef CONFIG_PLUGIN
1430 /* detect non-plugin helpers */
1431 if (tcg_ctx
->plugin_insn
&& unlikely(strncmp(info
->name
, "plugin_", 7))) {
1432 tcg_ctx
->plugin_insn
->calls_helpers
= true;
1436 #if defined(__sparc__) && !defined(__arch64__) \
1437 && !defined(CONFIG_TCG_INTERPRETER)
1438 /* We have 64-bit values in one register, but need to pass as two
1439 separate parameters. Split them. */
1440 int orig_sizemask
= sizemask
;
1441 int orig_nargs
= nargs
;
1442 TCGv_i64 retl
, reth
;
1443 TCGTemp
*split_args
[MAX_OPC_PARAM
];
1447 if (sizemask
!= 0) {
1448 for (i
= real_args
= 0; i
< nargs
; ++i
) {
1449 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1451 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1452 TCGv_i32 h
= tcg_temp_new_i32();
1453 TCGv_i32 l
= tcg_temp_new_i32();
1454 tcg_gen_extr_i64_i32(l
, h
, orig
);
1455 split_args
[real_args
++] = tcgv_i32_temp(h
);
1456 split_args
[real_args
++] = tcgv_i32_temp(l
);
1458 split_args
[real_args
++] = args
[i
];
1465 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1466 for (i
= 0; i
< nargs
; ++i
) {
1467 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1468 int is_signed
= sizemask
& (2 << (i
+1)*2);
1470 TCGv_i64 temp
= tcg_temp_new_i64();
1471 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1473 tcg_gen_ext32s_i64(temp
, orig
);
1475 tcg_gen_ext32u_i64(temp
, orig
);
1477 args
[i
] = tcgv_i64_temp(temp
);
1480 #endif /* TCG_TARGET_EXTEND_ARGS */
1482 op
= tcg_emit_op(INDEX_op_call
);
1486 #if defined(__sparc__) && !defined(__arch64__) \
1487 && !defined(CONFIG_TCG_INTERPRETER)
1488 if (orig_sizemask
& 1) {
1489 /* The 32-bit ABI is going to return the 64-bit value in
1490 the %o0/%o1 register pair. Prepare for this by using
1491 two return temporaries, and reassemble below. */
1492 retl
= tcg_temp_new_i64();
1493 reth
= tcg_temp_new_i64();
1494 op
->args
[pi
++] = tcgv_i64_arg(reth
);
1495 op
->args
[pi
++] = tcgv_i64_arg(retl
);
1498 op
->args
[pi
++] = temp_arg(ret
);
1502 if (TCG_TARGET_REG_BITS
< 64 && (sizemask
& 1)) {
1503 #ifdef HOST_WORDS_BIGENDIAN
1504 op
->args
[pi
++] = temp_arg(ret
+ 1);
1505 op
->args
[pi
++] = temp_arg(ret
);
1507 op
->args
[pi
++] = temp_arg(ret
);
1508 op
->args
[pi
++] = temp_arg(ret
+ 1);
1512 op
->args
[pi
++] = temp_arg(ret
);
1519 TCGOP_CALLO(op
) = nb_rets
;
1522 for (i
= 0; i
< nargs
; i
++) {
1523 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1524 if (TCG_TARGET_REG_BITS
< 64 && is_64bit
) {
1525 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1526 /* some targets want aligned 64 bit args */
1527 if (real_args
& 1) {
1528 op
->args
[pi
++] = TCG_CALL_DUMMY_ARG
;
1532 /* If stack grows up, then we will be placing successive
1533 arguments at lower addresses, which means we need to
1534 reverse the order compared to how we would normally
1535 treat either big or little-endian. For those arguments
1536 that will wind up in registers, this still works for
1537 HPPA (the only current STACK_GROWSUP target) since the
1538 argument registers are *also* allocated in decreasing
1539 order. If another such target is added, this logic may
1540 have to get more complicated to differentiate between
1541 stack arguments and register arguments. */
1542 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1543 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
1544 op
->args
[pi
++] = temp_arg(args
[i
]);
1546 op
->args
[pi
++] = temp_arg(args
[i
]);
1547 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
1553 op
->args
[pi
++] = temp_arg(args
[i
]);
1556 op
->args
[pi
++] = (uintptr_t)func
;
1557 op
->args
[pi
++] = flags
;
1558 TCGOP_CALLI(op
) = real_args
;
1560 /* Make sure the fields didn't overflow. */
1561 tcg_debug_assert(TCGOP_CALLI(op
) == real_args
);
1562 tcg_debug_assert(pi
<= ARRAY_SIZE(op
->args
));
1564 #if defined(__sparc__) && !defined(__arch64__) \
1565 && !defined(CONFIG_TCG_INTERPRETER)
1566 /* Free all of the parts we allocated above. */
1567 for (i
= real_args
= 0; i
< orig_nargs
; ++i
) {
1568 int is_64bit
= orig_sizemask
& (1 << (i
+1)*2);
1570 tcg_temp_free_internal(args
[real_args
++]);
1571 tcg_temp_free_internal(args
[real_args
++]);
1576 if (orig_sizemask
& 1) {
1577 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1578 Note that describing these as TCGv_i64 eliminates an unnecessary
1579 zero-extension that tcg_gen_concat_i32_i64 would create. */
1580 tcg_gen_concat32_i64(temp_tcgv_i64(ret
), retl
, reth
);
1581 tcg_temp_free_i64(retl
);
1582 tcg_temp_free_i64(reth
);
1584 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1585 for (i
= 0; i
< nargs
; ++i
) {
1586 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1588 tcg_temp_free_internal(args
[i
]);
1591 #endif /* TCG_TARGET_EXTEND_ARGS */
1594 static void tcg_reg_alloc_start(TCGContext
*s
)
1598 for (i
= 0, n
= s
->nb_temps
; i
< n
; i
++) {
1599 TCGTemp
*ts
= &s
->temps
[i
];
1600 TCGTempVal val
= TEMP_VAL_MEM
;
1604 val
= TEMP_VAL_CONST
;
1612 val
= TEMP_VAL_DEAD
;
1615 ts
->mem_allocated
= 0;
1618 g_assert_not_reached();
1623 memset(s
->reg_to_temp
, 0, sizeof(s
->reg_to_temp
));
1626 static char *tcg_get_arg_str_ptr(TCGContext
*s
, char *buf
, int buf_size
,
1629 int idx
= temp_idx(ts
);
1634 pstrcpy(buf
, buf_size
, ts
->name
);
1637 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
1640 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
1645 snprintf(buf
, buf_size
, "$0x%x", (int32_t)ts
->val
);
1647 #if TCG_TARGET_REG_BITS > 32
1649 snprintf(buf
, buf_size
, "$0x%" PRIx64
, ts
->val
);
1655 snprintf(buf
, buf_size
, "v%d$0x%" PRIx64
,
1656 64 << (ts
->type
- TCG_TYPE_V64
), ts
->val
);
1659 g_assert_not_reached();
1666 static char *tcg_get_arg_str(TCGContext
*s
, char *buf
,
1667 int buf_size
, TCGArg arg
)
1669 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, arg_temp(arg
));
1672 /* Find helper name. */
1673 static inline const char *tcg_find_helper(TCGContext
*s
, uintptr_t val
)
1675 const char *ret
= NULL
;
1677 TCGHelperInfo
*info
= g_hash_table_lookup(helper_table
, (gpointer
)val
);
1685 static const char * const cond_name
[] =
1687 [TCG_COND_NEVER
] = "never",
1688 [TCG_COND_ALWAYS
] = "always",
1689 [TCG_COND_EQ
] = "eq",
1690 [TCG_COND_NE
] = "ne",
1691 [TCG_COND_LT
] = "lt",
1692 [TCG_COND_GE
] = "ge",
1693 [TCG_COND_LE
] = "le",
1694 [TCG_COND_GT
] = "gt",
1695 [TCG_COND_LTU
] = "ltu",
1696 [TCG_COND_GEU
] = "geu",
1697 [TCG_COND_LEU
] = "leu",
1698 [TCG_COND_GTU
] = "gtu"
/* NOTE(review): the initializer for this table is elided in this extract.
 * Presumably it maps MemOp size/sign/endian bit combinations to printable
 * suffixes for tcg_dump_ops (cf. the indexing `ldst_name[op & (MO_BSWAP |
 * MO_SSIZE)]` below) -- confirm against the full file. */
1701 static const char * const ldst_name
[] =
1717 static const char * const alignment_name
[(MO_AMASK
>> MO_ASHIFT
) + 1] = {
1718 #ifdef TARGET_ALIGNED_ONLY
1719 [MO_UNALN
>> MO_ASHIFT
] = "un+",
1720 [MO_ALIGN
>> MO_ASHIFT
] = "",
1722 [MO_UNALN
>> MO_ASHIFT
] = "",
1723 [MO_ALIGN
>> MO_ASHIFT
] = "al+",
1725 [MO_ALIGN_2
>> MO_ASHIFT
] = "al2+",
1726 [MO_ALIGN_4
>> MO_ASHIFT
] = "al4+",
1727 [MO_ALIGN_8
>> MO_ASHIFT
] = "al8+",
1728 [MO_ALIGN_16
>> MO_ASHIFT
] = "al16+",
1729 [MO_ALIGN_32
>> MO_ASHIFT
] = "al32+",
1730 [MO_ALIGN_64
>> MO_ASHIFT
] = "al64+",
1733 static inline bool tcg_regset_single(TCGRegSet d
)
1735 return (d
& (d
- 1)) == 0;
1738 static inline TCGReg
tcg_regset_first(TCGRegSet d
)
1740 if (TCG_TARGET_NB_REGS
<= 32) {
1747 static void tcg_dump_ops(TCGContext
*s
, bool have_prefs
)
1752 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
1753 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
1754 const TCGOpDef
*def
;
1759 def
= &tcg_op_defs
[c
];
1761 if (c
== INDEX_op_insn_start
) {
1763 col
+= qemu_log("\n ----");
1765 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
1767 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1768 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
1772 col
+= qemu_log(" " TARGET_FMT_lx
, a
);
1774 } else if (c
== INDEX_op_call
) {
1775 /* variable number of arguments */
1776 nb_oargs
= TCGOP_CALLO(op
);
1777 nb_iargs
= TCGOP_CALLI(op
);
1778 nb_cargs
= def
->nb_cargs
;
1780 /* function name, flags, out args */
1781 col
+= qemu_log(" %s %s,$0x%" TCG_PRIlx
",$%d", def
->name
,
1782 tcg_find_helper(s
, op
->args
[nb_oargs
+ nb_iargs
]),
1783 op
->args
[nb_oargs
+ nb_iargs
+ 1], nb_oargs
);
1784 for (i
= 0; i
< nb_oargs
; i
++) {
1785 col
+= qemu_log(",%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
1788 for (i
= 0; i
< nb_iargs
; i
++) {
1789 TCGArg arg
= op
->args
[nb_oargs
+ i
];
1790 const char *t
= "<dummy>";
1791 if (arg
!= TCG_CALL_DUMMY_ARG
) {
1792 t
= tcg_get_arg_str(s
, buf
, sizeof(buf
), arg
);
1794 col
+= qemu_log(",%s", t
);
1797 col
+= qemu_log(" %s ", def
->name
);
1799 nb_oargs
= def
->nb_oargs
;
1800 nb_iargs
= def
->nb_iargs
;
1801 nb_cargs
= def
->nb_cargs
;
1803 if (def
->flags
& TCG_OPF_VECTOR
) {
1804 col
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op
),
1805 8 << TCGOP_VECE(op
));
1809 for (i
= 0; i
< nb_oargs
; i
++) {
1811 col
+= qemu_log(",");
1813 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
1816 for (i
= 0; i
< nb_iargs
; i
++) {
1818 col
+= qemu_log(",");
1820 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
1824 case INDEX_op_brcond_i32
:
1825 case INDEX_op_setcond_i32
:
1826 case INDEX_op_movcond_i32
:
1827 case INDEX_op_brcond2_i32
:
1828 case INDEX_op_setcond2_i32
:
1829 case INDEX_op_brcond_i64
:
1830 case INDEX_op_setcond_i64
:
1831 case INDEX_op_movcond_i64
:
1832 case INDEX_op_cmp_vec
:
1833 case INDEX_op_cmpsel_vec
:
1834 if (op
->args
[k
] < ARRAY_SIZE(cond_name
)
1835 && cond_name
[op
->args
[k
]]) {
1836 col
+= qemu_log(",%s", cond_name
[op
->args
[k
++]]);
1838 col
+= qemu_log(",$0x%" TCG_PRIlx
, op
->args
[k
++]);
1842 case INDEX_op_qemu_ld_i32
:
1843 case INDEX_op_qemu_st_i32
:
1844 case INDEX_op_qemu_st8_i32
:
1845 case INDEX_op_qemu_ld_i64
:
1846 case INDEX_op_qemu_st_i64
:
1848 TCGMemOpIdx oi
= op
->args
[k
++];
1849 MemOp op
= get_memop(oi
);
1850 unsigned ix
= get_mmuidx(oi
);
1852 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
1853 col
+= qemu_log(",$0x%x,%u", op
, ix
);
1855 const char *s_al
, *s_op
;
1856 s_al
= alignment_name
[(op
& MO_AMASK
) >> MO_ASHIFT
];
1857 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
1858 col
+= qemu_log(",%s%s,%u", s_al
, s_op
, ix
);
1868 case INDEX_op_set_label
:
1870 case INDEX_op_brcond_i32
:
1871 case INDEX_op_brcond_i64
:
1872 case INDEX_op_brcond2_i32
:
1873 col
+= qemu_log("%s$L%d", k
? "," : "",
1874 arg_label(op
->args
[k
])->id
);
1880 for (; i
< nb_cargs
; i
++, k
++) {
1881 col
+= qemu_log("%s$0x%" TCG_PRIlx
, k
? "," : "", op
->args
[k
]);
1885 if (have_prefs
|| op
->life
) {
1887 QemuLogFile
*logfile
;
1890 logfile
= qatomic_rcu_read(&qemu_logfile
);
1892 for (; col
< 40; ++col
) {
1893 putc(' ', logfile
->fd
);
1900 unsigned life
= op
->life
;
1902 if (life
& (SYNC_ARG
* 3)) {
1904 for (i
= 0; i
< 2; ++i
) {
1905 if (life
& (SYNC_ARG
<< i
)) {
1913 for (i
= 0; life
; ++i
, life
>>= 1) {
1922 for (i
= 0; i
< nb_oargs
; ++i
) {
1923 TCGRegSet set
= op
->output_pref
[i
];
1932 } else if (set
== MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS
)) {
1934 #ifdef CONFIG_DEBUG_TCG
1935 } else if (tcg_regset_single(set
)) {
1936 TCGReg reg
= tcg_regset_first(set
);
1937 qemu_log("%s", tcg_target_reg_names
[reg
]);
1939 } else if (TCG_TARGET_NB_REGS
<= 32) {
1940 qemu_log("%#x", (uint32_t)set
);
1942 qemu_log("%#" PRIx64
, (uint64_t)set
);
1951 /* we give more priority to constraints with less registers */
1952 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
1954 const TCGArgConstraint
*arg_ct
= &def
->args_ct
[k
];
1957 if (arg_ct
->oalias
) {
1958 /* an alias is equivalent to a single register */
1961 n
= ctpop64(arg_ct
->regs
);
1963 return TCG_TARGET_NB_REGS
- n
+ 1;
1966 /* sort from highest priority to lowest */
1967 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
1970 TCGArgConstraint
*a
= def
->args_ct
;
1972 for (i
= 0; i
< n
; i
++) {
1973 a
[start
+ i
].sort_index
= start
+ i
;
1978 for (i
= 0; i
< n
- 1; i
++) {
1979 for (j
= i
+ 1; j
< n
; j
++) {
1980 int p1
= get_constraint_priority(def
, a
[start
+ i
].sort_index
);
1981 int p2
= get_constraint_priority(def
, a
[start
+ j
].sort_index
);
1983 int tmp
= a
[start
+ i
].sort_index
;
1984 a
[start
+ i
].sort_index
= a
[start
+ j
].sort_index
;
1985 a
[start
+ j
].sort_index
= tmp
;
1991 static void process_op_defs(TCGContext
*s
)
1995 for (op
= 0; op
< NB_OPS
; op
++) {
1996 TCGOpDef
*def
= &tcg_op_defs
[op
];
1997 const TCGTargetOpDef
*tdefs
;
2000 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2004 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
2010 * Macro magic should make it impossible, but double-check that
2011 * the array index is in range. Since the signness of an enum
2012 * is implementation defined, force the result to unsigned.
2014 unsigned con_set
= tcg_target_op_def(op
);
2015 tcg_debug_assert(con_set
< ARRAY_SIZE(constraint_sets
));
2016 tdefs
= &constraint_sets
[con_set
];
2018 for (i
= 0; i
< nb_args
; i
++) {
2019 const char *ct_str
= tdefs
->args_ct_str
[i
];
2020 /* Incomplete TCGTargetOpDef entry. */
2021 tcg_debug_assert(ct_str
!= NULL
);
2023 while (*ct_str
!= '\0') {
2027 int oarg
= *ct_str
- '0';
2028 tcg_debug_assert(ct_str
== tdefs
->args_ct_str
[i
]);
2029 tcg_debug_assert(oarg
< def
->nb_oargs
);
2030 tcg_debug_assert(def
->args_ct
[oarg
].regs
!= 0);
2031 def
->args_ct
[i
] = def
->args_ct
[oarg
];
2032 /* The output sets oalias. */
2033 def
->args_ct
[oarg
].oalias
= true;
2034 def
->args_ct
[oarg
].alias_index
= i
;
2035 /* The input sets ialias. */
2036 def
->args_ct
[i
].ialias
= true;
2037 def
->args_ct
[i
].alias_index
= oarg
;
2042 def
->args_ct
[i
].newreg
= true;
2046 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
2050 /* Include all of the target-specific constraints. */
2053 #define CONST(CASE, MASK) \
2054 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2055 #define REGS(CASE, MASK) \
2056 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2058 #include "tcg-target-con-str.h"
2063 /* Typo in TCGTargetOpDef constraint. */
2064 g_assert_not_reached();
2069 /* TCGTargetOpDef entry with too much information? */
2070 tcg_debug_assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
2072 /* sort the constraints (XXX: this is just an heuristic) */
2073 sort_constraints(def
, 0, def
->nb_oargs
);
2074 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
2078 void tcg_op_remove(TCGContext
*s
, TCGOp
*op
)
2084 label
= arg_label(op
->args
[0]);
2087 case INDEX_op_brcond_i32
:
2088 case INDEX_op_brcond_i64
:
2089 label
= arg_label(op
->args
[3]);
2092 case INDEX_op_brcond2_i32
:
2093 label
= arg_label(op
->args
[5]);
2100 QTAILQ_REMOVE(&s
->ops
, op
, link
);
2101 QTAILQ_INSERT_TAIL(&s
->free_ops
, op
, link
);
2104 #ifdef CONFIG_PROFILER
2105 qatomic_set(&s
->prof
.del_op_count
, s
->prof
.del_op_count
+ 1);
2109 static TCGOp
*tcg_op_alloc(TCGOpcode opc
)
2111 TCGContext
*s
= tcg_ctx
;
2114 if (likely(QTAILQ_EMPTY(&s
->free_ops
))) {
2115 op
= tcg_malloc(sizeof(TCGOp
));
2117 op
= QTAILQ_FIRST(&s
->free_ops
);
2118 QTAILQ_REMOVE(&s
->free_ops
, op
, link
);
2120 memset(op
, 0, offsetof(TCGOp
, link
));
2127 TCGOp
*tcg_emit_op(TCGOpcode opc
)
2129 TCGOp
*op
= tcg_op_alloc(opc
);
2130 QTAILQ_INSERT_TAIL(&tcg_ctx
->ops
, op
, link
);
2134 TCGOp
*tcg_op_insert_before(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2136 TCGOp
*new_op
= tcg_op_alloc(opc
);
2137 QTAILQ_INSERT_BEFORE(old_op
, new_op
, link
);
2141 TCGOp
*tcg_op_insert_after(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2143 TCGOp
*new_op
= tcg_op_alloc(opc
);
2144 QTAILQ_INSERT_AFTER(&s
->ops
, old_op
, new_op
, link
);
2148 /* Reachable analysis : remove unreachable code. */
2149 static void reachable_code_pass(TCGContext
*s
)
2151 TCGOp
*op
, *op_next
;
2154 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2160 case INDEX_op_set_label
:
2161 label
= arg_label(op
->args
[0]);
2162 if (label
->refs
== 0) {
2164 * While there is an occasional backward branch, virtually
2165 * all branches generated by the translators are forward.
2166 * Which means that generally we will have already removed
2167 * all references to the label that will be, and there is
2168 * little to be gained by iterating.
2172 /* Once we see a label, insns become live again. */
2177 * Optimization can fold conditional branches to unconditional.
2178 * If we find a label with one reference which is preceded by
2179 * an unconditional branch to it, remove both. This needed to
2180 * wait until the dead code in between them was removed.
2182 if (label
->refs
== 1) {
2183 TCGOp
*op_prev
= QTAILQ_PREV(op
, link
);
2184 if (op_prev
->opc
== INDEX_op_br
&&
2185 label
== arg_label(op_prev
->args
[0])) {
2186 tcg_op_remove(s
, op_prev
);
2194 case INDEX_op_exit_tb
:
2195 case INDEX_op_goto_ptr
:
2196 /* Unconditional branches; everything following is dead. */
2201 /* Notice noreturn helper calls, raising exceptions. */
2202 call_flags
= op
->args
[TCGOP_CALLO(op
) + TCGOP_CALLI(op
) + 1];
2203 if (call_flags
& TCG_CALL_NO_RETURN
) {
2208 case INDEX_op_insn_start
:
2209 /* Never remove -- we need to keep these for unwind. */
2218 tcg_op_remove(s
, op
);
/* Test the per-argument DEAD/SYNC bit for argument N in the local
 * variable `arg_life` (the TCGLifeData word computed by liveness_pass_1
 * and stored in op->life). */
2226 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2227 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2229 /* For liveness_pass_1, the register preferences for a given temp. */
2230 static inline TCGRegSet
*la_temp_pref(TCGTemp
*ts
)
2232 return ts
->state_ptr
;
2235 /* For liveness_pass_1, reset the preferences for a given temp to the
2236 * maximal regset for its type.
2238 static inline void la_reset_pref(TCGTemp
*ts
)
2241 = (ts
->state
== TS_DEAD
? 0 : tcg_target_available_regs
[ts
->type
]);
2244 /* liveness analysis: end of function: all temps are dead, and globals
2245 should be in memory. */
2246 static void la_func_end(TCGContext
*s
, int ng
, int nt
)
2250 for (i
= 0; i
< ng
; ++i
) {
2251 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2252 la_reset_pref(&s
->temps
[i
]);
2254 for (i
= ng
; i
< nt
; ++i
) {
2255 s
->temps
[i
].state
= TS_DEAD
;
2256 la_reset_pref(&s
->temps
[i
]);
2260 /* liveness analysis: end of basic block: all temps are dead, globals
2261 and local temps should be in memory. */
2262 static void la_bb_end(TCGContext
*s
, int ng
, int nt
)
2266 for (i
= 0; i
< nt
; ++i
) {
2267 TCGTemp
*ts
= &s
->temps
[i
];
2274 state
= TS_DEAD
| TS_MEM
;
2281 g_assert_not_reached();
2288 /* liveness analysis: sync globals back to memory. */
2289 static void la_global_sync(TCGContext
*s
, int ng
)
2293 for (i
= 0; i
< ng
; ++i
) {
2294 int state
= s
->temps
[i
].state
;
2295 s
->temps
[i
].state
= state
| TS_MEM
;
2296 if (state
== TS_DEAD
) {
2297 /* If the global was previously dead, reset prefs. */
2298 la_reset_pref(&s
->temps
[i
]);
2304 * liveness analysis: conditional branch: all temps are dead,
2305 * globals and local temps should be synced.
2307 static void la_bb_sync(TCGContext
*s
, int ng
, int nt
)
2309 la_global_sync(s
, ng
);
2311 for (int i
= ng
; i
< nt
; ++i
) {
2312 TCGTemp
*ts
= &s
->temps
[i
];
2318 ts
->state
= state
| TS_MEM
;
2319 if (state
!= TS_DEAD
) {
2324 s
->temps
[i
].state
= TS_DEAD
;
2329 g_assert_not_reached();
2331 la_reset_pref(&s
->temps
[i
]);
2335 /* liveness analysis: sync globals back to memory and kill. */
2336 static void la_global_kill(TCGContext
*s
, int ng
)
2340 for (i
= 0; i
< ng
; i
++) {
2341 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2342 la_reset_pref(&s
->temps
[i
]);
2346 /* liveness analysis: note live globals crossing calls. */
2347 static void la_cross_call(TCGContext
*s
, int nt
)
2349 TCGRegSet mask
= ~tcg_target_call_clobber_regs
;
2352 for (i
= 0; i
< nt
; i
++) {
2353 TCGTemp
*ts
= &s
->temps
[i
];
2354 if (!(ts
->state
& TS_DEAD
)) {
2355 TCGRegSet
*pset
= la_temp_pref(ts
);
2356 TCGRegSet set
= *pset
;
2359 /* If the combination is not possible, restart. */
2361 set
= tcg_target_available_regs
[ts
->type
] & mask
;
2368 /* Liveness analysis : update the opc_arg_life array to tell if a
2369 given input arguments is dead. Instructions updating dead
2370 temporaries are removed. */
2371 static void liveness_pass_1(TCGContext
*s
)
2373 int nb_globals
= s
->nb_globals
;
2374 int nb_temps
= s
->nb_temps
;
2375 TCGOp
*op
, *op_prev
;
2379 prefs
= tcg_malloc(sizeof(TCGRegSet
) * nb_temps
);
2380 for (i
= 0; i
< nb_temps
; ++i
) {
2381 s
->temps
[i
].state_ptr
= prefs
+ i
;
2384 /* ??? Should be redundant with the exit_tb that ends the TB. */
2385 la_func_end(s
, nb_globals
, nb_temps
);
2387 QTAILQ_FOREACH_REVERSE_SAFE(op
, &s
->ops
, link
, op_prev
) {
2388 int nb_iargs
, nb_oargs
;
2389 TCGOpcode opc_new
, opc_new2
;
2391 TCGLifeData arg_life
= 0;
2393 TCGOpcode opc
= op
->opc
;
2394 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2402 nb_oargs
= TCGOP_CALLO(op
);
2403 nb_iargs
= TCGOP_CALLI(op
);
2404 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2406 /* pure functions can be removed if their result is unused */
2407 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
2408 for (i
= 0; i
< nb_oargs
; i
++) {
2409 ts
= arg_temp(op
->args
[i
]);
2410 if (ts
->state
!= TS_DEAD
) {
2411 goto do_not_remove_call
;
2418 /* Output args are dead. */
2419 for (i
= 0; i
< nb_oargs
; i
++) {
2420 ts
= arg_temp(op
->args
[i
]);
2421 if (ts
->state
& TS_DEAD
) {
2422 arg_life
|= DEAD_ARG
<< i
;
2424 if (ts
->state
& TS_MEM
) {
2425 arg_life
|= SYNC_ARG
<< i
;
2427 ts
->state
= TS_DEAD
;
2430 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2431 op
->output_pref
[i
] = 0;
2434 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
2435 TCG_CALL_NO_READ_GLOBALS
))) {
2436 la_global_kill(s
, nb_globals
);
2437 } else if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
2438 la_global_sync(s
, nb_globals
);
2441 /* Record arguments that die in this helper. */
2442 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2443 ts
= arg_temp(op
->args
[i
]);
2444 if (ts
&& ts
->state
& TS_DEAD
) {
2445 arg_life
|= DEAD_ARG
<< i
;
2449 /* For all live registers, remove call-clobbered prefs. */
2450 la_cross_call(s
, nb_temps
);
2452 nb_call_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2454 /* Input arguments are live for preceding opcodes. */
2455 for (i
= 0; i
< nb_iargs
; i
++) {
2456 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2457 if (ts
&& ts
->state
& TS_DEAD
) {
2458 /* For those arguments that die, and will be allocated
2459 * in registers, clear the register set for that arg,
2460 * to be filled in below. For args that will be on
2461 * the stack, reset to any available reg.
2464 = (i
< nb_call_regs
? 0 :
2465 tcg_target_available_regs
[ts
->type
]);
2466 ts
->state
&= ~TS_DEAD
;
2470 /* For each input argument, add its input register to prefs.
2471 If a temp is used once, this produces a single set bit. */
2472 for (i
= 0; i
< MIN(nb_call_regs
, nb_iargs
); i
++) {
2473 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2475 tcg_regset_set_reg(*la_temp_pref(ts
),
2476 tcg_target_call_iarg_regs
[i
]);
2481 case INDEX_op_insn_start
:
2483 case INDEX_op_discard
:
2484 /* mark the temporary as dead */
2485 ts
= arg_temp(op
->args
[0]);
2486 ts
->state
= TS_DEAD
;
2490 case INDEX_op_add2_i32
:
2491 opc_new
= INDEX_op_add_i32
;
2493 case INDEX_op_sub2_i32
:
2494 opc_new
= INDEX_op_sub_i32
;
2496 case INDEX_op_add2_i64
:
2497 opc_new
= INDEX_op_add_i64
;
2499 case INDEX_op_sub2_i64
:
2500 opc_new
= INDEX_op_sub_i64
;
2504 /* Test if the high part of the operation is dead, but not
2505 the low part. The result can be optimized to a simple
2506 add or sub. This happens often for x86_64 guest when the
2507 cpu mode is set to 32 bit. */
2508 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
2509 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
2512 /* Replace the opcode and adjust the args in place,
2513 leaving 3 unused args at the end. */
2514 op
->opc
= opc
= opc_new
;
2515 op
->args
[1] = op
->args
[2];
2516 op
->args
[2] = op
->args
[4];
2517 /* Fall through and mark the single-word operation live. */
2523 case INDEX_op_mulu2_i32
:
2524 opc_new
= INDEX_op_mul_i32
;
2525 opc_new2
= INDEX_op_muluh_i32
;
2526 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
2528 case INDEX_op_muls2_i32
:
2529 opc_new
= INDEX_op_mul_i32
;
2530 opc_new2
= INDEX_op_mulsh_i32
;
2531 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
2533 case INDEX_op_mulu2_i64
:
2534 opc_new
= INDEX_op_mul_i64
;
2535 opc_new2
= INDEX_op_muluh_i64
;
2536 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
2538 case INDEX_op_muls2_i64
:
2539 opc_new
= INDEX_op_mul_i64
;
2540 opc_new2
= INDEX_op_mulsh_i64
;
2541 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
2546 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
2547 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
2548 /* Both parts of the operation are dead. */
2551 /* The high part of the operation is dead; generate the low. */
2552 op
->opc
= opc
= opc_new
;
2553 op
->args
[1] = op
->args
[2];
2554 op
->args
[2] = op
->args
[3];
2555 } else if (arg_temp(op
->args
[0])->state
== TS_DEAD
&& have_opc_new2
) {
2556 /* The low part of the operation is dead; generate the high. */
2557 op
->opc
= opc
= opc_new2
;
2558 op
->args
[0] = op
->args
[1];
2559 op
->args
[1] = op
->args
[2];
2560 op
->args
[2] = op
->args
[3];
2564 /* Mark the single-word operation live. */
2569 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2570 nb_iargs
= def
->nb_iargs
;
2571 nb_oargs
= def
->nb_oargs
;
2573 /* Test if the operation can be removed because all
2574 its outputs are dead. We assume that nb_oargs == 0
2575 implies side effects */
2576 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
2577 for (i
= 0; i
< nb_oargs
; i
++) {
2578 if (arg_temp(op
->args
[i
])->state
!= TS_DEAD
) {
2587 tcg_op_remove(s
, op
);
2591 for (i
= 0; i
< nb_oargs
; i
++) {
2592 ts
= arg_temp(op
->args
[i
]);
2594 /* Remember the preference of the uses that followed. */
2595 op
->output_pref
[i
] = *la_temp_pref(ts
);
2597 /* Output args are dead. */
2598 if (ts
->state
& TS_DEAD
) {
2599 arg_life
|= DEAD_ARG
<< i
;
2601 if (ts
->state
& TS_MEM
) {
2602 arg_life
|= SYNC_ARG
<< i
;
2604 ts
->state
= TS_DEAD
;
2608 /* If end of basic block, update. */
2609 if (def
->flags
& TCG_OPF_BB_EXIT
) {
2610 la_func_end(s
, nb_globals
, nb_temps
);
2611 } else if (def
->flags
& TCG_OPF_COND_BRANCH
) {
2612 la_bb_sync(s
, nb_globals
, nb_temps
);
2613 } else if (def
->flags
& TCG_OPF_BB_END
) {
2614 la_bb_end(s
, nb_globals
, nb_temps
);
2615 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
2616 la_global_sync(s
, nb_globals
);
2617 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
2618 la_cross_call(s
, nb_temps
);
2622 /* Record arguments that die in this opcode. */
2623 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2624 ts
= arg_temp(op
->args
[i
]);
2625 if (ts
->state
& TS_DEAD
) {
2626 arg_life
|= DEAD_ARG
<< i
;
2630 /* Input arguments are live for preceding opcodes. */
2631 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2632 ts
= arg_temp(op
->args
[i
]);
2633 if (ts
->state
& TS_DEAD
) {
2634 /* For operands that were dead, initially allow
2635 all regs for the type. */
2636 *la_temp_pref(ts
) = tcg_target_available_regs
[ts
->type
];
2637 ts
->state
&= ~TS_DEAD
;
2641 /* Incorporate constraints for this operand. */
2643 case INDEX_op_mov_i32
:
2644 case INDEX_op_mov_i64
:
2645 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2646 have proper constraints. That said, special case
2647 moves to propagate preferences backward. */
2648 if (IS_DEAD_ARG(1)) {
2649 *la_temp_pref(arg_temp(op
->args
[0]))
2650 = *la_temp_pref(arg_temp(op
->args
[1]));
2655 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2656 const TCGArgConstraint
*ct
= &def
->args_ct
[i
];
2657 TCGRegSet set
, *pset
;
2659 ts
= arg_temp(op
->args
[i
]);
2660 pset
= la_temp_pref(ts
);
2665 set
&= op
->output_pref
[ct
->alias_index
];
2667 /* If the combination is not possible, restart. */
2677 op
->life
= arg_life
;
2681 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2682 static bool liveness_pass_2(TCGContext
*s
)
2684 int nb_globals
= s
->nb_globals
;
2686 bool changes
= false;
2687 TCGOp
*op
, *op_next
;
2689 /* Create a temporary for each indirect global. */
2690 for (i
= 0; i
< nb_globals
; ++i
) {
2691 TCGTemp
*its
= &s
->temps
[i
];
2692 if (its
->indirect_reg
) {
2693 TCGTemp
*dts
= tcg_temp_alloc(s
);
2694 dts
->type
= its
->type
;
2695 dts
->base_type
= its
->base_type
;
2696 its
->state_ptr
= dts
;
2698 its
->state_ptr
= NULL
;
2700 /* All globals begin dead. */
2701 its
->state
= TS_DEAD
;
2703 for (nb_temps
= s
->nb_temps
; i
< nb_temps
; ++i
) {
2704 TCGTemp
*its
= &s
->temps
[i
];
2705 its
->state_ptr
= NULL
;
2706 its
->state
= TS_DEAD
;
2709 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2710 TCGOpcode opc
= op
->opc
;
2711 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2712 TCGLifeData arg_life
= op
->life
;
2713 int nb_iargs
, nb_oargs
, call_flags
;
2714 TCGTemp
*arg_ts
, *dir_ts
;
2716 if (opc
== INDEX_op_call
) {
2717 nb_oargs
= TCGOP_CALLO(op
);
2718 nb_iargs
= TCGOP_CALLI(op
);
2719 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2721 nb_iargs
= def
->nb_iargs
;
2722 nb_oargs
= def
->nb_oargs
;
2724 /* Set flags similar to how calls require. */
2725 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
2726 /* Like reading globals: sync_globals */
2727 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
2728 } else if (def
->flags
& TCG_OPF_BB_END
) {
2729 /* Like writing globals: save_globals */
2731 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
2732 /* Like reading globals: sync_globals */
2733 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
2735 /* No effect on globals. */
2736 call_flags
= (TCG_CALL_NO_READ_GLOBALS
|
2737 TCG_CALL_NO_WRITE_GLOBALS
);
2741 /* Make sure that input arguments are available. */
2742 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2743 arg_ts
= arg_temp(op
->args
[i
]);
2745 dir_ts
= arg_ts
->state_ptr
;
2746 if (dir_ts
&& arg_ts
->state
== TS_DEAD
) {
2747 TCGOpcode lopc
= (arg_ts
->type
== TCG_TYPE_I32
2750 TCGOp
*lop
= tcg_op_insert_before(s
, op
, lopc
);
2752 lop
->args
[0] = temp_arg(dir_ts
);
2753 lop
->args
[1] = temp_arg(arg_ts
->mem_base
);
2754 lop
->args
[2] = arg_ts
->mem_offset
;
2756 /* Loaded, but synced with memory. */
2757 arg_ts
->state
= TS_MEM
;
2762 /* Perform input replacement, and mark inputs that became dead.
2763 No action is required except keeping temp_state up to date
2764 so that we reload when needed. */
2765 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2766 arg_ts
= arg_temp(op
->args
[i
]);
2768 dir_ts
= arg_ts
->state_ptr
;
2770 op
->args
[i
] = temp_arg(dir_ts
);
2772 if (IS_DEAD_ARG(i
)) {
2773 arg_ts
->state
= TS_DEAD
;
2779 /* Liveness analysis should ensure that the following are
2780 all correct, for call sites and basic block end points. */
2781 if (call_flags
& TCG_CALL_NO_READ_GLOBALS
) {
2783 } else if (call_flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
2784 for (i
= 0; i
< nb_globals
; ++i
) {
2785 /* Liveness should see that globals are synced back,
2786 that is, either TS_DEAD or TS_MEM. */
2787 arg_ts
= &s
->temps
[i
];
2788 tcg_debug_assert(arg_ts
->state_ptr
== 0
2789 || arg_ts
->state
!= 0);
2792 for (i
= 0; i
< nb_globals
; ++i
) {
2793 /* Liveness should see that globals are saved back,
2794 that is, TS_DEAD, waiting to be reloaded. */
2795 arg_ts
= &s
->temps
[i
];
2796 tcg_debug_assert(arg_ts
->state_ptr
== 0
2797 || arg_ts
->state
== TS_DEAD
);
2801 /* Outputs become available. */
2802 if (opc
== INDEX_op_mov_i32
|| opc
== INDEX_op_mov_i64
) {
2803 arg_ts
= arg_temp(op
->args
[0]);
2804 dir_ts
= arg_ts
->state_ptr
;
2806 op
->args
[0] = temp_arg(dir_ts
);
2809 /* The output is now live and modified. */
2812 if (NEED_SYNC_ARG(0)) {
2813 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
2816 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
2817 TCGTemp
*out_ts
= dir_ts
;
2819 if (IS_DEAD_ARG(0)) {
2820 out_ts
= arg_temp(op
->args
[1]);
2821 arg_ts
->state
= TS_DEAD
;
2822 tcg_op_remove(s
, op
);
2824 arg_ts
->state
= TS_MEM
;
2827 sop
->args
[0] = temp_arg(out_ts
);
2828 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
2829 sop
->args
[2] = arg_ts
->mem_offset
;
2831 tcg_debug_assert(!IS_DEAD_ARG(0));
2835 for (i
= 0; i
< nb_oargs
; i
++) {
2836 arg_ts
= arg_temp(op
->args
[i
]);
2837 dir_ts
= arg_ts
->state_ptr
;
2841 op
->args
[i
] = temp_arg(dir_ts
);
2844 /* The output is now live and modified. */
2847 /* Sync outputs upon their last write. */
2848 if (NEED_SYNC_ARG(i
)) {
2849 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
2852 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
2854 sop
->args
[0] = temp_arg(dir_ts
);
2855 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
2856 sop
->args
[2] = arg_ts
->mem_offset
;
2858 arg_ts
->state
= TS_MEM
;
2860 /* Drop outputs that are dead. */
2861 if (IS_DEAD_ARG(i
)) {
2862 arg_ts
->state
= TS_DEAD
;
2871 #ifdef CONFIG_DEBUG_TCG
2872 static void dump_regs(TCGContext
*s
)
2878 for(i
= 0; i
< s
->nb_temps
; i
++) {
2880 printf(" %10s: ", tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
2881 switch(ts
->val_type
) {
2883 printf("%s", tcg_target_reg_names
[ts
->reg
]);
2886 printf("%d(%s)", (int)ts
->mem_offset
,
2887 tcg_target_reg_names
[ts
->mem_base
->reg
]);
2889 case TEMP_VAL_CONST
:
2890 printf("$0x%" PRIx64
, ts
->val
);
2902 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
2903 if (s
->reg_to_temp
[i
] != NULL
) {
2905 tcg_target_reg_names
[i
],
2906 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
2911 static void check_regs(TCGContext
*s
)
2918 for (reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
2919 ts
= s
->reg_to_temp
[reg
];
2921 if (ts
->val_type
!= TEMP_VAL_REG
|| ts
->reg
!= reg
) {
2922 printf("Inconsistency for register %s:\n",
2923 tcg_target_reg_names
[reg
]);
2928 for (k
= 0; k
< s
->nb_temps
; k
++) {
2930 if (ts
->val_type
== TEMP_VAL_REG
2931 && ts
->kind
!= TEMP_FIXED
2932 && s
->reg_to_temp
[ts
->reg
] != ts
) {
2933 printf("Inconsistency for temp %s:\n",
2934 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
2936 printf("reg state:\n");
2944 static void temp_allocate_frame(TCGContext
*s
, TCGTemp
*ts
)
2946 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2947 /* Sparc64 stack is accessed with offset of 2047 */
2948 s
->current_frame_offset
= (s
->current_frame_offset
+
2949 (tcg_target_long
)sizeof(tcg_target_long
) - 1) &
2950 ~(sizeof(tcg_target_long
) - 1);
2952 if (s
->current_frame_offset
+ (tcg_target_long
)sizeof(tcg_target_long
) >
2956 ts
->mem_offset
= s
->current_frame_offset
;
2957 ts
->mem_base
= s
->frame_temp
;
2958 ts
->mem_allocated
= 1;
2959 s
->current_frame_offset
+= sizeof(tcg_target_long
);
2962 static void temp_load(TCGContext
*, TCGTemp
*, TCGRegSet
, TCGRegSet
, TCGRegSet
);
2964 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
2965 mark it free; otherwise mark it dead. */
2966 static void temp_free_or_dead(TCGContext
*s
, TCGTemp
*ts
, int free_or_dead
)
2968 TCGTempVal new_type
;
2975 new_type
= TEMP_VAL_MEM
;
2978 new_type
= free_or_dead
< 0 ? TEMP_VAL_MEM
: TEMP_VAL_DEAD
;
2981 new_type
= TEMP_VAL_CONST
;
2984 g_assert_not_reached();
2986 if (ts
->val_type
== TEMP_VAL_REG
) {
2987 s
->reg_to_temp
[ts
->reg
] = NULL
;
2989 ts
->val_type
= new_type
;
2992 /* Mark a temporary as dead. */
2993 static inline void temp_dead(TCGContext
*s
, TCGTemp
*ts
)
2995 temp_free_or_dead(s
, ts
, 1);
2998 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2999 registers needs to be allocated to store a constant. If 'free_or_dead'
3000 is non-zero, subsequently release the temporary; if it is positive, the
3001 temp is dead; if it is negative, the temp is free. */
3002 static void temp_sync(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
,
3003 TCGRegSet preferred_regs
, int free_or_dead
)
3005 if (!temp_readonly(ts
) && !ts
->mem_coherent
) {
3006 if (!ts
->mem_allocated
) {
3007 temp_allocate_frame(s
, ts
);
3009 switch (ts
->val_type
) {
3010 case TEMP_VAL_CONST
:
3011 /* If we're going to free the temp immediately, then we won't
3012 require it later in a register, so attempt to store the
3013 constant to memory directly. */
3015 && tcg_out_sti(s
, ts
->type
, ts
->val
,
3016 ts
->mem_base
->reg
, ts
->mem_offset
)) {
3019 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3020 allocated_regs
, preferred_regs
);
3024 tcg_out_st(s
, ts
->type
, ts
->reg
,
3025 ts
->mem_base
->reg
, ts
->mem_offset
);
3035 ts
->mem_coherent
= 1;
3038 temp_free_or_dead(s
, ts
, free_or_dead
);
3042 /* free register 'reg' by spilling the corresponding temporary if necessary */
3043 static void tcg_reg_free(TCGContext
*s
, TCGReg reg
, TCGRegSet allocated_regs
)
3045 TCGTemp
*ts
= s
->reg_to_temp
[reg
];
3047 temp_sync(s
, ts
, allocated_regs
, 0, -1);
3053 * @required_regs: Set of registers in which we must allocate.
3054 * @allocated_regs: Set of registers which must be avoided.
3055 * @preferred_regs: Set of registers we should prefer.
3056 * @rev: True if we search the registers in "indirect" order.
3058 * The allocated register must be in @required_regs & ~@allocated_regs,
3059 * but if we can put it in @preferred_regs we may save a move later.
3061 static TCGReg
tcg_reg_alloc(TCGContext
*s
, TCGRegSet required_regs
,
3062 TCGRegSet allocated_regs
,
3063 TCGRegSet preferred_regs
, bool rev
)
3065 int i
, j
, f
, n
= ARRAY_SIZE(tcg_target_reg_alloc_order
);
3066 TCGRegSet reg_ct
[2];
3069 reg_ct
[1] = required_regs
& ~allocated_regs
;
3070 tcg_debug_assert(reg_ct
[1] != 0);
3071 reg_ct
[0] = reg_ct
[1] & preferred_regs
;
3073 /* Skip the preferred_regs option if it cannot be satisfied,
3074 or if the preference made no difference. */
3075 f
= reg_ct
[0] == 0 || reg_ct
[0] == reg_ct
[1];
3077 order
= rev
? indirect_reg_alloc_order
: tcg_target_reg_alloc_order
;
3079 /* Try free registers, preferences first. */
3080 for (j
= f
; j
< 2; j
++) {
3081 TCGRegSet set
= reg_ct
[j
];
3083 if (tcg_regset_single(set
)) {
3084 /* One register in the set. */
3085 TCGReg reg
= tcg_regset_first(set
);
3086 if (s
->reg_to_temp
[reg
] == NULL
) {
3090 for (i
= 0; i
< n
; i
++) {
3091 TCGReg reg
= order
[i
];
3092 if (s
->reg_to_temp
[reg
] == NULL
&&
3093 tcg_regset_test_reg(set
, reg
)) {
3100 /* We must spill something. */
3101 for (j
= f
; j
< 2; j
++) {
3102 TCGRegSet set
= reg_ct
[j
];
3104 if (tcg_regset_single(set
)) {
3105 /* One register in the set. */
3106 TCGReg reg
= tcg_regset_first(set
);
3107 tcg_reg_free(s
, reg
, allocated_regs
);
3110 for (i
= 0; i
< n
; i
++) {
3111 TCGReg reg
= order
[i
];
3112 if (tcg_regset_test_reg(set
, reg
)) {
3113 tcg_reg_free(s
, reg
, allocated_regs
);
3123 /* Make sure the temporary is in a register. If needed, allocate the register
3124 from DESIRED while avoiding ALLOCATED. */
3125 static void temp_load(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet desired_regs
,
3126 TCGRegSet allocated_regs
, TCGRegSet preferred_regs
)
3130 switch (ts
->val_type
) {
3133 case TEMP_VAL_CONST
:
3134 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3135 preferred_regs
, ts
->indirect_base
);
3136 if (ts
->type
<= TCG_TYPE_I64
) {
3137 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
3139 uint64_t val
= ts
->val
;
3143 * Find the minimal vector element that matches the constant.
3144 * The targets will, in general, have to do this search anyway,
3145 * do this generically.
3147 if (val
== dup_const(MO_8
, val
)) {
3149 } else if (val
== dup_const(MO_16
, val
)) {
3151 } else if (val
== dup_const(MO_32
, val
)) {
3155 tcg_out_dupi_vec(s
, ts
->type
, vece
, reg
, ts
->val
);
3157 ts
->mem_coherent
= 0;
3160 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3161 preferred_regs
, ts
->indirect_base
);
3162 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
3163 ts
->mem_coherent
= 1;
3170 ts
->val_type
= TEMP_VAL_REG
;
3171 s
->reg_to_temp
[reg
] = ts
;
3174 /* Save a temporary to memory. 'allocated_regs' is used in case a
3175 temporary registers needs to be allocated to store a constant. */
3176 static void temp_save(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
)
3178 /* The liveness analysis already ensures that globals are back
3179 in memory. Keep an tcg_debug_assert for safety. */
3180 tcg_debug_assert(ts
->val_type
== TEMP_VAL_MEM
|| temp_readonly(ts
));
3183 /* save globals to their canonical location and assume they can be
3184 modified be the following code. 'allocated_regs' is used in case a
3185 temporary registers needs to be allocated to store a constant. */
3186 static void save_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3190 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3191 temp_save(s
, &s
->temps
[i
], allocated_regs
);
3195 /* sync globals to their canonical location and assume they can be
3196 read by the following code. 'allocated_regs' is used in case a
3197 temporary registers needs to be allocated to store a constant. */
3198 static void sync_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3202 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3203 TCGTemp
*ts
= &s
->temps
[i
];
3204 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
3205 || ts
->kind
== TEMP_FIXED
3206 || ts
->mem_coherent
);
3210 /* at the end of a basic block, we assume all temporaries are dead and
3211 all globals are stored at their canonical location. */
3212 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
3216 for (i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3217 TCGTemp
*ts
= &s
->temps
[i
];
3221 temp_save(s
, ts
, allocated_regs
);
3224 /* The liveness analysis already ensures that temps are dead.
3225 Keep an tcg_debug_assert for safety. */
3226 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3229 /* Similarly, we should have freed any allocated register. */
3230 tcg_debug_assert(ts
->val_type
== TEMP_VAL_CONST
);
3233 g_assert_not_reached();
3237 save_globals(s
, allocated_regs
);
3241 * At a conditional branch, we assume all temporaries are dead and
3242 * all globals and local temps are synced to their location.
3244 static void tcg_reg_alloc_cbranch(TCGContext
*s
, TCGRegSet allocated_regs
)
3246 sync_globals(s
, allocated_regs
);
3248 for (int i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3249 TCGTemp
*ts
= &s
->temps
[i
];
3251 * The liveness analysis already ensures that temps are dead.
3252 * Keep tcg_debug_asserts for safety.
3256 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
|| ts
->mem_coherent
);
3259 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3264 g_assert_not_reached();
3270 * Specialized code generation for INDEX_op_mov_* with a constant.
3272 static void tcg_reg_alloc_do_movi(TCGContext
*s
, TCGTemp
*ots
,
3273 tcg_target_ulong val
, TCGLifeData arg_life
,
3274 TCGRegSet preferred_regs
)
3276 /* ENV should not be modified. */
3277 tcg_debug_assert(!temp_readonly(ots
));
3279 /* The movi is not explicitly generated here. */
3280 if (ots
->val_type
== TEMP_VAL_REG
) {
3281 s
->reg_to_temp
[ots
->reg
] = NULL
;
3283 ots
->val_type
= TEMP_VAL_CONST
;
3285 ots
->mem_coherent
= 0;
3286 if (NEED_SYNC_ARG(0)) {
3287 temp_sync(s
, ots
, s
->reserved_regs
, preferred_regs
, IS_DEAD_ARG(0));
3288 } else if (IS_DEAD_ARG(0)) {
3294 * Specialized code generation for INDEX_op_mov_*.
3296 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOp
*op
)
3298 const TCGLifeData arg_life
= op
->life
;
3299 TCGRegSet allocated_regs
, preferred_regs
;
3301 TCGType otype
, itype
;
3303 allocated_regs
= s
->reserved_regs
;
3304 preferred_regs
= op
->output_pref
[0];
3305 ots
= arg_temp(op
->args
[0]);
3306 ts
= arg_temp(op
->args
[1]);
3308 /* ENV should not be modified. */
3309 tcg_debug_assert(!temp_readonly(ots
));
3311 /* Note that otype != itype for no-op truncation. */
3315 if (ts
->val_type
== TEMP_VAL_CONST
) {
3316 /* propagate constant or generate sti */
3317 tcg_target_ulong val
= ts
->val
;
3318 if (IS_DEAD_ARG(1)) {
3321 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, preferred_regs
);
3325 /* If the source value is in memory we're going to be forced
3326 to have it in a register in order to perform the copy. Copy
3327 the SOURCE value into its own register first, that way we
3328 don't have to reload SOURCE the next time it is used. */
3329 if (ts
->val_type
== TEMP_VAL_MEM
) {
3330 temp_load(s
, ts
, tcg_target_available_regs
[itype
],
3331 allocated_regs
, preferred_regs
);
3334 tcg_debug_assert(ts
->val_type
== TEMP_VAL_REG
);
3335 if (IS_DEAD_ARG(0)) {
3336 /* mov to a non-saved dead register makes no sense (even with
3337 liveness analysis disabled). */
3338 tcg_debug_assert(NEED_SYNC_ARG(0));
3339 if (!ots
->mem_allocated
) {
3340 temp_allocate_frame(s
, ots
);
3342 tcg_out_st(s
, otype
, ts
->reg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3343 if (IS_DEAD_ARG(1)) {
3348 if (IS_DEAD_ARG(1) && ts
->kind
!= TEMP_FIXED
) {
3349 /* the mov can be suppressed */
3350 if (ots
->val_type
== TEMP_VAL_REG
) {
3351 s
->reg_to_temp
[ots
->reg
] = NULL
;
3356 if (ots
->val_type
!= TEMP_VAL_REG
) {
3357 /* When allocating a new register, make sure to not spill the
3359 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
3360 ots
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
3361 allocated_regs
, preferred_regs
,
3362 ots
->indirect_base
);
3364 if (!tcg_out_mov(s
, otype
, ots
->reg
, ts
->reg
)) {
3366 * Cross register class move not supported.
3367 * Store the source register into the destination slot
3368 * and leave the destination temp as TEMP_VAL_MEM.
3370 assert(!temp_readonly(ots
));
3371 if (!ts
->mem_allocated
) {
3372 temp_allocate_frame(s
, ots
);
3374 tcg_out_st(s
, ts
->type
, ts
->reg
,
3375 ots
->mem_base
->reg
, ots
->mem_offset
);
3376 ots
->mem_coherent
= 1;
3377 temp_free_or_dead(s
, ots
, -1);
3381 ots
->val_type
= TEMP_VAL_REG
;
3382 ots
->mem_coherent
= 0;
3383 s
->reg_to_temp
[ots
->reg
] = ots
;
3384 if (NEED_SYNC_ARG(0)) {
3385 temp_sync(s
, ots
, allocated_regs
, 0, 0);
3391 * Specialized code generation for INDEX_op_dup_vec.
3393 static void tcg_reg_alloc_dup(TCGContext
*s
, const TCGOp
*op
)
3395 const TCGLifeData arg_life
= op
->life
;
3396 TCGRegSet dup_out_regs
, dup_in_regs
;
3398 TCGType itype
, vtype
;
3399 intptr_t endian_fixup
;
3403 ots
= arg_temp(op
->args
[0]);
3404 its
= arg_temp(op
->args
[1]);
3406 /* ENV should not be modified. */
3407 tcg_debug_assert(!temp_readonly(ots
));
3410 vece
= TCGOP_VECE(op
);
3411 vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3413 if (its
->val_type
== TEMP_VAL_CONST
) {
3414 /* Propagate constant via movi -> dupi. */
3415 tcg_target_ulong val
= its
->val
;
3416 if (IS_DEAD_ARG(1)) {
3419 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, op
->output_pref
[0]);
3423 dup_out_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3424 dup_in_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[1].regs
;
3426 /* Allocate the output register now. */
3427 if (ots
->val_type
!= TEMP_VAL_REG
) {
3428 TCGRegSet allocated_regs
= s
->reserved_regs
;
3430 if (!IS_DEAD_ARG(1) && its
->val_type
== TEMP_VAL_REG
) {
3431 /* Make sure to not spill the input register. */
3432 tcg_regset_set_reg(allocated_regs
, its
->reg
);
3434 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
3435 op
->output_pref
[0], ots
->indirect_base
);
3436 ots
->val_type
= TEMP_VAL_REG
;
3437 ots
->mem_coherent
= 0;
3438 s
->reg_to_temp
[ots
->reg
] = ots
;
3441 switch (its
->val_type
) {
3444 * The dup constriaints must be broad, covering all possible VECE.
3445 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3446 * to fail, indicating that extra moves are required for that case.
3448 if (tcg_regset_test_reg(dup_in_regs
, its
->reg
)) {
3449 if (tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, its
->reg
)) {
3452 /* Try again from memory or a vector input register. */
3454 if (!its
->mem_coherent
) {
3456 * The input register is not synced, and so an extra store
3457 * would be required to use memory. Attempt an integer-vector
3458 * register move first. We do not have a TCGRegSet for this.
3460 if (tcg_out_mov(s
, itype
, ots
->reg
, its
->reg
)) {
3463 /* Sync the temp back to its slot and load from there. */
3464 temp_sync(s
, its
, s
->reserved_regs
, 0, 0);
3469 #ifdef HOST_WORDS_BIGENDIAN
3470 endian_fixup
= itype
== TCG_TYPE_I32
? 4 : 8;
3471 endian_fixup
-= 1 << vece
;
3475 if (tcg_out_dupm_vec(s
, vtype
, vece
, ots
->reg
, its
->mem_base
->reg
,
3476 its
->mem_offset
+ endian_fixup
)) {
3479 tcg_out_ld(s
, itype
, ots
->reg
, its
->mem_base
->reg
, its
->mem_offset
);
3483 g_assert_not_reached();
3486 /* We now have a vector input register, so dup must succeed. */
3487 ok
= tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, ots
->reg
);
3488 tcg_debug_assert(ok
);
3491 if (IS_DEAD_ARG(1)) {
3494 if (NEED_SYNC_ARG(0)) {
3495 temp_sync(s
, ots
, s
->reserved_regs
, 0, 0);
3497 if (IS_DEAD_ARG(0)) {
3502 static void tcg_reg_alloc_op(TCGContext
*s
, const TCGOp
*op
)
3504 const TCGLifeData arg_life
= op
->life
;
3505 const TCGOpDef
* const def
= &tcg_op_defs
[op
->opc
];
3506 TCGRegSet i_allocated_regs
;
3507 TCGRegSet o_allocated_regs
;
3508 int i
, k
, nb_iargs
, nb_oargs
;
3511 const TCGArgConstraint
*arg_ct
;
3513 TCGArg new_args
[TCG_MAX_OP_ARGS
];
3514 int const_args
[TCG_MAX_OP_ARGS
];
3516 nb_oargs
= def
->nb_oargs
;
3517 nb_iargs
= def
->nb_iargs
;
3519 /* copy constants */
3520 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
3521 op
->args
+ nb_oargs
+ nb_iargs
,
3522 sizeof(TCGArg
) * def
->nb_cargs
);
3524 i_allocated_regs
= s
->reserved_regs
;
3525 o_allocated_regs
= s
->reserved_regs
;
3527 /* satisfy input constraints */
3528 for (k
= 0; k
< nb_iargs
; k
++) {
3529 TCGRegSet i_preferred_regs
, o_preferred_regs
;
3531 i
= def
->args_ct
[nb_oargs
+ k
].sort_index
;
3533 arg_ct
= &def
->args_ct
[i
];
3536 if (ts
->val_type
== TEMP_VAL_CONST
3537 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
->ct
)) {
3538 /* constant is OK for instruction */
3540 new_args
[i
] = ts
->val
;
3544 i_preferred_regs
= o_preferred_regs
= 0;
3545 if (arg_ct
->ialias
) {
3546 o_preferred_regs
= op
->output_pref
[arg_ct
->alias_index
];
3549 * If the input is readonly, then it cannot also be an
3550 * output and aliased to itself. If the input is not
3551 * dead after the instruction, we must allocate a new
3552 * register and move it.
3554 if (temp_readonly(ts
) || !IS_DEAD_ARG(i
)) {
3555 goto allocate_in_reg
;
3559 * Check if the current register has already been allocated
3560 * for another input aliased to an output.
3562 if (ts
->val_type
== TEMP_VAL_REG
) {
3564 for (int k2
= 0; k2
< k
; k2
++) {
3565 int i2
= def
->args_ct
[nb_oargs
+ k2
].sort_index
;
3566 if (def
->args_ct
[i2
].ialias
&& reg
== new_args
[i2
]) {
3567 goto allocate_in_reg
;
3571 i_preferred_regs
= o_preferred_regs
;
3574 temp_load(s
, ts
, arg_ct
->regs
, i_allocated_regs
, i_preferred_regs
);
3577 if (!tcg_regset_test_reg(arg_ct
->regs
, reg
)) {
3580 * Allocate a new register matching the constraint
3581 * and move the temporary register into it.
3583 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3584 i_allocated_regs
, 0);
3585 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, i_allocated_regs
,
3586 o_preferred_regs
, ts
->indirect_base
);
3587 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
3589 * Cross register class move not supported. Sync the
3590 * temp back to its slot and load from there.
3592 temp_sync(s
, ts
, i_allocated_regs
, 0, 0);
3593 tcg_out_ld(s
, ts
->type
, reg
,
3594 ts
->mem_base
->reg
, ts
->mem_offset
);
3599 tcg_regset_set_reg(i_allocated_regs
, reg
);
3602 /* mark dead temporaries and free the associated registers */
3603 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3604 if (IS_DEAD_ARG(i
)) {
3605 temp_dead(s
, arg_temp(op
->args
[i
]));
3609 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3610 tcg_reg_alloc_cbranch(s
, i_allocated_regs
);
3611 } else if (def
->flags
& TCG_OPF_BB_END
) {
3612 tcg_reg_alloc_bb_end(s
, i_allocated_regs
);
3614 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
3615 /* XXX: permit generic clobber register list ? */
3616 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3617 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
3618 tcg_reg_free(s
, i
, i_allocated_regs
);
3622 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3623 /* sync globals if the op has side effects and might trigger
3625 sync_globals(s
, i_allocated_regs
);
3628 /* satisfy the output constraints */
3629 for(k
= 0; k
< nb_oargs
; k
++) {
3630 i
= def
->args_ct
[k
].sort_index
;
3632 arg_ct
= &def
->args_ct
[i
];
3635 /* ENV should not be modified. */
3636 tcg_debug_assert(!temp_readonly(ts
));
3638 if (arg_ct
->oalias
&& !const_args
[arg_ct
->alias_index
]) {
3639 reg
= new_args
[arg_ct
->alias_index
];
3640 } else if (arg_ct
->newreg
) {
3641 reg
= tcg_reg_alloc(s
, arg_ct
->regs
,
3642 i_allocated_regs
| o_allocated_regs
,
3643 op
->output_pref
[k
], ts
->indirect_base
);
3645 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, o_allocated_regs
,
3646 op
->output_pref
[k
], ts
->indirect_base
);
3648 tcg_regset_set_reg(o_allocated_regs
, reg
);
3649 if (ts
->val_type
== TEMP_VAL_REG
) {
3650 s
->reg_to_temp
[ts
->reg
] = NULL
;
3652 ts
->val_type
= TEMP_VAL_REG
;
3655 * Temp value is modified, so the value kept in memory is
3656 * potentially not the same.
3658 ts
->mem_coherent
= 0;
3659 s
->reg_to_temp
[reg
] = ts
;
3664 /* emit instruction */
3665 if (def
->flags
& TCG_OPF_VECTOR
) {
3666 tcg_out_vec_op(s
, op
->opc
, TCGOP_VECL(op
), TCGOP_VECE(op
),
3667 new_args
, const_args
);
3669 tcg_out_op(s
, op
->opc
, new_args
, const_args
);
3672 /* move the outputs in the correct register if needed */
3673 for(i
= 0; i
< nb_oargs
; i
++) {
3674 ts
= arg_temp(op
->args
[i
]);
3676 /* ENV should not be modified. */
3677 tcg_debug_assert(!temp_readonly(ts
));
3679 if (NEED_SYNC_ARG(i
)) {
3680 temp_sync(s
, ts
, o_allocated_regs
, 0, IS_DEAD_ARG(i
));
3681 } else if (IS_DEAD_ARG(i
)) {
3687 static bool tcg_reg_alloc_dup2(TCGContext
*s
, const TCGOp
*op
)
3689 const TCGLifeData arg_life
= op
->life
;
3690 TCGTemp
*ots
, *itsl
, *itsh
;
3691 TCGType vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3693 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3694 tcg_debug_assert(TCG_TARGET_REG_BITS
== 32);
3695 tcg_debug_assert(TCGOP_VECE(op
) == MO_64
);
3697 ots
= arg_temp(op
->args
[0]);
3698 itsl
= arg_temp(op
->args
[1]);
3699 itsh
= arg_temp(op
->args
[2]);
3701 /* ENV should not be modified. */
3702 tcg_debug_assert(!temp_readonly(ots
));
3704 /* Allocate the output register now. */
3705 if (ots
->val_type
!= TEMP_VAL_REG
) {
3706 TCGRegSet allocated_regs
= s
->reserved_regs
;
3707 TCGRegSet dup_out_regs
=
3708 tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3710 /* Make sure to not spill the input registers. */
3711 if (!IS_DEAD_ARG(1) && itsl
->val_type
== TEMP_VAL_REG
) {
3712 tcg_regset_set_reg(allocated_regs
, itsl
->reg
);
3714 if (!IS_DEAD_ARG(2) && itsh
->val_type
== TEMP_VAL_REG
) {
3715 tcg_regset_set_reg(allocated_regs
, itsh
->reg
);
3718 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
3719 op
->output_pref
[0], ots
->indirect_base
);
3720 ots
->val_type
= TEMP_VAL_REG
;
3721 ots
->mem_coherent
= 0;
3722 s
->reg_to_temp
[ots
->reg
] = ots
;
3725 /* Promote dup2 of immediates to dupi_vec. */
3726 if (itsl
->val_type
== TEMP_VAL_CONST
&& itsh
->val_type
== TEMP_VAL_CONST
) {
3727 uint64_t val
= deposit64(itsl
->val
, 32, 32, itsh
->val
);
3730 if (val
== dup_const(MO_8
, val
)) {
3732 } else if (val
== dup_const(MO_16
, val
)) {
3734 } else if (val
== dup_const(MO_32
, val
)) {
3738 tcg_out_dupi_vec(s
, vtype
, vece
, ots
->reg
, val
);
3742 /* If the two inputs form one 64-bit value, try dupm_vec. */
3743 if (itsl
+ 1 == itsh
&& itsl
->base_type
== TCG_TYPE_I64
) {
3744 if (!itsl
->mem_coherent
) {
3745 temp_sync(s
, itsl
, s
->reserved_regs
, 0, 0);
3747 if (!itsh
->mem_coherent
) {
3748 temp_sync(s
, itsh
, s
->reserved_regs
, 0, 0);
3750 #ifdef HOST_WORDS_BIGENDIAN
3751 TCGTemp
*its
= itsh
;
3753 TCGTemp
*its
= itsl
;
3755 if (tcg_out_dupm_vec(s
, vtype
, MO_64
, ots
->reg
,
3756 its
->mem_base
->reg
, its
->mem_offset
)) {
3761 /* Fall back to generic expansion. */
3765 if (IS_DEAD_ARG(1)) {
3768 if (IS_DEAD_ARG(2)) {
3771 if (NEED_SYNC_ARG(0)) {
3772 temp_sync(s
, ots
, s
->reserved_regs
, 0, IS_DEAD_ARG(0));
3773 } else if (IS_DEAD_ARG(0)) {
3779 #ifdef TCG_TARGET_STACK_GROWSUP
3780 #define STACK_DIR(x) (-(x))
3782 #define STACK_DIR(x) (x)
3785 static void tcg_reg_alloc_call(TCGContext
*s
, TCGOp
*op
)
3787 const int nb_oargs
= TCGOP_CALLO(op
);
3788 const int nb_iargs
= TCGOP_CALLI(op
);
3789 const TCGLifeData arg_life
= op
->life
;
3790 int flags
, nb_regs
, i
;
3794 intptr_t stack_offset
;
3795 size_t call_stack_size
;
3796 tcg_insn_unit
*func_addr
;
3798 TCGRegSet allocated_regs
;
3800 func_addr
= (tcg_insn_unit
*)(intptr_t)op
->args
[nb_oargs
+ nb_iargs
];
3801 flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
3803 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
3804 if (nb_regs
> nb_iargs
) {
3808 /* assign stack slots first */
3809 call_stack_size
= (nb_iargs
- nb_regs
) * sizeof(tcg_target_long
);
3810 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
3811 ~(TCG_TARGET_STACK_ALIGN
- 1);
3812 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
3813 if (allocate_args
) {
3814 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3815 preallocate call stack */
3819 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
3820 for (i
= nb_regs
; i
< nb_iargs
; i
++) {
3821 arg
= op
->args
[nb_oargs
+ i
];
3822 #ifdef TCG_TARGET_STACK_GROWSUP
3823 stack_offset
-= sizeof(tcg_target_long
);
3825 if (arg
!= TCG_CALL_DUMMY_ARG
) {
3827 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3828 s
->reserved_regs
, 0);
3829 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
3831 #ifndef TCG_TARGET_STACK_GROWSUP
3832 stack_offset
+= sizeof(tcg_target_long
);
3836 /* assign input registers */
3837 allocated_regs
= s
->reserved_regs
;
3838 for (i
= 0; i
< nb_regs
; i
++) {
3839 arg
= op
->args
[nb_oargs
+ i
];
3840 if (arg
!= TCG_CALL_DUMMY_ARG
) {
3842 reg
= tcg_target_call_iarg_regs
[i
];
3844 if (ts
->val_type
== TEMP_VAL_REG
) {
3845 if (ts
->reg
!= reg
) {
3846 tcg_reg_free(s
, reg
, allocated_regs
);
3847 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
3849 * Cross register class move not supported. Sync the
3850 * temp back to its slot and load from there.
3852 temp_sync(s
, ts
, allocated_regs
, 0, 0);
3853 tcg_out_ld(s
, ts
->type
, reg
,
3854 ts
->mem_base
->reg
, ts
->mem_offset
);
3858 TCGRegSet arg_set
= 0;
3860 tcg_reg_free(s
, reg
, allocated_regs
);
3861 tcg_regset_set_reg(arg_set
, reg
);
3862 temp_load(s
, ts
, arg_set
, allocated_regs
, 0);
3865 tcg_regset_set_reg(allocated_regs
, reg
);
3869 /* mark dead temporaries and free the associated registers */
3870 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3871 if (IS_DEAD_ARG(i
)) {
3872 temp_dead(s
, arg_temp(op
->args
[i
]));
3876 /* clobber call registers */
3877 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3878 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
3879 tcg_reg_free(s
, i
, allocated_regs
);
3883 /* Save globals if they might be written by the helper, sync them if
3884 they might be read. */
3885 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
3887 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
3888 sync_globals(s
, allocated_regs
);
3890 save_globals(s
, allocated_regs
);
3893 tcg_out_call(s
, func_addr
);
3895 /* assign output registers and emit moves if needed */
3896 for(i
= 0; i
< nb_oargs
; i
++) {
3900 /* ENV should not be modified. */
3901 tcg_debug_assert(!temp_readonly(ts
));
3903 reg
= tcg_target_call_oarg_regs
[i
];
3904 tcg_debug_assert(s
->reg_to_temp
[reg
] == NULL
);
3905 if (ts
->val_type
== TEMP_VAL_REG
) {
3906 s
->reg_to_temp
[ts
->reg
] = NULL
;
3908 ts
->val_type
= TEMP_VAL_REG
;
3910 ts
->mem_coherent
= 0;
3911 s
->reg_to_temp
[reg
] = ts
;
3912 if (NEED_SYNC_ARG(i
)) {
3913 temp_sync(s
, ts
, allocated_regs
, 0, IS_DEAD_ARG(i
));
3914 } else if (IS_DEAD_ARG(i
)) {
3920 #ifdef CONFIG_PROFILER
/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
3936 /* Pass in a zero'ed @prof */
3938 void tcg_profile_snapshot(TCGProfile
*prof
, bool counters
, bool table
)
3940 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
3943 for (i
= 0; i
< n_ctxs
; i
++) {
3944 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
3945 const TCGProfile
*orig
= &s
->prof
;
3948 PROF_ADD(prof
, orig
, cpu_exec_time
);
3949 PROF_ADD(prof
, orig
, tb_count1
);
3950 PROF_ADD(prof
, orig
, tb_count
);
3951 PROF_ADD(prof
, orig
, op_count
);
3952 PROF_MAX(prof
, orig
, op_count_max
);
3953 PROF_ADD(prof
, orig
, temp_count
);
3954 PROF_MAX(prof
, orig
, temp_count_max
);
3955 PROF_ADD(prof
, orig
, del_op_count
);
3956 PROF_ADD(prof
, orig
, code_in_len
);
3957 PROF_ADD(prof
, orig
, code_out_len
);
3958 PROF_ADD(prof
, orig
, search_out_len
);
3959 PROF_ADD(prof
, orig
, interm_time
);
3960 PROF_ADD(prof
, orig
, code_time
);
3961 PROF_ADD(prof
, orig
, la_time
);
3962 PROF_ADD(prof
, orig
, opt_time
);
3963 PROF_ADD(prof
, orig
, restore_count
);
3964 PROF_ADD(prof
, orig
, restore_time
);
3969 for (i
= 0; i
< NB_OPS
; i
++) {
3970 PROF_ADD(prof
, orig
, table_op_count
[i
]);
3979 static void tcg_profile_snapshot_counters(TCGProfile
*prof
)
3981 tcg_profile_snapshot(prof
, true, false);
3984 static void tcg_profile_snapshot_table(TCGProfile
*prof
)
3986 tcg_profile_snapshot(prof
, false, true);
3989 void tcg_dump_op_count(void)
3991 TCGProfile prof
= {};
3994 tcg_profile_snapshot_table(&prof
);
3995 for (i
= 0; i
< NB_OPS
; i
++) {
3996 qemu_printf("%s %" PRId64
"\n", tcg_op_defs
[i
].name
,
3997 prof
.table_op_count
[i
]);
4001 int64_t tcg_cpu_exec_time(void)
4003 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4007 for (i
= 0; i
< n_ctxs
; i
++) {
4008 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4009 const TCGProfile
*prof
= &s
->prof
;
4011 ret
+= qatomic_read(&prof
->cpu_exec_time
);
/* Without CONFIG_PROFILER there is nothing to report.  */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
/* Without CONFIG_PROFILER this query cannot be answered; abort.  */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4029 int tcg_gen_code(TCGContext
*s
, TranslationBlock
*tb
)
4031 #ifdef CONFIG_PROFILER
4032 TCGProfile
*prof
= &s
->prof
;
4037 #ifdef CONFIG_PROFILER
4041 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4044 qatomic_set(&prof
->op_count
, prof
->op_count
+ n
);
4045 if (n
> prof
->op_count_max
) {
4046 qatomic_set(&prof
->op_count_max
, n
);
4050 qatomic_set(&prof
->temp_count
, prof
->temp_count
+ n
);
4051 if (n
> prof
->temp_count_max
) {
4052 qatomic_set(&prof
->temp_count_max
, n
);
4058 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
)
4059 && qemu_log_in_addr_range(tb
->pc
))) {
4060 FILE *logfile
= qemu_log_lock();
4062 tcg_dump_ops(s
, false);
4064 qemu_log_unlock(logfile
);
4068 #ifdef CONFIG_DEBUG_TCG
4069 /* Ensure all labels referenced have been emitted. */
4074 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
4075 if (unlikely(!l
->present
) && l
->refs
) {
4076 qemu_log_mask(CPU_LOG_TB_OP
,
4077 "$L%d referenced but not present.\n", l
->id
);
4085 #ifdef CONFIG_PROFILER
4086 qatomic_set(&prof
->opt_time
, prof
->opt_time
- profile_getclock());
4089 #ifdef USE_TCG_OPTIMIZATIONS
4093 #ifdef CONFIG_PROFILER
4094 qatomic_set(&prof
->opt_time
, prof
->opt_time
+ profile_getclock());
4095 qatomic_set(&prof
->la_time
, prof
->la_time
- profile_getclock());
4098 reachable_code_pass(s
);
4101 if (s
->nb_indirects
> 0) {
4103 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND
)
4104 && qemu_log_in_addr_range(tb
->pc
))) {
4105 FILE *logfile
= qemu_log_lock();
4106 qemu_log("OP before indirect lowering:\n");
4107 tcg_dump_ops(s
, false);
4109 qemu_log_unlock(logfile
);
4112 /* Replace indirect temps with direct temps. */
4113 if (liveness_pass_2(s
)) {
4114 /* If changes were made, re-run liveness. */
4119 #ifdef CONFIG_PROFILER
4120 qatomic_set(&prof
->la_time
, prof
->la_time
+ profile_getclock());
4124 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
)
4125 && qemu_log_in_addr_range(tb
->pc
))) {
4126 FILE *logfile
= qemu_log_lock();
4127 qemu_log("OP after optimization and liveness analysis:\n");
4128 tcg_dump_ops(s
, true);
4130 qemu_log_unlock(logfile
);
4134 tcg_reg_alloc_start(s
);
4137 * Reset the buffer pointers when restarting after overflow.
4138 * TODO: Move this into translate-all.c with the rest of the
4139 * buffer management. Having only this done here is confusing.
4141 s
->code_buf
= tcg_splitwx_to_rw(tb
->tc
.ptr
);
4142 s
->code_ptr
= s
->code_buf
;
4144 #ifdef TCG_TARGET_NEED_LDST_LABELS
4145 QSIMPLEQ_INIT(&s
->ldst_labels
);
4147 #ifdef TCG_TARGET_NEED_POOL_LABELS
4148 s
->pool_labels
= NULL
;
4152 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4153 TCGOpcode opc
= op
->opc
;
4155 #ifdef CONFIG_PROFILER
4156 qatomic_set(&prof
->table_op_count
[opc
], prof
->table_op_count
[opc
] + 1);
4160 case INDEX_op_mov_i32
:
4161 case INDEX_op_mov_i64
:
4162 case INDEX_op_mov_vec
:
4163 tcg_reg_alloc_mov(s
, op
);
4165 case INDEX_op_dup_vec
:
4166 tcg_reg_alloc_dup(s
, op
);
4168 case INDEX_op_insn_start
:
4169 if (num_insns
>= 0) {
4170 size_t off
= tcg_current_code_size(s
);
4171 s
->gen_insn_end_off
[num_insns
] = off
;
4172 /* Assert that we do not overflow our stored offset. */
4173 assert(s
->gen_insn_end_off
[num_insns
] == off
);
4176 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
4178 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4179 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
4183 s
->gen_insn_data
[num_insns
][i
] = a
;
4186 case INDEX_op_discard
:
4187 temp_dead(s
, arg_temp(op
->args
[0]));
4189 case INDEX_op_set_label
:
4190 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
4191 tcg_out_label(s
, arg_label(op
->args
[0]));
4194 tcg_reg_alloc_call(s
, op
);
4196 case INDEX_op_dup2_vec
:
4197 if (tcg_reg_alloc_dup2(s
, op
)) {
4202 /* Sanity check that we've not introduced any unhandled opcodes. */
4203 tcg_debug_assert(tcg_op_supported(opc
));
4204 /* Note: in order to speed up the code, it would be much
4205 faster to have specialized register allocator functions for
4206 some common argument patterns */
4207 tcg_reg_alloc_op(s
, op
);
4210 #ifdef CONFIG_DEBUG_TCG
4213 /* Test for (pending) buffer overflow. The assumption is that any
4214 one operation beginning below the high water mark cannot overrun
4215 the buffer completely. Thus we can test for overflow after
4216 generating code without having to check during generation. */
4217 if (unlikely((void *)s
->code_ptr
> s
->code_gen_highwater
)) {
4220 /* Test for TB overflow, as seen by gen_insn_end_off. */
4221 if (unlikely(tcg_current_code_size(s
) > UINT16_MAX
)) {
4225 tcg_debug_assert(num_insns
>= 0);
4226 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
4228 /* Generate TB finalization at the end of block */
4229 #ifdef TCG_TARGET_NEED_LDST_LABELS
4230 i
= tcg_out_ldst_finalize(s
);
4235 #ifdef TCG_TARGET_NEED_POOL_LABELS
4236 i
= tcg_out_pool_finalize(s
);
4241 if (!tcg_resolve_relocs(s
)) {
4245 #ifndef CONFIG_TCG_INTERPRETER
4246 /* flush instruction cache */
4247 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
4248 (uintptr_t)s
->code_buf
,
4249 tcg_ptr_byte_diff(s
->code_ptr
, s
->code_buf
));
4252 return tcg_current_code_size(s
);
4255 #ifdef CONFIG_PROFILER
4256 void tcg_dump_info(void)
4258 TCGProfile prof
= {};
4259 const TCGProfile
*s
;
4261 int64_t tb_div_count
;
4264 tcg_profile_snapshot_counters(&prof
);
4266 tb_count
= s
->tb_count
;
4267 tb_div_count
= tb_count
? tb_count
: 1;
4268 tot
= s
->interm_time
+ s
->code_time
;
4270 qemu_printf("JIT cycles %" PRId64
" (%0.3f s at 2.4 GHz)\n",
4272 qemu_printf("translated TBs %" PRId64
" (aborted=%" PRId64
4274 tb_count
, s
->tb_count1
- tb_count
,
4275 (double)(s
->tb_count1
- s
->tb_count
)
4276 / (s
->tb_count1
? s
->tb_count1
: 1) * 100.0);
4277 qemu_printf("avg ops/TB %0.1f max=%d\n",
4278 (double)s
->op_count
/ tb_div_count
, s
->op_count_max
);
4279 qemu_printf("deleted ops/TB %0.2f\n",
4280 (double)s
->del_op_count
/ tb_div_count
);
4281 qemu_printf("avg temps/TB %0.2f max=%d\n",
4282 (double)s
->temp_count
/ tb_div_count
, s
->temp_count_max
);
4283 qemu_printf("avg host code/TB %0.1f\n",
4284 (double)s
->code_out_len
/ tb_div_count
);
4285 qemu_printf("avg search data/TB %0.1f\n",
4286 (double)s
->search_out_len
/ tb_div_count
);
4288 qemu_printf("cycles/op %0.1f\n",
4289 s
->op_count
? (double)tot
/ s
->op_count
: 0);
4290 qemu_printf("cycles/in byte %0.1f\n",
4291 s
->code_in_len
? (double)tot
/ s
->code_in_len
: 0);
4292 qemu_printf("cycles/out byte %0.1f\n",
4293 s
->code_out_len
? (double)tot
/ s
->code_out_len
: 0);
4294 qemu_printf("cycles/search byte %0.1f\n",
4295 s
->search_out_len
? (double)tot
/ s
->search_out_len
: 0);
4299 qemu_printf(" gen_interm time %0.1f%%\n",
4300 (double)s
->interm_time
/ tot
* 100.0);
4301 qemu_printf(" gen_code time %0.1f%%\n",
4302 (double)s
->code_time
/ tot
* 100.0);
4303 qemu_printf("optim./code time %0.1f%%\n",
4304 (double)s
->opt_time
/ (s
->code_time
? s
->code_time
: 1)
4306 qemu_printf("liveness/code time %0.1f%%\n",
4307 (double)s
->la_time
/ (s
->code_time
? s
->code_time
: 1) * 100.0);
4308 qemu_printf("cpu_restore count %" PRId64
"\n",
4310 qemu_printf(" avg cycles %0.1f\n",
4311 s
->restore_count
? (double)s
->restore_time
/ s
->restore_count
: 0);
/* Without CONFIG_PROFILER there are no statistics to print.  */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4320 #ifdef ELF_HOST_MACHINE
4321 /* In order to use this feature, the backend needs to do three things:
4323 (1) Define ELF_HOST_MACHINE to indicate both what value to
4324 put into the ELF image and to indicate support for the feature.
4326 (2) Define tcg_register_jit. This should create a buffer containing
4327 the contents of a .debug_frame section that describes the post-
4328 prologue unwind info for the tcg machine.
4330 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4333 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* One symbol file registered with the debugger; layout fixed by GDB.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
/* Root descriptor GDB looks up by name; layout fixed by GDB.  */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
4354 void __jit_debug_register_code(void) __attribute__((noinline
));
4355 void __jit_debug_register_code(void)
4360 /* Must statically initialize the version, because GDB may check
4361 the version before we can set it. */
4362 struct jit_descriptor __jit_debug_descriptor
= { 1, 0, 0, 0 };
4364 /* End GDB interface. */
/*
 * Return the byte offset of @str inside the NUL-separated string table
 * @strtab (which begins with an empty string at offset 0).  @str must
 * be present in the table; otherwise the scan never terminates.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Skip the leading empty string, then walk entry by entry.  */
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
4378 static void tcg_register_jit_int(const void *buf_ptr
, size_t buf_size
,
4379 const void *debug_frame
,
4380 size_t debug_frame_size
)
4382 struct __attribute__((packed
)) DebugInfo
{
4389 uintptr_t cu_low_pc
;
4390 uintptr_t cu_high_pc
;
4393 uintptr_t fn_low_pc
;
4394 uintptr_t fn_high_pc
;
4403 struct DebugInfo di
;
4408 struct ElfImage
*img
;
4410 static const struct ElfImage img_template
= {
4412 .e_ident
[EI_MAG0
] = ELFMAG0
,
4413 .e_ident
[EI_MAG1
] = ELFMAG1
,
4414 .e_ident
[EI_MAG2
] = ELFMAG2
,
4415 .e_ident
[EI_MAG3
] = ELFMAG3
,
4416 .e_ident
[EI_CLASS
] = ELF_CLASS
,
4417 .e_ident
[EI_DATA
] = ELF_DATA
,
4418 .e_ident
[EI_VERSION
] = EV_CURRENT
,
4420 .e_machine
= ELF_HOST_MACHINE
,
4421 .e_version
= EV_CURRENT
,
4422 .e_phoff
= offsetof(struct ElfImage
, phdr
),
4423 .e_shoff
= offsetof(struct ElfImage
, shdr
),
4424 .e_ehsize
= sizeof(ElfW(Shdr
)),
4425 .e_phentsize
= sizeof(ElfW(Phdr
)),
4427 .e_shentsize
= sizeof(ElfW(Shdr
)),
4428 .e_shnum
= ARRAY_SIZE(img
->shdr
),
4429 .e_shstrndx
= ARRAY_SIZE(img
->shdr
) - 1,
4430 #ifdef ELF_HOST_FLAGS
4431 .e_flags
= ELF_HOST_FLAGS
,
4434 .e_ident
[EI_OSABI
] = ELF_OSABI
,
4442 [0] = { .sh_type
= SHT_NULL
},
4443 /* Trick: The contents of code_gen_buffer are not present in
4444 this fake ELF file; that got allocated elsewhere. Therefore
4445 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4446 will not look for contents. We can record any address. */
4448 .sh_type
= SHT_NOBITS
,
4449 .sh_flags
= SHF_EXECINSTR
| SHF_ALLOC
,
4451 [2] = { /* .debug_info */
4452 .sh_type
= SHT_PROGBITS
,
4453 .sh_offset
= offsetof(struct ElfImage
, di
),
4454 .sh_size
= sizeof(struct DebugInfo
),
4456 [3] = { /* .debug_abbrev */
4457 .sh_type
= SHT_PROGBITS
,
4458 .sh_offset
= offsetof(struct ElfImage
, da
),
4459 .sh_size
= sizeof(img
->da
),
4461 [4] = { /* .debug_frame */
4462 .sh_type
= SHT_PROGBITS
,
4463 .sh_offset
= sizeof(struct ElfImage
),
4465 [5] = { /* .symtab */
4466 .sh_type
= SHT_SYMTAB
,
4467 .sh_offset
= offsetof(struct ElfImage
, sym
),
4468 .sh_size
= sizeof(img
->sym
),
4470 .sh_link
= ARRAY_SIZE(img
->shdr
) - 1,
4471 .sh_entsize
= sizeof(ElfW(Sym
)),
4473 [6] = { /* .strtab */
4474 .sh_type
= SHT_STRTAB
,
4475 .sh_offset
= offsetof(struct ElfImage
, str
),
4476 .sh_size
= sizeof(img
->str
),
4480 [1] = { /* code_gen_buffer */
4481 .st_info
= ELF_ST_INFO(STB_GLOBAL
, STT_FUNC
),
4486 .len
= sizeof(struct DebugInfo
) - 4,
4488 .ptr_size
= sizeof(void *),
4490 .cu_lang
= 0x8001, /* DW_LANG_Mips_Assembler */
4492 .fn_name
= "code_gen_buffer"
4495 1, /* abbrev number (the cu) */
4496 0x11, 1, /* DW_TAG_compile_unit, has children */
4497 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4498 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4499 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4500 0, 0, /* end of abbrev */
4501 2, /* abbrev number (the fn) */
4502 0x2e, 0, /* DW_TAG_subprogram, no children */
4503 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4504 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4505 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4506 0, 0, /* end of abbrev */
4507 0 /* no more abbrev */
4509 .str
= "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4510 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4513 /* We only need a single jit entry; statically allocate it. */
4514 static struct jit_code_entry one_entry
;
4516 uintptr_t buf
= (uintptr_t)buf_ptr
;
4517 size_t img_size
= sizeof(struct ElfImage
) + debug_frame_size
;
4518 DebugFrameHeader
*dfh
;
4520 img
= g_malloc(img_size
);
4521 *img
= img_template
;
4523 img
->phdr
.p_vaddr
= buf
;
4524 img
->phdr
.p_paddr
= buf
;
4525 img
->phdr
.p_memsz
= buf_size
;
4527 img
->shdr
[1].sh_name
= find_string(img
->str
, ".text");
4528 img
->shdr
[1].sh_addr
= buf
;
4529 img
->shdr
[1].sh_size
= buf_size
;
4531 img
->shdr
[2].sh_name
= find_string(img
->str
, ".debug_info");
4532 img
->shdr
[3].sh_name
= find_string(img
->str
, ".debug_abbrev");
4534 img
->shdr
[4].sh_name
= find_string(img
->str
, ".debug_frame");
4535 img
->shdr
[4].sh_size
= debug_frame_size
;
4537 img
->shdr
[5].sh_name
= find_string(img
->str
, ".symtab");
4538 img
->shdr
[6].sh_name
= find_string(img
->str
, ".strtab");
4540 img
->sym
[1].st_name
= find_string(img
->str
, "code_gen_buffer");
4541 img
->sym
[1].st_value
= buf
;
4542 img
->sym
[1].st_size
= buf_size
;
4544 img
->di
.cu_low_pc
= buf
;
4545 img
->di
.cu_high_pc
= buf
+ buf_size
;
4546 img
->di
.fn_low_pc
= buf
;
4547 img
->di
.fn_high_pc
= buf
+ buf_size
;
4549 dfh
= (DebugFrameHeader
*)(img
+ 1);
4550 memcpy(dfh
, debug_frame
, debug_frame_size
);
4551 dfh
->fde
.func_start
= buf
;
4552 dfh
->fde
.func_len
= buf_size
;
4555 /* Enable this block to be able to debug the ELF image file creation.
4556 One can use readelf, objdump, or other inspection utilities. */
4558 FILE *f
= fopen("/tmp/qemu.jit", "w+b");
4560 if (fwrite(img
, img_size
, 1, f
) != img_size
) {
4561 /* Avoid stupid unused return value warning for fwrite. */
4568 one_entry
.symfile_addr
= img
;
4569 one_entry
.symfile_size
= img_size
;
4571 __jit_debug_descriptor
.action_flag
= JIT_REGISTER_FN
;
4572 __jit_debug_descriptor
.relevant_entry
= &one_entry
;
4573 __jit_debug_descriptor
.first_entry
= &one_entry
;
4574 __jit_debug_register_code();
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Nothing to do: ELF_HOST_MACHINE is not defined for this host.  */
}
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Nothing to do without ELF_HOST_MACHINE support.  */
}
4589 #endif /* ELF_HOST_MACHINE */
4591 #if !TCG_TARGET_MAYBE_vec
4592 void tcg_expand_vec_op(TCGOpcode o
, TCGType t
, unsigned e
, TCGArg a0
, ...)
4594 g_assert_not_reached();