2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to jump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
43 #define NO_CPU_IO_DEFS
45 #include "exec/exec-all.h"
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
51 #include "tcg/tcg-op.h"
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS ELFCLASS32
56 # define ELF_CLASS ELFCLASS64
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA ELFDATA2MSB
61 # define ELF_DATA ELFDATA2LSB
67 /* Forward declarations for functions declared in tcg-target.c.inc and
69 static void tcg_target_init(TCGContext
*s
);
70 static void tcg_target_qemu_prologue(TCGContext
*s
);
71 static bool patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
72 intptr_t value
, intptr_t addend
);
74 /* The CIE and FDE header definitions will be common to all hosts. */
76 uint32_t len
__attribute__((aligned((sizeof(void *)))));
82 uint8_t return_column
;
85 typedef struct QEMU_PACKED
{
86 uint32_t len
__attribute__((aligned((sizeof(void *)))));
90 } DebugFrameFDEHeader
;
92 typedef struct QEMU_PACKED
{
94 DebugFrameFDEHeader fde
;
97 static void tcg_register_jit_int(const void *buf
, size_t size
,
98 const void *debug_frame
,
99 size_t debug_frame_size
)
100 __attribute__((unused
));
102 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
103 static void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg1
,
105 static bool tcg_out_mov(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg
);
106 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
107 TCGReg ret
, tcg_target_long arg
);
108 static void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
109 const TCGArg args
[TCG_MAX_OP_ARGS
],
110 const int const_args
[TCG_MAX_OP_ARGS
]);
111 #if TCG_TARGET_MAYBE_vec
112 static bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
113 TCGReg dst
, TCGReg src
);
114 static bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
115 TCGReg dst
, TCGReg base
, intptr_t offset
);
116 static void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
117 TCGReg dst
, int64_t arg
);
118 static void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
,
119 unsigned vecl
, unsigned vece
,
120 const TCGArg args
[TCG_MAX_OP_ARGS
],
121 const int const_args
[TCG_MAX_OP_ARGS
]);
123 static inline bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
124 TCGReg dst
, TCGReg src
)
126 g_assert_not_reached();
128 static inline bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
129 TCGReg dst
, TCGReg base
, intptr_t offset
)
131 g_assert_not_reached();
133 static inline void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
134 TCGReg dst
, int64_t arg
)
136 g_assert_not_reached();
138 static inline void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
,
139 unsigned vecl
, unsigned vece
,
140 const TCGArg args
[TCG_MAX_OP_ARGS
],
141 const int const_args
[TCG_MAX_OP_ARGS
])
143 g_assert_not_reached();
146 static void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
, TCGReg arg1
,
148 static bool tcg_out_sti(TCGContext
*s
, TCGType type
, TCGArg val
,
149 TCGReg base
, intptr_t ofs
);
150 static void tcg_out_call(TCGContext
*s
, const tcg_insn_unit
*target
);
151 static int tcg_target_const_match(tcg_target_long val
, TCGType type
,
152 const TCGArgConstraint
*arg_ct
);
153 #ifdef TCG_TARGET_NEED_LDST_LABELS
154 static int tcg_out_ldst_finalize(TCGContext
*s
);
157 #define TCG_HIGHWATER 1024
159 static TCGContext
**tcg_ctxs
;
160 static unsigned int n_tcg_ctxs
;
161 TCGv_env cpu_env
= 0;
162 const void *tcg_code_gen_epilogue
;
163 uintptr_t tcg_splitwx_diff
;
165 #ifndef CONFIG_TCG_INTERPRETER
166 tcg_prologue_fn
*tcg_qemu_tb_exec
;
169 struct tcg_region_tree
{
172 /* padding to avoid false sharing is computed at run-time */
176 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
177 * dynamically allocate from as demand dictates. Given appropriate region
178 * sizing, this minimizes flushes even when some TCG threads generate a lot
179 * more code than others.
181 struct tcg_region_state
{
184 /* fields set at init time */
189 size_t size
; /* size of one region */
190 size_t stride
; /* .size + guard size */
192 /* fields protected by the lock */
193 size_t current
; /* current region index */
194 size_t agg_size_full
; /* aggregate size of full regions */
197 static struct tcg_region_state region
;
199 * This is an array of struct tcg_region_tree's, with padding.
200 * We use void * to simplify the computation of region_trees[i]; each
201 * struct is found every tree_size bytes.
203 static void *region_trees
;
204 static size_t tree_size
;
205 static TCGRegSet tcg_target_available_regs
[TCG_TYPE_COUNT
];
206 static TCGRegSet tcg_target_call_clobber_regs
;
208 #if TCG_TARGET_INSN_UNIT_SIZE == 1
209 static __attribute__((unused
)) inline void tcg_out8(TCGContext
*s
, uint8_t v
)
214 static __attribute__((unused
)) inline void tcg_patch8(tcg_insn_unit
*p
,
221 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
222 static __attribute__((unused
)) inline void tcg_out16(TCGContext
*s
, uint16_t v
)
224 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
227 tcg_insn_unit
*p
= s
->code_ptr
;
228 memcpy(p
, &v
, sizeof(v
));
229 s
->code_ptr
= p
+ (2 / TCG_TARGET_INSN_UNIT_SIZE
);
233 static __attribute__((unused
)) inline void tcg_patch16(tcg_insn_unit
*p
,
236 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
239 memcpy(p
, &v
, sizeof(v
));
244 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
245 static __attribute__((unused
)) inline void tcg_out32(TCGContext
*s
, uint32_t v
)
247 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
250 tcg_insn_unit
*p
= s
->code_ptr
;
251 memcpy(p
, &v
, sizeof(v
));
252 s
->code_ptr
= p
+ (4 / TCG_TARGET_INSN_UNIT_SIZE
);
256 static __attribute__((unused
)) inline void tcg_patch32(tcg_insn_unit
*p
,
259 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
262 memcpy(p
, &v
, sizeof(v
));
267 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
268 static __attribute__((unused
)) inline void tcg_out64(TCGContext
*s
, uint64_t v
)
270 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
273 tcg_insn_unit
*p
= s
->code_ptr
;
274 memcpy(p
, &v
, sizeof(v
));
275 s
->code_ptr
= p
+ (8 / TCG_TARGET_INSN_UNIT_SIZE
);
279 static __attribute__((unused
)) inline void tcg_patch64(tcg_insn_unit
*p
,
282 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
285 memcpy(p
, &v
, sizeof(v
));
290 /* label relocation processing */
292 static void tcg_out_reloc(TCGContext
*s
, tcg_insn_unit
*code_ptr
, int type
,
293 TCGLabel
*l
, intptr_t addend
)
295 TCGRelocation
*r
= tcg_malloc(sizeof(TCGRelocation
));
300 QSIMPLEQ_INSERT_TAIL(&l
->relocs
, r
, next
);
303 static void tcg_out_label(TCGContext
*s
, TCGLabel
*l
)
305 tcg_debug_assert(!l
->has_value
);
307 l
->u
.value_ptr
= tcg_splitwx_to_rx(s
->code_ptr
);
310 TCGLabel
*gen_new_label(void)
312 TCGContext
*s
= tcg_ctx
;
313 TCGLabel
*l
= tcg_malloc(sizeof(TCGLabel
));
315 memset(l
, 0, sizeof(TCGLabel
));
316 l
->id
= s
->nb_labels
++;
317 QSIMPLEQ_INIT(&l
->relocs
);
319 QSIMPLEQ_INSERT_TAIL(&s
->labels
, l
, next
);
324 static bool tcg_resolve_relocs(TCGContext
*s
)
328 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
330 uintptr_t value
= l
->u
.value
;
332 QSIMPLEQ_FOREACH(r
, &l
->relocs
, next
) {
333 if (!patch_reloc(r
->ptr
, r
->type
, value
, r
->addend
)) {
341 static void set_jmp_reset_offset(TCGContext
*s
, int which
)
344 * We will check for overflow at the end of the opcode loop in
345 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
347 s
->tb_jmp_reset_offset
[which
] = tcg_current_code_size(s
);
350 /* Signal overflow, starting over with fewer guest insns. */
351 static void QEMU_NORETURN
tcg_raise_tb_overflow(TCGContext
*s
)
353 siglongjmp(s
->jmp_trans
, -2);
356 #define C_PFX1(P, A) P##A
357 #define C_PFX2(P, A, B) P##A##_##B
358 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
359 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
360 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
361 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
363 /* Define an enumeration for the various combinations. */
365 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
366 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
367 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
368 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
370 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
371 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
372 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
373 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
375 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
377 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
378 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
379 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
380 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
383 #include "tcg-target-con-set.h"
384 } TCGConstraintSetIndex
;
386 static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode
);
402 /* Put all of the constraint sets into an array, indexed by the enum. */
404 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
405 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
406 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
407 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
409 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
410 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
411 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
412 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
414 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
416 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
417 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
418 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
419 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
421 static const TCGTargetOpDef constraint_sets
[] = {
422 #include "tcg-target-con-set.h"
440 /* Expand the enumerator to be returned from tcg_target_op_def(). */
442 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
443 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
444 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
445 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
447 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
448 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
449 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
450 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
452 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
454 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
455 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
456 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
457 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
459 #include "tcg-target.c.inc"
461 /* compare a pointer @ptr and a tb_tc @s */
462 static int ptr_cmp_tb_tc(const void *ptr
, const struct tb_tc
*s
)
464 if (ptr
>= s
->ptr
+ s
->size
) {
466 } else if (ptr
< s
->ptr
) {
472 static gint
tb_tc_cmp(gconstpointer ap
, gconstpointer bp
)
474 const struct tb_tc
*a
= ap
;
475 const struct tb_tc
*b
= bp
;
478 * When both sizes are set, we know this isn't a lookup.
479 * This is the most likely case: every TB must be inserted; lookups
480 * are a lot less frequent.
482 if (likely(a
->size
&& b
->size
)) {
483 if (a
->ptr
> b
->ptr
) {
485 } else if (a
->ptr
< b
->ptr
) {
488 /* a->ptr == b->ptr should happen only on deletions */
489 g_assert(a
->size
== b
->size
);
493 * All lookups have either .size field set to 0.
494 * From the glib sources we see that @ap is always the lookup key. However
495 * the docs provide no guarantee, so we just mark this case as likely.
497 if (likely(a
->size
== 0)) {
498 return ptr_cmp_tb_tc(a
->ptr
, b
);
500 return ptr_cmp_tb_tc(b
->ptr
, a
);
503 static void tcg_region_trees_init(void)
507 tree_size
= ROUND_UP(sizeof(struct tcg_region_tree
), qemu_dcache_linesize
);
508 region_trees
= qemu_memalign(qemu_dcache_linesize
, region
.n
* tree_size
);
509 for (i
= 0; i
< region
.n
; i
++) {
510 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
512 qemu_mutex_init(&rt
->lock
);
513 rt
->tree
= g_tree_new(tb_tc_cmp
);
517 static struct tcg_region_tree
*tc_ptr_to_region_tree(const void *p
)
522 * Like tcg_splitwx_to_rw, with no assert. The pc may come from
523 * a signal handler over which the caller has no control.
525 if (!in_code_gen_buffer(p
)) {
526 p
-= tcg_splitwx_diff
;
527 if (!in_code_gen_buffer(p
)) {
532 if (p
< region
.start_aligned
) {
535 ptrdiff_t offset
= p
- region
.start_aligned
;
537 if (offset
> region
.stride
* (region
.n
- 1)) {
538 region_idx
= region
.n
- 1;
540 region_idx
= offset
/ region
.stride
;
543 return region_trees
+ region_idx
* tree_size
;
546 void tcg_tb_insert(TranslationBlock
*tb
)
548 struct tcg_region_tree
*rt
= tc_ptr_to_region_tree(tb
->tc
.ptr
);
550 g_assert(rt
!= NULL
);
551 qemu_mutex_lock(&rt
->lock
);
552 g_tree_insert(rt
->tree
, &tb
->tc
, tb
);
553 qemu_mutex_unlock(&rt
->lock
);
556 void tcg_tb_remove(TranslationBlock
*tb
)
558 struct tcg_region_tree
*rt
= tc_ptr_to_region_tree(tb
->tc
.ptr
);
560 g_assert(rt
!= NULL
);
561 qemu_mutex_lock(&rt
->lock
);
562 g_tree_remove(rt
->tree
, &tb
->tc
);
563 qemu_mutex_unlock(&rt
->lock
);
567 * Find the TB 'tb' such that
568 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
569 * Return NULL if not found.
571 TranslationBlock
*tcg_tb_lookup(uintptr_t tc_ptr
)
573 struct tcg_region_tree
*rt
= tc_ptr_to_region_tree((void *)tc_ptr
);
574 TranslationBlock
*tb
;
575 struct tb_tc s
= { .ptr
= (void *)tc_ptr
};
581 qemu_mutex_lock(&rt
->lock
);
582 tb
= g_tree_lookup(rt
->tree
, &s
);
583 qemu_mutex_unlock(&rt
->lock
);
587 static void tcg_region_tree_lock_all(void)
591 for (i
= 0; i
< region
.n
; i
++) {
592 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
594 qemu_mutex_lock(&rt
->lock
);
598 static void tcg_region_tree_unlock_all(void)
602 for (i
= 0; i
< region
.n
; i
++) {
603 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
605 qemu_mutex_unlock(&rt
->lock
);
609 void tcg_tb_foreach(GTraverseFunc func
, gpointer user_data
)
613 tcg_region_tree_lock_all();
614 for (i
= 0; i
< region
.n
; i
++) {
615 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
617 g_tree_foreach(rt
->tree
, func
, user_data
);
619 tcg_region_tree_unlock_all();
622 size_t tcg_nb_tbs(void)
627 tcg_region_tree_lock_all();
628 for (i
= 0; i
< region
.n
; i
++) {
629 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
631 nb_tbs
+= g_tree_nnodes(rt
->tree
);
633 tcg_region_tree_unlock_all();
637 static gboolean
tcg_region_tree_traverse(gpointer k
, gpointer v
, gpointer data
)
639 TranslationBlock
*tb
= v
;
645 static void tcg_region_tree_reset_all(void)
649 tcg_region_tree_lock_all();
650 for (i
= 0; i
< region
.n
; i
++) {
651 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
653 g_tree_foreach(rt
->tree
, tcg_region_tree_traverse
, NULL
);
654 /* Increment the refcount first so that destroy acts as a reset */
655 g_tree_ref(rt
->tree
);
656 g_tree_destroy(rt
->tree
);
658 tcg_region_tree_unlock_all();
661 static void tcg_region_bounds(size_t curr_region
, void **pstart
, void **pend
)
665 start
= region
.start_aligned
+ curr_region
* region
.stride
;
666 end
= start
+ region
.size
;
668 if (curr_region
== 0) {
669 start
= region
.start
;
671 if (curr_region
== region
.n
- 1) {
679 static void tcg_region_assign(TCGContext
*s
, size_t curr_region
)
683 tcg_region_bounds(curr_region
, &start
, &end
);
685 s
->code_gen_buffer
= start
;
686 s
->code_gen_ptr
= start
;
687 s
->code_gen_buffer_size
= end
- start
;
688 s
->code_gen_highwater
= end
- TCG_HIGHWATER
;
691 static bool tcg_region_alloc__locked(TCGContext
*s
)
693 if (region
.current
== region
.n
) {
696 tcg_region_assign(s
, region
.current
);
702 * Request a new region once the one in use has filled up.
703 * Returns true on error.
705 static bool tcg_region_alloc(TCGContext
*s
)
708 /* read the region size now; alloc__locked will overwrite it on success */
709 size_t size_full
= s
->code_gen_buffer_size
;
711 qemu_mutex_lock(®ion
.lock
);
712 err
= tcg_region_alloc__locked(s
);
714 region
.agg_size_full
+= size_full
- TCG_HIGHWATER
;
716 qemu_mutex_unlock(®ion
.lock
);
721 * Perform a context's first region allocation.
722 * This function does _not_ increment region.agg_size_full.
724 static inline bool tcg_region_initial_alloc__locked(TCGContext
*s
)
726 return tcg_region_alloc__locked(s
);
729 /* Call from a safe-work context */
730 void tcg_region_reset_all(void)
732 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
735 qemu_mutex_lock(®ion
.lock
);
737 region
.agg_size_full
= 0;
739 for (i
= 0; i
< n_ctxs
; i
++) {
740 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
741 bool err
= tcg_region_initial_alloc__locked(s
);
745 qemu_mutex_unlock(®ion
.lock
);
747 tcg_region_tree_reset_all();
750 #ifdef CONFIG_USER_ONLY
751 static size_t tcg_n_regions(void)
757 * It is likely that some vCPUs will translate more code than others, so we
758 * first try to set more regions than max_cpus, with those regions being of
759 * reasonable size. If that's not possible we make do by evenly dividing
760 * the code_gen_buffer among the vCPUs.
762 static size_t tcg_n_regions(void)
766 /* Use a single region if all we have is one vCPU thread */
767 #if !defined(CONFIG_USER_ONLY)
768 MachineState
*ms
= MACHINE(qdev_get_machine());
769 unsigned int max_cpus
= ms
->smp
.max_cpus
;
771 if (max_cpus
== 1 || !qemu_tcg_mttcg_enabled()) {
775 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
776 for (i
= 8; i
> 0; i
--) {
777 size_t regions_per_thread
= i
;
780 region_size
= tcg_init_ctx
.code_gen_buffer_size
;
781 region_size
/= max_cpus
* regions_per_thread
;
783 if (region_size
>= 2 * 1024u * 1024) {
784 return max_cpus
* regions_per_thread
;
787 /* If we can't, then just allocate one region per vCPU thread */
793 * Initializes region partitioning.
795 * Called at init time from the parent thread (i.e. the one calling
796 * tcg_context_init), after the target's TCG globals have been set.
798 * Region partitioning works by splitting code_gen_buffer into separate regions,
799 * and then assigning regions to TCG threads so that the threads can translate
800 * code in parallel without synchronization.
802 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
803 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
804 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
805 * must have been parsed before calling this function, since it calls
806 * qemu_tcg_mttcg_enabled().
808 * In user-mode we use a single region. Having multiple regions in user-mode
809 * is not supported, because the number of vCPU threads (recall that each thread
810 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
811 * OS, and usually this number is huge (tens of thousands is not uncommon).
812 * Thus, given this large bound on the number of vCPU threads and the fact
813 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
814 * that the availability of at least one region per vCPU thread.
816 * However, this user-mode limitation is unlikely to be a significant problem
817 * in practice. Multi-threaded guests share most if not all of their translated
818 * code, which makes parallel code generation less appealing than in softmmu.
820 void tcg_region_init(void)
822 void *buf
= tcg_init_ctx
.code_gen_buffer
;
824 size_t size
= tcg_init_ctx
.code_gen_buffer_size
;
825 size_t page_size
= qemu_real_host_page_size
;
830 n_regions
= tcg_n_regions();
832 /* The first region will be 'aligned - buf' bytes larger than the others */
833 aligned
= QEMU_ALIGN_PTR_UP(buf
, page_size
);
834 g_assert(aligned
< tcg_init_ctx
.code_gen_buffer
+ size
);
836 * Make region_size a multiple of page_size, using aligned as the start.
837 * As a result of this we might end up with a few extra pages at the end of
838 * the buffer; we will assign those to the last region.
840 region_size
= (size
- (aligned
- buf
)) / n_regions
;
841 region_size
= QEMU_ALIGN_DOWN(region_size
, page_size
);
843 /* A region must have at least 2 pages; one code, one guard */
844 g_assert(region_size
>= 2 * page_size
);
846 /* init the region struct */
847 qemu_mutex_init(®ion
.lock
);
848 region
.n
= n_regions
;
849 region
.size
= region_size
- page_size
;
850 region
.stride
= region_size
;
852 region
.start_aligned
= aligned
;
853 /* page-align the end, since its last page will be a guard page */
854 region
.end
= QEMU_ALIGN_PTR_DOWN(buf
+ size
, page_size
);
855 /* account for that last guard page */
856 region
.end
-= page_size
;
859 * Set guard pages in the rw buffer, as that's the one into which
860 * buffer overruns could occur. Do not set guard pages in the rx
861 * buffer -- let that one use hugepages throughout.
863 for (i
= 0; i
< region
.n
; i
++) {
866 tcg_region_bounds(i
, &start
, &end
);
869 * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
870 * rejects a permission change from RWX -> NONE. Guard pages are
871 * nice for bug detection but are not essential; ignore any failure.
873 (void)qemu_mprotect_none(end
, page_size
);
876 tcg_region_trees_init();
878 /* In user-mode we support only one ctx, so do the initial allocation now */
879 #ifdef CONFIG_USER_ONLY
881 bool err
= tcg_region_initial_alloc__locked(tcg_ctx
);
888 #ifdef CONFIG_DEBUG_TCG
889 const void *tcg_splitwx_to_rx(void *rw
)
891 /* Pass NULL pointers unchanged. */
893 g_assert(in_code_gen_buffer(rw
));
894 rw
+= tcg_splitwx_diff
;
899 void *tcg_splitwx_to_rw(const void *rx
)
901 /* Pass NULL pointers unchanged. */
903 rx
-= tcg_splitwx_diff
;
904 /* Assert that we end with a pointer in the rw region. */
905 g_assert(in_code_gen_buffer(rx
));
909 #endif /* CONFIG_DEBUG_TCG */
911 static void alloc_tcg_plugin_context(TCGContext
*s
)
914 s
->plugin_tb
= g_new0(struct qemu_plugin_tb
, 1);
915 s
->plugin_tb
->insns
=
916 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn
);
921 * All TCG threads except the parent (i.e. the one that called tcg_context_init
922 * and registered the target's TCG globals) must register with this function
923 * before initiating translation.
925 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
926 * of tcg_region_init() for the reasoning behind this.
928 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
929 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
930 * is not used anymore for translation once this function is called.
932 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
933 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
935 #ifdef CONFIG_USER_ONLY
936 void tcg_register_thread(void)
938 tcg_ctx
= &tcg_init_ctx
;
941 void tcg_register_thread(void)
943 MachineState
*ms
= MACHINE(qdev_get_machine());
944 TCGContext
*s
= g_malloc(sizeof(*s
));
950 /* Relink mem_base. */
951 for (i
= 0, n
= tcg_init_ctx
.nb_globals
; i
< n
; ++i
) {
952 if (tcg_init_ctx
.temps
[i
].mem_base
) {
953 ptrdiff_t b
= tcg_init_ctx
.temps
[i
].mem_base
- tcg_init_ctx
.temps
;
954 tcg_debug_assert(b
>= 0 && b
< n
);
955 s
->temps
[i
].mem_base
= &s
->temps
[b
];
959 /* Claim an entry in tcg_ctxs */
960 n
= qatomic_fetch_inc(&n_tcg_ctxs
);
961 g_assert(n
< ms
->smp
.max_cpus
);
962 qatomic_set(&tcg_ctxs
[n
], s
);
965 alloc_tcg_plugin_context(s
);
969 qemu_mutex_lock(®ion
.lock
);
970 err
= tcg_region_initial_alloc__locked(tcg_ctx
);
972 qemu_mutex_unlock(®ion
.lock
);
974 #endif /* !CONFIG_USER_ONLY */
977 * Returns the size (in bytes) of all translated code (i.e. from all regions)
978 * currently in the cache.
979 * See also: tcg_code_capacity()
980 * Do not confuse with tcg_current_code_size(); that one applies to a single
983 size_t tcg_code_size(void)
985 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
989 qemu_mutex_lock(®ion
.lock
);
990 total
= region
.agg_size_full
;
991 for (i
= 0; i
< n_ctxs
; i
++) {
992 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
995 size
= qatomic_read(&s
->code_gen_ptr
) - s
->code_gen_buffer
;
996 g_assert(size
<= s
->code_gen_buffer_size
);
999 qemu_mutex_unlock(®ion
.lock
);
1004 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
1006 * See also: tcg_code_size()
1008 size_t tcg_code_capacity(void)
1010 size_t guard_size
, capacity
;
1012 /* no need for synchronization; these variables are set at init time */
1013 guard_size
= region
.stride
- region
.size
;
1014 capacity
= region
.end
+ guard_size
- region
.start
;
1015 capacity
-= region
.n
* (guard_size
+ TCG_HIGHWATER
);
1019 size_t tcg_tb_phys_invalidate_count(void)
1021 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
1025 for (i
= 0; i
< n_ctxs
; i
++) {
1026 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
1028 total
+= qatomic_read(&s
->tb_phys_invalidate_count
);
1033 /* pool based memory allocation */
1034 void *tcg_malloc_internal(TCGContext
*s
, int size
)
1039 if (size
> TCG_POOL_CHUNK_SIZE
) {
1040 /* big malloc: insert a new pool (XXX: could optimize) */
1041 p
= g_malloc(sizeof(TCGPool
) + size
);
1043 p
->next
= s
->pool_first_large
;
1044 s
->pool_first_large
= p
;
1047 p
= s
->pool_current
;
1055 pool_size
= TCG_POOL_CHUNK_SIZE
;
1056 p
= g_malloc(sizeof(TCGPool
) + pool_size
);
1057 p
->size
= pool_size
;
1059 if (s
->pool_current
)
1060 s
->pool_current
->next
= p
;
1068 s
->pool_current
= p
;
1069 s
->pool_cur
= p
->data
+ size
;
1070 s
->pool_end
= p
->data
+ p
->size
;
1074 void tcg_pool_reset(TCGContext
*s
)
1077 for (p
= s
->pool_first_large
; p
; p
= t
) {
1081 s
->pool_first_large
= NULL
;
1082 s
->pool_cur
= s
->pool_end
= NULL
;
1083 s
->pool_current
= NULL
;
1086 typedef struct TCGHelperInfo
{
1093 #include "exec/helper-proto.h"
1095 static const TCGHelperInfo all_helpers
[] = {
1096 #include "exec/helper-tcg.h"
1098 static GHashTable
*helper_table
;
1100 static int indirect_reg_alloc_order
[ARRAY_SIZE(tcg_target_reg_alloc_order
)];
1101 static void process_op_defs(TCGContext
*s
);
1102 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
1103 TCGReg reg
, const char *name
);
1105 void tcg_context_init(TCGContext
*s
)
1107 int op
, total_args
, n
, i
;
1109 TCGArgConstraint
*args_ct
;
1112 memset(s
, 0, sizeof(*s
));
1115 /* Count total number of arguments and allocate the corresponding
1118 for(op
= 0; op
< NB_OPS
; op
++) {
1119 def
= &tcg_op_defs
[op
];
1120 n
= def
->nb_iargs
+ def
->nb_oargs
;
1124 args_ct
= g_new0(TCGArgConstraint
, total_args
);
1126 for(op
= 0; op
< NB_OPS
; op
++) {
1127 def
= &tcg_op_defs
[op
];
1128 def
->args_ct
= args_ct
;
1129 n
= def
->nb_iargs
+ def
->nb_oargs
;
1133 /* Register helpers. */
1134 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1135 helper_table
= g_hash_table_new(NULL
, NULL
);
1137 for (i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
1138 g_hash_table_insert(helper_table
, (gpointer
)all_helpers
[i
].func
,
1139 (gpointer
)&all_helpers
[i
]);
1145 /* Reverse the order of the saved registers, assuming they're all at
1146 the start of tcg_target_reg_alloc_order. */
1147 for (n
= 0; n
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++n
) {
1148 int r
= tcg_target_reg_alloc_order
[n
];
1149 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, r
)) {
1153 for (i
= 0; i
< n
; ++i
) {
1154 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[n
- 1 - i
];
1156 for (; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++i
) {
1157 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[i
];
1160 alloc_tcg_plugin_context(s
);
1164 * In user-mode we simply share the init context among threads, since we
1165 * use a single region. See the documentation tcg_region_init() for the
1166 * reasoning behind this.
1167 * In softmmu we will have at most max_cpus TCG threads.
1169 #ifdef CONFIG_USER_ONLY
1170 tcg_ctxs
= &tcg_ctx
;
1173 MachineState
*ms
= MACHINE(qdev_get_machine());
1174 unsigned int max_cpus
= ms
->smp
.max_cpus
;
1175 tcg_ctxs
= g_new(TCGContext
*, max_cpus
);
1178 tcg_debug_assert(!tcg_regset_test_reg(s
->reserved_regs
, TCG_AREG0
));
1179 ts
= tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, TCG_AREG0
, "env");
1180 cpu_env
= temp_tcgv_ptr(ts
);
1184 * Allocate TBs right before their corresponding translated code, making
1185 * sure that TBs and code are on different cache lines.
1187 TranslationBlock
*tcg_tb_alloc(TCGContext
*s
)
1189 uintptr_t align
= qemu_icache_linesize
;
1190 TranslationBlock
*tb
;
1194 tb
= (void *)ROUND_UP((uintptr_t)s
->code_gen_ptr
, align
);
1195 next
= (void *)ROUND_UP((uintptr_t)(tb
+ 1), align
);
1197 if (unlikely(next
> s
->code_gen_highwater
)) {
1198 if (tcg_region_alloc(s
)) {
1203 qatomic_set(&s
->code_gen_ptr
, next
);
1204 s
->data_gen_ptr
= NULL
;
1208 void tcg_prologue_init(TCGContext
*s
)
1210 size_t prologue_size
, total_size
;
1213 /* Put the prologue at the beginning of code_gen_buffer. */
1214 buf0
= s
->code_gen_buffer
;
1215 total_size
= s
->code_gen_buffer_size
;
1218 s
->data_gen_ptr
= NULL
;
1221 * The region trees are not yet configured, but tcg_splitwx_to_rx
1222 * needs the bounds for an assert.
1224 region
.start
= buf0
;
1225 region
.end
= buf0
+ total_size
;
1227 #ifndef CONFIG_TCG_INTERPRETER
1228 tcg_qemu_tb_exec
= (tcg_prologue_fn
*)tcg_splitwx_to_rx(buf0
);
1231 /* Compute a high-water mark, at which we voluntarily flush the buffer
1232 and start over. The size here is arbitrary, significantly larger
1233 than we expect the code generation for any one opcode to require. */
1234 s
->code_gen_highwater
= s
->code_gen_buffer
+ (total_size
- TCG_HIGHWATER
);
1236 #ifdef TCG_TARGET_NEED_POOL_LABELS
1237 s
->pool_labels
= NULL
;
1240 qemu_thread_jit_write();
1241 /* Generate the prologue. */
1242 tcg_target_qemu_prologue(s
);
1244 #ifdef TCG_TARGET_NEED_POOL_LABELS
1245 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1247 int result
= tcg_out_pool_finalize(s
);
1248 tcg_debug_assert(result
== 0);
1253 #ifndef CONFIG_TCG_INTERPRETER
1254 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0
), (uintptr_t)buf0
,
1255 tcg_ptr_byte_diff(buf1
, buf0
));
1258 /* Deduct the prologue from the buffer. */
1259 prologue_size
= tcg_current_code_size(s
);
1260 s
->code_gen_ptr
= buf1
;
1261 s
->code_gen_buffer
= buf1
;
1263 total_size
-= prologue_size
;
1264 s
->code_gen_buffer_size
= total_size
;
1266 tcg_register_jit(tcg_splitwx_to_rx(s
->code_gen_buffer
), total_size
);
1269 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
1270 FILE *logfile
= qemu_log_lock();
1271 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size
);
1272 if (s
->data_gen_ptr
) {
1273 size_t code_size
= s
->data_gen_ptr
- buf0
;
1274 size_t data_size
= prologue_size
- code_size
;
1277 log_disas(buf0
, code_size
);
1279 for (i
= 0; i
< data_size
; i
+= sizeof(tcg_target_ulong
)) {
1280 if (sizeof(tcg_target_ulong
) == 8) {
1281 qemu_log("0x%08" PRIxPTR
": .quad 0x%016" PRIx64
"\n",
1282 (uintptr_t)s
->data_gen_ptr
+ i
,
1283 *(uint64_t *)(s
->data_gen_ptr
+ i
));
1285 qemu_log("0x%08" PRIxPTR
": .long 0x%08x\n",
1286 (uintptr_t)s
->data_gen_ptr
+ i
,
1287 *(uint32_t *)(s
->data_gen_ptr
+ i
));
1291 log_disas(buf0
, prologue_size
);
1295 qemu_log_unlock(logfile
);
1299 /* Assert that goto_ptr is implemented completely. */
1300 if (TCG_TARGET_HAS_goto_ptr
) {
1301 tcg_debug_assert(tcg_code_gen_epilogue
!= NULL
);
1305 void tcg_func_start(TCGContext
*s
)
1308 s
->nb_temps
= s
->nb_globals
;
1310 /* No temps have been previously allocated for size or locality. */
1311 memset(s
->free_temps
, 0, sizeof(s
->free_temps
));
1313 /* No constant temps have been previously allocated. */
1314 for (int i
= 0; i
< TCG_TYPE_COUNT
; ++i
) {
1315 if (s
->const_table
[i
]) {
1316 g_hash_table_remove_all(s
->const_table
[i
]);
1322 s
->current_frame_offset
= s
->frame_start
;
1324 #ifdef CONFIG_DEBUG_TCG
1325 s
->goto_tb_issue_mask
= 0;
1328 QTAILQ_INIT(&s
->ops
);
1329 QTAILQ_INIT(&s
->free_ops
);
1330 QSIMPLEQ_INIT(&s
->labels
);
1333 static TCGTemp
*tcg_temp_alloc(TCGContext
*s
)
1335 int n
= s
->nb_temps
++;
1337 if (n
>= TCG_MAX_TEMPS
) {
1338 tcg_raise_tb_overflow(s
);
1340 return memset(&s
->temps
[n
], 0, sizeof(TCGTemp
));
1343 static TCGTemp
*tcg_global_alloc(TCGContext
*s
)
1347 tcg_debug_assert(s
->nb_globals
== s
->nb_temps
);
1348 tcg_debug_assert(s
->nb_globals
< TCG_MAX_TEMPS
);
1350 ts
= tcg_temp_alloc(s
);
1351 ts
->kind
= TEMP_GLOBAL
;
1356 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
1357 TCGReg reg
, const char *name
)
1361 if (TCG_TARGET_REG_BITS
== 32 && type
!= TCG_TYPE_I32
) {
1365 ts
= tcg_global_alloc(s
);
1366 ts
->base_type
= type
;
1368 ts
->kind
= TEMP_FIXED
;
1371 tcg_regset_set_reg(s
->reserved_regs
, reg
);
1376 void tcg_set_frame(TCGContext
*s
, TCGReg reg
, intptr_t start
, intptr_t size
)
1378 s
->frame_start
= start
;
1379 s
->frame_end
= start
+ size
;
1381 = tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, reg
, "_frame");
1384 TCGTemp
*tcg_global_mem_new_internal(TCGType type
, TCGv_ptr base
,
1385 intptr_t offset
, const char *name
)
1387 TCGContext
*s
= tcg_ctx
;
1388 TCGTemp
*base_ts
= tcgv_ptr_temp(base
);
1389 TCGTemp
*ts
= tcg_global_alloc(s
);
1390 int indirect_reg
= 0, bigendian
= 0;
1391 #ifdef HOST_WORDS_BIGENDIAN
1395 switch (base_ts
->kind
) {
1399 /* We do not support double-indirect registers. */
1400 tcg_debug_assert(!base_ts
->indirect_reg
);
1401 base_ts
->indirect_base
= 1;
1402 s
->nb_indirects
+= (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
1407 g_assert_not_reached();
1410 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1411 TCGTemp
*ts2
= tcg_global_alloc(s
);
1414 ts
->base_type
= TCG_TYPE_I64
;
1415 ts
->type
= TCG_TYPE_I32
;
1416 ts
->indirect_reg
= indirect_reg
;
1417 ts
->mem_allocated
= 1;
1418 ts
->mem_base
= base_ts
;
1419 ts
->mem_offset
= offset
+ bigendian
* 4;
1420 pstrcpy(buf
, sizeof(buf
), name
);
1421 pstrcat(buf
, sizeof(buf
), "_0");
1422 ts
->name
= strdup(buf
);
1424 tcg_debug_assert(ts2
== ts
+ 1);
1425 ts2
->base_type
= TCG_TYPE_I64
;
1426 ts2
->type
= TCG_TYPE_I32
;
1427 ts2
->indirect_reg
= indirect_reg
;
1428 ts2
->mem_allocated
= 1;
1429 ts2
->mem_base
= base_ts
;
1430 ts2
->mem_offset
= offset
+ (1 - bigendian
) * 4;
1431 pstrcpy(buf
, sizeof(buf
), name
);
1432 pstrcat(buf
, sizeof(buf
), "_1");
1433 ts2
->name
= strdup(buf
);
1435 ts
->base_type
= type
;
1437 ts
->indirect_reg
= indirect_reg
;
1438 ts
->mem_allocated
= 1;
1439 ts
->mem_base
= base_ts
;
1440 ts
->mem_offset
= offset
;
1446 TCGTemp
*tcg_temp_new_internal(TCGType type
, bool temp_local
)
1448 TCGContext
*s
= tcg_ctx
;
1449 TCGTempKind kind
= temp_local
? TEMP_LOCAL
: TEMP_NORMAL
;
1453 k
= type
+ (temp_local
? TCG_TYPE_COUNT
: 0);
1454 idx
= find_first_bit(s
->free_temps
[k
].l
, TCG_MAX_TEMPS
);
1455 if (idx
< TCG_MAX_TEMPS
) {
1456 /* There is already an available temp with the right type. */
1457 clear_bit(idx
, s
->free_temps
[k
].l
);
1459 ts
= &s
->temps
[idx
];
1460 ts
->temp_allocated
= 1;
1461 tcg_debug_assert(ts
->base_type
== type
);
1462 tcg_debug_assert(ts
->kind
== kind
);
1464 ts
= tcg_temp_alloc(s
);
1465 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1466 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1468 ts
->base_type
= type
;
1469 ts
->type
= TCG_TYPE_I32
;
1470 ts
->temp_allocated
= 1;
1473 tcg_debug_assert(ts2
== ts
+ 1);
1474 ts2
->base_type
= TCG_TYPE_I64
;
1475 ts2
->type
= TCG_TYPE_I32
;
1476 ts2
->temp_allocated
= 1;
1479 ts
->base_type
= type
;
1481 ts
->temp_allocated
= 1;
1486 #if defined(CONFIG_DEBUG_TCG)
1492 TCGv_vec
tcg_temp_new_vec(TCGType type
)
1496 #ifdef CONFIG_DEBUG_TCG
1499 assert(TCG_TARGET_HAS_v64
);
1502 assert(TCG_TARGET_HAS_v128
);
1505 assert(TCG_TARGET_HAS_v256
);
1508 g_assert_not_reached();
1512 t
= tcg_temp_new_internal(type
, 0);
1513 return temp_tcgv_vec(t
);
1516 /* Create a new temp of the same type as an existing temp. */
1517 TCGv_vec
tcg_temp_new_vec_matching(TCGv_vec match
)
1519 TCGTemp
*t
= tcgv_vec_temp(match
);
1521 tcg_debug_assert(t
->temp_allocated
!= 0);
1523 t
= tcg_temp_new_internal(t
->base_type
, 0);
1524 return temp_tcgv_vec(t
);
1527 void tcg_temp_free_internal(TCGTemp
*ts
)
1529 TCGContext
*s
= tcg_ctx
;
1532 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1533 if (ts
->kind
== TEMP_CONST
) {
1537 #if defined(CONFIG_DEBUG_TCG)
1539 if (s
->temps_in_use
< 0) {
1540 fprintf(stderr
, "More temporaries freed than allocated!\n");
1544 tcg_debug_assert(ts
->kind
< TEMP_GLOBAL
);
1545 tcg_debug_assert(ts
->temp_allocated
!= 0);
1546 ts
->temp_allocated
= 0;
1549 k
= ts
->base_type
+ (ts
->kind
== TEMP_NORMAL
? 0 : TCG_TYPE_COUNT
);
1550 set_bit(idx
, s
->free_temps
[k
].l
);
1553 TCGTemp
*tcg_constant_internal(TCGType type
, int64_t val
)
1555 TCGContext
*s
= tcg_ctx
;
1556 GHashTable
*h
= s
->const_table
[type
];
1560 h
= g_hash_table_new(g_int64_hash
, g_int64_equal
);
1561 s
->const_table
[type
] = h
;
1564 ts
= g_hash_table_lookup(h
, &val
);
1566 ts
= tcg_temp_alloc(s
);
1568 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1569 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1571 ts
->base_type
= TCG_TYPE_I64
;
1572 ts
->type
= TCG_TYPE_I32
;
1573 ts
->kind
= TEMP_CONST
;
1574 ts
->temp_allocated
= 1;
1576 * Retain the full value of the 64-bit constant in the low
1577 * part, so that the hash table works. Actual uses will
1578 * truncate the value to the low part.
1582 tcg_debug_assert(ts2
== ts
+ 1);
1583 ts2
->base_type
= TCG_TYPE_I64
;
1584 ts2
->type
= TCG_TYPE_I32
;
1585 ts2
->kind
= TEMP_CONST
;
1586 ts2
->temp_allocated
= 1;
1587 ts2
->val
= val
>> 32;
1589 ts
->base_type
= type
;
1591 ts
->kind
= TEMP_CONST
;
1592 ts
->temp_allocated
= 1;
1595 g_hash_table_insert(h
, &ts
->val
, ts
);
1601 TCGv_vec
tcg_constant_vec(TCGType type
, unsigned vece
, int64_t val
)
1603 val
= dup_const(vece
, val
);
1604 return temp_tcgv_vec(tcg_constant_internal(type
, val
));
1607 TCGv_vec
tcg_constant_vec_matching(TCGv_vec match
, unsigned vece
, int64_t val
)
1609 TCGTemp
*t
= tcgv_vec_temp(match
);
1611 tcg_debug_assert(t
->temp_allocated
!= 0);
1612 return tcg_constant_vec(t
->base_type
, vece
, val
);
1615 TCGv_i32
tcg_const_i32(int32_t val
)
1618 t0
= tcg_temp_new_i32();
1619 tcg_gen_movi_i32(t0
, val
);
1623 TCGv_i64
tcg_const_i64(int64_t val
)
1626 t0
= tcg_temp_new_i64();
1627 tcg_gen_movi_i64(t0
, val
);
1631 TCGv_i32
tcg_const_local_i32(int32_t val
)
1634 t0
= tcg_temp_local_new_i32();
1635 tcg_gen_movi_i32(t0
, val
);
1639 TCGv_i64
tcg_const_local_i64(int64_t val
)
1642 t0
= tcg_temp_local_new_i64();
1643 tcg_gen_movi_i64(t0
, val
);
1647 #if defined(CONFIG_DEBUG_TCG)
1648 void tcg_clear_temp_count(void)
1650 TCGContext
*s
= tcg_ctx
;
1651 s
->temps_in_use
= 0;
1654 int tcg_check_temp_count(void)
1656 TCGContext
*s
= tcg_ctx
;
1657 if (s
->temps_in_use
) {
1658 /* Clear the count so that we don't give another
1659 * warning immediately next time around.
1661 s
->temps_in_use
= 0;
1668 /* Return true if OP may appear in the opcode stream.
1669 Test the runtime variable that controls each opcode. */
1670 bool tcg_op_supported(TCGOpcode op
)
1673 = TCG_TARGET_HAS_v64
| TCG_TARGET_HAS_v128
| TCG_TARGET_HAS_v256
;
1676 case INDEX_op_discard
:
1677 case INDEX_op_set_label
:
1681 case INDEX_op_insn_start
:
1682 case INDEX_op_exit_tb
:
1683 case INDEX_op_goto_tb
:
1684 case INDEX_op_qemu_ld_i32
:
1685 case INDEX_op_qemu_st_i32
:
1686 case INDEX_op_qemu_ld_i64
:
1687 case INDEX_op_qemu_st_i64
:
1690 case INDEX_op_qemu_st8_i32
:
1691 return TCG_TARGET_HAS_qemu_st8_i32
;
1693 case INDEX_op_goto_ptr
:
1694 return TCG_TARGET_HAS_goto_ptr
;
1696 case INDEX_op_mov_i32
:
1697 case INDEX_op_setcond_i32
:
1698 case INDEX_op_brcond_i32
:
1699 case INDEX_op_ld8u_i32
:
1700 case INDEX_op_ld8s_i32
:
1701 case INDEX_op_ld16u_i32
:
1702 case INDEX_op_ld16s_i32
:
1703 case INDEX_op_ld_i32
:
1704 case INDEX_op_st8_i32
:
1705 case INDEX_op_st16_i32
:
1706 case INDEX_op_st_i32
:
1707 case INDEX_op_add_i32
:
1708 case INDEX_op_sub_i32
:
1709 case INDEX_op_mul_i32
:
1710 case INDEX_op_and_i32
:
1711 case INDEX_op_or_i32
:
1712 case INDEX_op_xor_i32
:
1713 case INDEX_op_shl_i32
:
1714 case INDEX_op_shr_i32
:
1715 case INDEX_op_sar_i32
:
1718 case INDEX_op_movcond_i32
:
1719 return TCG_TARGET_HAS_movcond_i32
;
1720 case INDEX_op_div_i32
:
1721 case INDEX_op_divu_i32
:
1722 return TCG_TARGET_HAS_div_i32
;
1723 case INDEX_op_rem_i32
:
1724 case INDEX_op_remu_i32
:
1725 return TCG_TARGET_HAS_rem_i32
;
1726 case INDEX_op_div2_i32
:
1727 case INDEX_op_divu2_i32
:
1728 return TCG_TARGET_HAS_div2_i32
;
1729 case INDEX_op_rotl_i32
:
1730 case INDEX_op_rotr_i32
:
1731 return TCG_TARGET_HAS_rot_i32
;
1732 case INDEX_op_deposit_i32
:
1733 return TCG_TARGET_HAS_deposit_i32
;
1734 case INDEX_op_extract_i32
:
1735 return TCG_TARGET_HAS_extract_i32
;
1736 case INDEX_op_sextract_i32
:
1737 return TCG_TARGET_HAS_sextract_i32
;
1738 case INDEX_op_extract2_i32
:
1739 return TCG_TARGET_HAS_extract2_i32
;
1740 case INDEX_op_add2_i32
:
1741 return TCG_TARGET_HAS_add2_i32
;
1742 case INDEX_op_sub2_i32
:
1743 return TCG_TARGET_HAS_sub2_i32
;
1744 case INDEX_op_mulu2_i32
:
1745 return TCG_TARGET_HAS_mulu2_i32
;
1746 case INDEX_op_muls2_i32
:
1747 return TCG_TARGET_HAS_muls2_i32
;
1748 case INDEX_op_muluh_i32
:
1749 return TCG_TARGET_HAS_muluh_i32
;
1750 case INDEX_op_mulsh_i32
:
1751 return TCG_TARGET_HAS_mulsh_i32
;
1752 case INDEX_op_ext8s_i32
:
1753 return TCG_TARGET_HAS_ext8s_i32
;
1754 case INDEX_op_ext16s_i32
:
1755 return TCG_TARGET_HAS_ext16s_i32
;
1756 case INDEX_op_ext8u_i32
:
1757 return TCG_TARGET_HAS_ext8u_i32
;
1758 case INDEX_op_ext16u_i32
:
1759 return TCG_TARGET_HAS_ext16u_i32
;
1760 case INDEX_op_bswap16_i32
:
1761 return TCG_TARGET_HAS_bswap16_i32
;
1762 case INDEX_op_bswap32_i32
:
1763 return TCG_TARGET_HAS_bswap32_i32
;
1764 case INDEX_op_not_i32
:
1765 return TCG_TARGET_HAS_not_i32
;
1766 case INDEX_op_neg_i32
:
1767 return TCG_TARGET_HAS_neg_i32
;
1768 case INDEX_op_andc_i32
:
1769 return TCG_TARGET_HAS_andc_i32
;
1770 case INDEX_op_orc_i32
:
1771 return TCG_TARGET_HAS_orc_i32
;
1772 case INDEX_op_eqv_i32
:
1773 return TCG_TARGET_HAS_eqv_i32
;
1774 case INDEX_op_nand_i32
:
1775 return TCG_TARGET_HAS_nand_i32
;
1776 case INDEX_op_nor_i32
:
1777 return TCG_TARGET_HAS_nor_i32
;
1778 case INDEX_op_clz_i32
:
1779 return TCG_TARGET_HAS_clz_i32
;
1780 case INDEX_op_ctz_i32
:
1781 return TCG_TARGET_HAS_ctz_i32
;
1782 case INDEX_op_ctpop_i32
:
1783 return TCG_TARGET_HAS_ctpop_i32
;
1785 case INDEX_op_brcond2_i32
:
1786 case INDEX_op_setcond2_i32
:
1787 return TCG_TARGET_REG_BITS
== 32;
1789 case INDEX_op_mov_i64
:
1790 case INDEX_op_setcond_i64
:
1791 case INDEX_op_brcond_i64
:
1792 case INDEX_op_ld8u_i64
:
1793 case INDEX_op_ld8s_i64
:
1794 case INDEX_op_ld16u_i64
:
1795 case INDEX_op_ld16s_i64
:
1796 case INDEX_op_ld32u_i64
:
1797 case INDEX_op_ld32s_i64
:
1798 case INDEX_op_ld_i64
:
1799 case INDEX_op_st8_i64
:
1800 case INDEX_op_st16_i64
:
1801 case INDEX_op_st32_i64
:
1802 case INDEX_op_st_i64
:
1803 case INDEX_op_add_i64
:
1804 case INDEX_op_sub_i64
:
1805 case INDEX_op_mul_i64
:
1806 case INDEX_op_and_i64
:
1807 case INDEX_op_or_i64
:
1808 case INDEX_op_xor_i64
:
1809 case INDEX_op_shl_i64
:
1810 case INDEX_op_shr_i64
:
1811 case INDEX_op_sar_i64
:
1812 case INDEX_op_ext_i32_i64
:
1813 case INDEX_op_extu_i32_i64
:
1814 return TCG_TARGET_REG_BITS
== 64;
1816 case INDEX_op_movcond_i64
:
1817 return TCG_TARGET_HAS_movcond_i64
;
1818 case INDEX_op_div_i64
:
1819 case INDEX_op_divu_i64
:
1820 return TCG_TARGET_HAS_div_i64
;
1821 case INDEX_op_rem_i64
:
1822 case INDEX_op_remu_i64
:
1823 return TCG_TARGET_HAS_rem_i64
;
1824 case INDEX_op_div2_i64
:
1825 case INDEX_op_divu2_i64
:
1826 return TCG_TARGET_HAS_div2_i64
;
1827 case INDEX_op_rotl_i64
:
1828 case INDEX_op_rotr_i64
:
1829 return TCG_TARGET_HAS_rot_i64
;
1830 case INDEX_op_deposit_i64
:
1831 return TCG_TARGET_HAS_deposit_i64
;
1832 case INDEX_op_extract_i64
:
1833 return TCG_TARGET_HAS_extract_i64
;
1834 case INDEX_op_sextract_i64
:
1835 return TCG_TARGET_HAS_sextract_i64
;
1836 case INDEX_op_extract2_i64
:
1837 return TCG_TARGET_HAS_extract2_i64
;
1838 case INDEX_op_extrl_i64_i32
:
1839 return TCG_TARGET_HAS_extrl_i64_i32
;
1840 case INDEX_op_extrh_i64_i32
:
1841 return TCG_TARGET_HAS_extrh_i64_i32
;
1842 case INDEX_op_ext8s_i64
:
1843 return TCG_TARGET_HAS_ext8s_i64
;
1844 case INDEX_op_ext16s_i64
:
1845 return TCG_TARGET_HAS_ext16s_i64
;
1846 case INDEX_op_ext32s_i64
:
1847 return TCG_TARGET_HAS_ext32s_i64
;
1848 case INDEX_op_ext8u_i64
:
1849 return TCG_TARGET_HAS_ext8u_i64
;
1850 case INDEX_op_ext16u_i64
:
1851 return TCG_TARGET_HAS_ext16u_i64
;
1852 case INDEX_op_ext32u_i64
:
1853 return TCG_TARGET_HAS_ext32u_i64
;
1854 case INDEX_op_bswap16_i64
:
1855 return TCG_TARGET_HAS_bswap16_i64
;
1856 case INDEX_op_bswap32_i64
:
1857 return TCG_TARGET_HAS_bswap32_i64
;
1858 case INDEX_op_bswap64_i64
:
1859 return TCG_TARGET_HAS_bswap64_i64
;
1860 case INDEX_op_not_i64
:
1861 return TCG_TARGET_HAS_not_i64
;
1862 case INDEX_op_neg_i64
:
1863 return TCG_TARGET_HAS_neg_i64
;
1864 case INDEX_op_andc_i64
:
1865 return TCG_TARGET_HAS_andc_i64
;
1866 case INDEX_op_orc_i64
:
1867 return TCG_TARGET_HAS_orc_i64
;
1868 case INDEX_op_eqv_i64
:
1869 return TCG_TARGET_HAS_eqv_i64
;
1870 case INDEX_op_nand_i64
:
1871 return TCG_TARGET_HAS_nand_i64
;
1872 case INDEX_op_nor_i64
:
1873 return TCG_TARGET_HAS_nor_i64
;
1874 case INDEX_op_clz_i64
:
1875 return TCG_TARGET_HAS_clz_i64
;
1876 case INDEX_op_ctz_i64
:
1877 return TCG_TARGET_HAS_ctz_i64
;
1878 case INDEX_op_ctpop_i64
:
1879 return TCG_TARGET_HAS_ctpop_i64
;
1880 case INDEX_op_add2_i64
:
1881 return TCG_TARGET_HAS_add2_i64
;
1882 case INDEX_op_sub2_i64
:
1883 return TCG_TARGET_HAS_sub2_i64
;
1884 case INDEX_op_mulu2_i64
:
1885 return TCG_TARGET_HAS_mulu2_i64
;
1886 case INDEX_op_muls2_i64
:
1887 return TCG_TARGET_HAS_muls2_i64
;
1888 case INDEX_op_muluh_i64
:
1889 return TCG_TARGET_HAS_muluh_i64
;
1890 case INDEX_op_mulsh_i64
:
1891 return TCG_TARGET_HAS_mulsh_i64
;
1893 case INDEX_op_mov_vec
:
1894 case INDEX_op_dup_vec
:
1895 case INDEX_op_dupm_vec
:
1896 case INDEX_op_ld_vec
:
1897 case INDEX_op_st_vec
:
1898 case INDEX_op_add_vec
:
1899 case INDEX_op_sub_vec
:
1900 case INDEX_op_and_vec
:
1901 case INDEX_op_or_vec
:
1902 case INDEX_op_xor_vec
:
1903 case INDEX_op_cmp_vec
:
1905 case INDEX_op_dup2_vec
:
1906 return have_vec
&& TCG_TARGET_REG_BITS
== 32;
1907 case INDEX_op_not_vec
:
1908 return have_vec
&& TCG_TARGET_HAS_not_vec
;
1909 case INDEX_op_neg_vec
:
1910 return have_vec
&& TCG_TARGET_HAS_neg_vec
;
1911 case INDEX_op_abs_vec
:
1912 return have_vec
&& TCG_TARGET_HAS_abs_vec
;
1913 case INDEX_op_andc_vec
:
1914 return have_vec
&& TCG_TARGET_HAS_andc_vec
;
1915 case INDEX_op_orc_vec
:
1916 return have_vec
&& TCG_TARGET_HAS_orc_vec
;
1917 case INDEX_op_mul_vec
:
1918 return have_vec
&& TCG_TARGET_HAS_mul_vec
;
1919 case INDEX_op_shli_vec
:
1920 case INDEX_op_shri_vec
:
1921 case INDEX_op_sari_vec
:
1922 return have_vec
&& TCG_TARGET_HAS_shi_vec
;
1923 case INDEX_op_shls_vec
:
1924 case INDEX_op_shrs_vec
:
1925 case INDEX_op_sars_vec
:
1926 return have_vec
&& TCG_TARGET_HAS_shs_vec
;
1927 case INDEX_op_shlv_vec
:
1928 case INDEX_op_shrv_vec
:
1929 case INDEX_op_sarv_vec
:
1930 return have_vec
&& TCG_TARGET_HAS_shv_vec
;
1931 case INDEX_op_rotli_vec
:
1932 return have_vec
&& TCG_TARGET_HAS_roti_vec
;
1933 case INDEX_op_rotls_vec
:
1934 return have_vec
&& TCG_TARGET_HAS_rots_vec
;
1935 case INDEX_op_rotlv_vec
:
1936 case INDEX_op_rotrv_vec
:
1937 return have_vec
&& TCG_TARGET_HAS_rotv_vec
;
1938 case INDEX_op_ssadd_vec
:
1939 case INDEX_op_usadd_vec
:
1940 case INDEX_op_sssub_vec
:
1941 case INDEX_op_ussub_vec
:
1942 return have_vec
&& TCG_TARGET_HAS_sat_vec
;
1943 case INDEX_op_smin_vec
:
1944 case INDEX_op_umin_vec
:
1945 case INDEX_op_smax_vec
:
1946 case INDEX_op_umax_vec
:
1947 return have_vec
&& TCG_TARGET_HAS_minmax_vec
;
1948 case INDEX_op_bitsel_vec
:
1949 return have_vec
&& TCG_TARGET_HAS_bitsel_vec
;
1950 case INDEX_op_cmpsel_vec
:
1951 return have_vec
&& TCG_TARGET_HAS_cmpsel_vec
;
1954 tcg_debug_assert(op
> INDEX_op_last_generic
&& op
< NB_OPS
);
1959 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1960 and endian swap. Maybe it would be better to do the alignment
1961 and endian swap in tcg_reg_alloc_call(). */
1962 void tcg_gen_callN(void *func
, TCGTemp
*ret
, int nargs
, TCGTemp
**args
)
1964 int i
, real_args
, nb_rets
, pi
;
1965 unsigned sizemask
, flags
;
1966 TCGHelperInfo
*info
;
1969 info
= g_hash_table_lookup(helper_table
, (gpointer
)func
);
1970 flags
= info
->flags
;
1971 sizemask
= info
->sizemask
;
1973 #ifdef CONFIG_PLUGIN
1974 /* detect non-plugin helpers */
1975 if (tcg_ctx
->plugin_insn
&& unlikely(strncmp(info
->name
, "plugin_", 7))) {
1976 tcg_ctx
->plugin_insn
->calls_helpers
= true;
1980 #if defined(__sparc__) && !defined(__arch64__) \
1981 && !defined(CONFIG_TCG_INTERPRETER)
1982 /* We have 64-bit values in one register, but need to pass as two
1983 separate parameters. Split them. */
1984 int orig_sizemask
= sizemask
;
1985 int orig_nargs
= nargs
;
1986 TCGv_i64 retl
, reth
;
1987 TCGTemp
*split_args
[MAX_OPC_PARAM
];
1991 if (sizemask
!= 0) {
1992 for (i
= real_args
= 0; i
< nargs
; ++i
) {
1993 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1995 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1996 TCGv_i32 h
= tcg_temp_new_i32();
1997 TCGv_i32 l
= tcg_temp_new_i32();
1998 tcg_gen_extr_i64_i32(l
, h
, orig
);
1999 split_args
[real_args
++] = tcgv_i32_temp(h
);
2000 split_args
[real_args
++] = tcgv_i32_temp(l
);
2002 split_args
[real_args
++] = args
[i
];
2009 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2010 for (i
= 0; i
< nargs
; ++i
) {
2011 int is_64bit
= sizemask
& (1 << (i
+1)*2);
2012 int is_signed
= sizemask
& (2 << (i
+1)*2);
2014 TCGv_i64 temp
= tcg_temp_new_i64();
2015 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
2017 tcg_gen_ext32s_i64(temp
, orig
);
2019 tcg_gen_ext32u_i64(temp
, orig
);
2021 args
[i
] = tcgv_i64_temp(temp
);
2024 #endif /* TCG_TARGET_EXTEND_ARGS */
2026 op
= tcg_emit_op(INDEX_op_call
);
2030 #if defined(__sparc__) && !defined(__arch64__) \
2031 && !defined(CONFIG_TCG_INTERPRETER)
2032 if (orig_sizemask
& 1) {
2033 /* The 32-bit ABI is going to return the 64-bit value in
2034 the %o0/%o1 register pair. Prepare for this by using
2035 two return temporaries, and reassemble below. */
2036 retl
= tcg_temp_new_i64();
2037 reth
= tcg_temp_new_i64();
2038 op
->args
[pi
++] = tcgv_i64_arg(reth
);
2039 op
->args
[pi
++] = tcgv_i64_arg(retl
);
2042 op
->args
[pi
++] = temp_arg(ret
);
2046 if (TCG_TARGET_REG_BITS
< 64 && (sizemask
& 1)) {
2047 #ifdef HOST_WORDS_BIGENDIAN
2048 op
->args
[pi
++] = temp_arg(ret
+ 1);
2049 op
->args
[pi
++] = temp_arg(ret
);
2051 op
->args
[pi
++] = temp_arg(ret
);
2052 op
->args
[pi
++] = temp_arg(ret
+ 1);
2056 op
->args
[pi
++] = temp_arg(ret
);
2063 TCGOP_CALLO(op
) = nb_rets
;
2066 for (i
= 0; i
< nargs
; i
++) {
2067 int is_64bit
= sizemask
& (1 << (i
+1)*2);
2068 if (TCG_TARGET_REG_BITS
< 64 && is_64bit
) {
2069 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2070 /* some targets want aligned 64 bit args */
2071 if (real_args
& 1) {
2072 op
->args
[pi
++] = TCG_CALL_DUMMY_ARG
;
2076 /* If stack grows up, then we will be placing successive
2077 arguments at lower addresses, which means we need to
2078 reverse the order compared to how we would normally
2079 treat either big or little-endian. For those arguments
2080 that will wind up in registers, this still works for
2081 HPPA (the only current STACK_GROWSUP target) since the
2082 argument registers are *also* allocated in decreasing
2083 order. If another such target is added, this logic may
2084 have to get more complicated to differentiate between
2085 stack arguments and register arguments. */
2086 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2087 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
2088 op
->args
[pi
++] = temp_arg(args
[i
]);
2090 op
->args
[pi
++] = temp_arg(args
[i
]);
2091 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
2097 op
->args
[pi
++] = temp_arg(args
[i
]);
2100 op
->args
[pi
++] = (uintptr_t)func
;
2101 op
->args
[pi
++] = flags
;
2102 TCGOP_CALLI(op
) = real_args
;
2104 /* Make sure the fields didn't overflow. */
2105 tcg_debug_assert(TCGOP_CALLI(op
) == real_args
);
2106 tcg_debug_assert(pi
<= ARRAY_SIZE(op
->args
));
2108 #if defined(__sparc__) && !defined(__arch64__) \
2109 && !defined(CONFIG_TCG_INTERPRETER)
2110 /* Free all of the parts we allocated above. */
2111 for (i
= real_args
= 0; i
< orig_nargs
; ++i
) {
2112 int is_64bit
= orig_sizemask
& (1 << (i
+1)*2);
2114 tcg_temp_free_internal(args
[real_args
++]);
2115 tcg_temp_free_internal(args
[real_args
++]);
2120 if (orig_sizemask
& 1) {
2121 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
2122 Note that describing these as TCGv_i64 eliminates an unnecessary
2123 zero-extension that tcg_gen_concat_i32_i64 would create. */
2124 tcg_gen_concat32_i64(temp_tcgv_i64(ret
), retl
, reth
);
2125 tcg_temp_free_i64(retl
);
2126 tcg_temp_free_i64(reth
);
2128 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2129 for (i
= 0; i
< nargs
; ++i
) {
2130 int is_64bit
= sizemask
& (1 << (i
+1)*2);
2132 tcg_temp_free_internal(args
[i
]);
2135 #endif /* TCG_TARGET_EXTEND_ARGS */
2138 static void tcg_reg_alloc_start(TCGContext
*s
)
2142 for (i
= 0, n
= s
->nb_temps
; i
< n
; i
++) {
2143 TCGTemp
*ts
= &s
->temps
[i
];
2144 TCGTempVal val
= TEMP_VAL_MEM
;
2148 val
= TEMP_VAL_CONST
;
2156 val
= TEMP_VAL_DEAD
;
2159 ts
->mem_allocated
= 0;
2162 g_assert_not_reached();
2167 memset(s
->reg_to_temp
, 0, sizeof(s
->reg_to_temp
));
2170 static char *tcg_get_arg_str_ptr(TCGContext
*s
, char *buf
, int buf_size
,
2173 int idx
= temp_idx(ts
);
2178 pstrcpy(buf
, buf_size
, ts
->name
);
2181 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
2184 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
2189 snprintf(buf
, buf_size
, "$0x%x", (int32_t)ts
->val
);
2191 #if TCG_TARGET_REG_BITS > 32
2193 snprintf(buf
, buf_size
, "$0x%" PRIx64
, ts
->val
);
2199 snprintf(buf
, buf_size
, "v%d$0x%" PRIx64
,
2200 64 << (ts
->type
- TCG_TYPE_V64
), ts
->val
);
2203 g_assert_not_reached();
2210 static char *tcg_get_arg_str(TCGContext
*s
, char *buf
,
2211 int buf_size
, TCGArg arg
)
2213 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, arg_temp(arg
));
2216 /* Find helper name. */
2217 static inline const char *tcg_find_helper(TCGContext
*s
, uintptr_t val
)
2219 const char *ret
= NULL
;
2221 TCGHelperInfo
*info
= g_hash_table_lookup(helper_table
, (gpointer
)val
);
2229 static const char * const cond_name
[] =
2231 [TCG_COND_NEVER
] = "never",
2232 [TCG_COND_ALWAYS
] = "always",
2233 [TCG_COND_EQ
] = "eq",
2234 [TCG_COND_NE
] = "ne",
2235 [TCG_COND_LT
] = "lt",
2236 [TCG_COND_GE
] = "ge",
2237 [TCG_COND_LE
] = "le",
2238 [TCG_COND_GT
] = "gt",
2239 [TCG_COND_LTU
] = "ltu",
2240 [TCG_COND_GEU
] = "geu",
2241 [TCG_COND_LEU
] = "leu",
2242 [TCG_COND_GTU
] = "gtu"
2245 static const char * const ldst_name
[] =
2261 static const char * const alignment_name
[(MO_AMASK
>> MO_ASHIFT
) + 1] = {
2262 #ifdef TARGET_ALIGNED_ONLY
2263 [MO_UNALN
>> MO_ASHIFT
] = "un+",
2264 [MO_ALIGN
>> MO_ASHIFT
] = "",
2266 [MO_UNALN
>> MO_ASHIFT
] = "",
2267 [MO_ALIGN
>> MO_ASHIFT
] = "al+",
2269 [MO_ALIGN_2
>> MO_ASHIFT
] = "al2+",
2270 [MO_ALIGN_4
>> MO_ASHIFT
] = "al4+",
2271 [MO_ALIGN_8
>> MO_ASHIFT
] = "al8+",
2272 [MO_ALIGN_16
>> MO_ASHIFT
] = "al16+",
2273 [MO_ALIGN_32
>> MO_ASHIFT
] = "al32+",
2274 [MO_ALIGN_64
>> MO_ASHIFT
] = "al64+",
2277 static inline bool tcg_regset_single(TCGRegSet d
)
2279 return (d
& (d
- 1)) == 0;
2282 static inline TCGReg
tcg_regset_first(TCGRegSet d
)
2284 if (TCG_TARGET_NB_REGS
<= 32) {
2291 static void tcg_dump_ops(TCGContext
*s
, bool have_prefs
)
2296 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
2297 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
2298 const TCGOpDef
*def
;
2303 def
= &tcg_op_defs
[c
];
2305 if (c
== INDEX_op_insn_start
) {
2307 col
+= qemu_log("\n ----");
2309 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
2311 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2312 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
2316 col
+= qemu_log(" " TARGET_FMT_lx
, a
);
2318 } else if (c
== INDEX_op_call
) {
2319 /* variable number of arguments */
2320 nb_oargs
= TCGOP_CALLO(op
);
2321 nb_iargs
= TCGOP_CALLI(op
);
2322 nb_cargs
= def
->nb_cargs
;
2324 /* function name, flags, out args */
2325 col
+= qemu_log(" %s %s,$0x%" TCG_PRIlx
",$%d", def
->name
,
2326 tcg_find_helper(s
, op
->args
[nb_oargs
+ nb_iargs
]),
2327 op
->args
[nb_oargs
+ nb_iargs
+ 1], nb_oargs
);
2328 for (i
= 0; i
< nb_oargs
; i
++) {
2329 col
+= qemu_log(",%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2332 for (i
= 0; i
< nb_iargs
; i
++) {
2333 TCGArg arg
= op
->args
[nb_oargs
+ i
];
2334 const char *t
= "<dummy>";
2335 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2336 t
= tcg_get_arg_str(s
, buf
, sizeof(buf
), arg
);
2338 col
+= qemu_log(",%s", t
);
2341 col
+= qemu_log(" %s ", def
->name
);
2343 nb_oargs
= def
->nb_oargs
;
2344 nb_iargs
= def
->nb_iargs
;
2345 nb_cargs
= def
->nb_cargs
;
2347 if (def
->flags
& TCG_OPF_VECTOR
) {
2348 col
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op
),
2349 8 << TCGOP_VECE(op
));
2353 for (i
= 0; i
< nb_oargs
; i
++) {
2355 col
+= qemu_log(",");
2357 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2360 for (i
= 0; i
< nb_iargs
; i
++) {
2362 col
+= qemu_log(",");
2364 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2368 case INDEX_op_brcond_i32
:
2369 case INDEX_op_setcond_i32
:
2370 case INDEX_op_movcond_i32
:
2371 case INDEX_op_brcond2_i32
:
2372 case INDEX_op_setcond2_i32
:
2373 case INDEX_op_brcond_i64
:
2374 case INDEX_op_setcond_i64
:
2375 case INDEX_op_movcond_i64
:
2376 case INDEX_op_cmp_vec
:
2377 case INDEX_op_cmpsel_vec
:
2378 if (op
->args
[k
] < ARRAY_SIZE(cond_name
)
2379 && cond_name
[op
->args
[k
]]) {
2380 col
+= qemu_log(",%s", cond_name
[op
->args
[k
++]]);
2382 col
+= qemu_log(",$0x%" TCG_PRIlx
, op
->args
[k
++]);
2386 case INDEX_op_qemu_ld_i32
:
2387 case INDEX_op_qemu_st_i32
:
2388 case INDEX_op_qemu_st8_i32
:
2389 case INDEX_op_qemu_ld_i64
:
2390 case INDEX_op_qemu_st_i64
:
2392 TCGMemOpIdx oi
= op
->args
[k
++];
2393 MemOp op
= get_memop(oi
);
2394 unsigned ix
= get_mmuidx(oi
);
2396 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
2397 col
+= qemu_log(",$0x%x,%u", op
, ix
);
2399 const char *s_al
, *s_op
;
2400 s_al
= alignment_name
[(op
& MO_AMASK
) >> MO_ASHIFT
];
2401 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
2402 col
+= qemu_log(",%s%s,%u", s_al
, s_op
, ix
);
2412 case INDEX_op_set_label
:
2414 case INDEX_op_brcond_i32
:
2415 case INDEX_op_brcond_i64
:
2416 case INDEX_op_brcond2_i32
:
2417 col
+= qemu_log("%s$L%d", k
? "," : "",
2418 arg_label(op
->args
[k
])->id
);
2424 for (; i
< nb_cargs
; i
++, k
++) {
2425 col
+= qemu_log("%s$0x%" TCG_PRIlx
, k
? "," : "", op
->args
[k
]);
2429 if (have_prefs
|| op
->life
) {
2431 QemuLogFile
*logfile
;
2434 logfile
= qatomic_rcu_read(&qemu_logfile
);
2436 for (; col
< 40; ++col
) {
2437 putc(' ', logfile
->fd
);
2444 unsigned life
= op
->life
;
2446 if (life
& (SYNC_ARG
* 3)) {
2448 for (i
= 0; i
< 2; ++i
) {
2449 if (life
& (SYNC_ARG
<< i
)) {
2457 for (i
= 0; life
; ++i
, life
>>= 1) {
2466 for (i
= 0; i
< nb_oargs
; ++i
) {
2467 TCGRegSet set
= op
->output_pref
[i
];
2476 } else if (set
== MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS
)) {
2478 #ifdef CONFIG_DEBUG_TCG
2479 } else if (tcg_regset_single(set
)) {
2480 TCGReg reg
= tcg_regset_first(set
);
2481 qemu_log("%s", tcg_target_reg_names
[reg
]);
2483 } else if (TCG_TARGET_NB_REGS
<= 32) {
2484 qemu_log("%#x", (uint32_t)set
);
2486 qemu_log("%#" PRIx64
, (uint64_t)set
);
2495 /* we give more priority to constraints with less registers */
2496 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
2498 const TCGArgConstraint
*arg_ct
= &def
->args_ct
[k
];
2501 if (arg_ct
->oalias
) {
2502 /* an alias is equivalent to a single register */
2505 n
= ctpop64(arg_ct
->regs
);
2507 return TCG_TARGET_NB_REGS
- n
+ 1;
2510 /* sort from highest priority to lowest */
2511 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
2514 TCGArgConstraint
*a
= def
->args_ct
;
2516 for (i
= 0; i
< n
; i
++) {
2517 a
[start
+ i
].sort_index
= start
+ i
;
2522 for (i
= 0; i
< n
- 1; i
++) {
2523 for (j
= i
+ 1; j
< n
; j
++) {
2524 int p1
= get_constraint_priority(def
, a
[start
+ i
].sort_index
);
2525 int p2
= get_constraint_priority(def
, a
[start
+ j
].sort_index
);
2527 int tmp
= a
[start
+ i
].sort_index
;
2528 a
[start
+ i
].sort_index
= a
[start
+ j
].sort_index
;
2529 a
[start
+ j
].sort_index
= tmp
;
2535 static void process_op_defs(TCGContext
*s
)
2539 for (op
= 0; op
< NB_OPS
; op
++) {
2540 TCGOpDef
*def
= &tcg_op_defs
[op
];
2541 const TCGTargetOpDef
*tdefs
;
2544 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2548 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
2554 * Macro magic should make it impossible, but double-check that
2555 * the array index is in range. Since the signness of an enum
2556 * is implementation defined, force the result to unsigned.
2558 unsigned con_set
= tcg_target_op_def(op
);
2559 tcg_debug_assert(con_set
< ARRAY_SIZE(constraint_sets
));
2560 tdefs
= &constraint_sets
[con_set
];
2562 for (i
= 0; i
< nb_args
; i
++) {
2563 const char *ct_str
= tdefs
->args_ct_str
[i
];
2564 /* Incomplete TCGTargetOpDef entry. */
2565 tcg_debug_assert(ct_str
!= NULL
);
2567 while (*ct_str
!= '\0') {
2571 int oarg
= *ct_str
- '0';
2572 tcg_debug_assert(ct_str
== tdefs
->args_ct_str
[i
]);
2573 tcg_debug_assert(oarg
< def
->nb_oargs
);
2574 tcg_debug_assert(def
->args_ct
[oarg
].regs
!= 0);
2575 def
->args_ct
[i
] = def
->args_ct
[oarg
];
2576 /* The output sets oalias. */
2577 def
->args_ct
[oarg
].oalias
= true;
2578 def
->args_ct
[oarg
].alias_index
= i
;
2579 /* The input sets ialias. */
2580 def
->args_ct
[i
].ialias
= true;
2581 def
->args_ct
[i
].alias_index
= oarg
;
2586 def
->args_ct
[i
].newreg
= true;
2590 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
2594 /* Include all of the target-specific constraints. */
2597 #define CONST(CASE, MASK) \
2598 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2599 #define REGS(CASE, MASK) \
2600 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2602 #include "tcg-target-con-str.h"
2607 /* Typo in TCGTargetOpDef constraint. */
2608 g_assert_not_reached();
2613 /* TCGTargetOpDef entry with too much information? */
2614 tcg_debug_assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
2616 /* sort the constraints (XXX: this is just an heuristic) */
2617 sort_constraints(def
, 0, def
->nb_oargs
);
2618 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
2622 void tcg_op_remove(TCGContext
*s
, TCGOp
*op
)
2628 label
= arg_label(op
->args
[0]);
2631 case INDEX_op_brcond_i32
:
2632 case INDEX_op_brcond_i64
:
2633 label
= arg_label(op
->args
[3]);
2636 case INDEX_op_brcond2_i32
:
2637 label
= arg_label(op
->args
[5]);
2644 QTAILQ_REMOVE(&s
->ops
, op
, link
);
2645 QTAILQ_INSERT_TAIL(&s
->free_ops
, op
, link
);
2648 #ifdef CONFIG_PROFILER
2649 qatomic_set(&s
->prof
.del_op_count
, s
->prof
.del_op_count
+ 1);
2653 static TCGOp
*tcg_op_alloc(TCGOpcode opc
)
2655 TCGContext
*s
= tcg_ctx
;
2658 if (likely(QTAILQ_EMPTY(&s
->free_ops
))) {
2659 op
= tcg_malloc(sizeof(TCGOp
));
2661 op
= QTAILQ_FIRST(&s
->free_ops
);
2662 QTAILQ_REMOVE(&s
->free_ops
, op
, link
);
2664 memset(op
, 0, offsetof(TCGOp
, link
));
2671 TCGOp
*tcg_emit_op(TCGOpcode opc
)
2673 TCGOp
*op
= tcg_op_alloc(opc
);
2674 QTAILQ_INSERT_TAIL(&tcg_ctx
->ops
, op
, link
);
2678 TCGOp
*tcg_op_insert_before(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2680 TCGOp
*new_op
= tcg_op_alloc(opc
);
2681 QTAILQ_INSERT_BEFORE(old_op
, new_op
, link
);
2685 TCGOp
*tcg_op_insert_after(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2687 TCGOp
*new_op
= tcg_op_alloc(opc
);
2688 QTAILQ_INSERT_AFTER(&s
->ops
, old_op
, new_op
, link
);
2692 /* Reachable analysis : remove unreachable code. */
/*
 * NOTE(review): mangled extraction -- statements are split across lines
 * and interior lines (the 'dead' flag, the switch head, break/continue
 * statements and the function tail) were dropped.  The surviving
 * fragments are preserved byte-for-byte; restore from upstream QEMU
 * before editing this function.
 */
2693 static void reachable_code_pass(TCGContext
*s
)
2695 TCGOp
*op
, *op_next
;
/* Forward scan; removal while iterating is safe with the _SAFE form. */
2698 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
/* A label with no remaining references can itself be dropped. */
2704 case INDEX_op_set_label
:
2705 label
= arg_label(op
->args
[0]);
2706 if (label
->refs
== 0) {
2708 * While there is an occasional backward branch, virtually
2709 * all branches generated by the translators are forward.
2710 * Which means that generally we will have already removed
2711 * all references to the label that will be, and there is
2712 * little to be gained by iterating.
2716 /* Once we see a label, insns become live again. */
2721 * Optimization can fold conditional branches to unconditional.
2722 * If we find a label with one reference which is preceded by
2723 * an unconditional branch to it, remove both. This needed to
2724 * wait until the dead code in between them was removed.
2726 if (label
->refs
== 1) {
2727 TCGOp
*op_prev
= QTAILQ_PREV(op
, link
);
2728 if (op_prev
->opc
== INDEX_op_br
&&
2729 label
== arg_label(op_prev
->args
[0])) {
2730 tcg_op_remove(s
, op_prev
);
2738 case INDEX_op_exit_tb
:
2739 case INDEX_op_goto_ptr
:
2740 /* Unconditional branches; everything following is dead. */
2745 /* Notice noreturn helper calls, raising exceptions. */
2746 call_flags
= op
->args
[TCGOP_CALLO(op
) + TCGOP_CALLI(op
) + 1];
2747 if (call_flags
& TCG_CALL_NO_RETURN
) {
2752 case INDEX_op_insn_start
:
2753 /* Never remove -- we need to keep these for unwind. */
2762 tcg_op_remove(s
, op
);
2770 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2771 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2773 /* For liveness_pass_1, the register preferences for a given temp. */
2774 static inline TCGRegSet
*la_temp_pref(TCGTemp
*ts
)
2776 return ts
->state_ptr
;
2779 /* For liveness_pass_1, reset the preferences for a given temp to the
2780 * maximal regset for its type.
2782 static inline void la_reset_pref(TCGTemp
*ts
)
2785 = (ts
->state
== TS_DEAD
? 0 : tcg_target_available_regs
[ts
->type
]);
2788 /* liveness analysis: end of function: all temps are dead, and globals
2789 should be in memory. */
2790 static void la_func_end(TCGContext
*s
, int ng
, int nt
)
2794 for (i
= 0; i
< ng
; ++i
) {
2795 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2796 la_reset_pref(&s
->temps
[i
]);
2798 for (i
= ng
; i
< nt
; ++i
) {
2799 s
->temps
[i
].state
= TS_DEAD
;
2800 la_reset_pref(&s
->temps
[i
]);
2804 /* liveness analysis: end of basic block: all temps are dead, globals
2805 and local temps should be in memory. */
/*
 * NOTE(review): mangled extraction -- the switch over the temp kind
 * that selects between TS_DEAD|TS_MEM and plain TS_DEAD was dropped.
 * Fragments below preserved byte-for-byte; restore from upstream QEMU
 * before editing.
 */
2806 static void la_bb_end(TCGContext
*s
, int ng
, int nt
)
/* Walk every temp, globals first then locals. */
2810 for (i
= 0; i
< nt
; ++i
) {
2811 TCGTemp
*ts
= &s
->temps
[i
];
/* Dead with the value flushed back to memory. */
2818 state
= TS_DEAD
| TS_MEM
;
/* Unhandled temp kind is a programming error. */
2825 g_assert_not_reached();
2832 /* liveness analysis: sync globals back to memory. */
2833 static void la_global_sync(TCGContext
*s
, int ng
)
2837 for (i
= 0; i
< ng
; ++i
) {
2838 int state
= s
->temps
[i
].state
;
2839 s
->temps
[i
].state
= state
| TS_MEM
;
2840 if (state
== TS_DEAD
) {
2841 /* If the global was previously dead, reset prefs. */
2842 la_reset_pref(&s
->temps
[i
]);
2848 * liveness analysis: conditional branch: all temps are dead,
2849 * globals and local temps should be synced.
/*
 * NOTE(review): mangled extraction -- the switch over the temp kind
 * (which decides sync vs. kill) was dropped.  Fragments below are
 * preserved byte-for-byte; restore from upstream QEMU before editing.
 */
2851 static void la_bb_sync(TCGContext
*s
, int ng
, int nt
)
/* Globals are synced back to memory first. */
2853 la_global_sync(s
, ng
);
/* Then handle the non-global temps. */
2855 for (int i
= ng
; i
< nt
; ++i
) {
2856 TCGTemp
*ts
= &s
->temps
[i
];
/* Sync path: keep the value but note it is in memory too. */
2862 ts
->state
= state
| TS_MEM
;
2863 if (state
!= TS_DEAD
) {
/* Kill path: plain temps simply die at the branch. */
2868 s
->temps
[i
].state
= TS_DEAD
;
/* Unhandled temp kind is a programming error. */
2873 g_assert_not_reached();
2875 la_reset_pref(&s
->temps
[i
]);
2879 /* liveness analysis: sync globals back to memory and kill. */
2880 static void la_global_kill(TCGContext
*s
, int ng
)
2884 for (i
= 0; i
< ng
; i
++) {
2885 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2886 la_reset_pref(&s
->temps
[i
]);
2890 /* liveness analysis: note live globals crossing calls. */
2891 static void la_cross_call(TCGContext
*s
, int nt
)
2893 TCGRegSet mask
= ~tcg_target_call_clobber_regs
;
2896 for (i
= 0; i
< nt
; i
++) {
2897 TCGTemp
*ts
= &s
->temps
[i
];
2898 if (!(ts
->state
& TS_DEAD
)) {
2899 TCGRegSet
*pset
= la_temp_pref(ts
);
2900 TCGRegSet set
= *pset
;
2903 /* If the combination is not possible, restart. */
2905 set
= tcg_target_available_regs
[ts
->type
] & mask
;
2912 /* Liveness analysis : update the opc_arg_life array to tell if a
2913 given input arguments is dead. Instructions updating dead
2914 temporaries are removed. */
2915 static void liveness_pass_1(TCGContext
*s
)
2917 int nb_globals
= s
->nb_globals
;
2918 int nb_temps
= s
->nb_temps
;
2919 TCGOp
*op
, *op_prev
;
2923 prefs
= tcg_malloc(sizeof(TCGRegSet
) * nb_temps
);
2924 for (i
= 0; i
< nb_temps
; ++i
) {
2925 s
->temps
[i
].state_ptr
= prefs
+ i
;
2928 /* ??? Should be redundant with the exit_tb that ends the TB. */
2929 la_func_end(s
, nb_globals
, nb_temps
);
2931 QTAILQ_FOREACH_REVERSE_SAFE(op
, &s
->ops
, link
, op_prev
) {
2932 int nb_iargs
, nb_oargs
;
2933 TCGOpcode opc_new
, opc_new2
;
2935 TCGLifeData arg_life
= 0;
2937 TCGOpcode opc
= op
->opc
;
2938 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2946 nb_oargs
= TCGOP_CALLO(op
);
2947 nb_iargs
= TCGOP_CALLI(op
);
2948 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2950 /* pure functions can be removed if their result is unused */
2951 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
2952 for (i
= 0; i
< nb_oargs
; i
++) {
2953 ts
= arg_temp(op
->args
[i
]);
2954 if (ts
->state
!= TS_DEAD
) {
2955 goto do_not_remove_call
;
2962 /* Output args are dead. */
2963 for (i
= 0; i
< nb_oargs
; i
++) {
2964 ts
= arg_temp(op
->args
[i
]);
2965 if (ts
->state
& TS_DEAD
) {
2966 arg_life
|= DEAD_ARG
<< i
;
2968 if (ts
->state
& TS_MEM
) {
2969 arg_life
|= SYNC_ARG
<< i
;
2971 ts
->state
= TS_DEAD
;
2974 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2975 op
->output_pref
[i
] = 0;
2978 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
2979 TCG_CALL_NO_READ_GLOBALS
))) {
2980 la_global_kill(s
, nb_globals
);
2981 } else if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
2982 la_global_sync(s
, nb_globals
);
2985 /* Record arguments that die in this helper. */
2986 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2987 ts
= arg_temp(op
->args
[i
]);
2988 if (ts
&& ts
->state
& TS_DEAD
) {
2989 arg_life
|= DEAD_ARG
<< i
;
2993 /* For all live registers, remove call-clobbered prefs. */
2994 la_cross_call(s
, nb_temps
);
2996 nb_call_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2998 /* Input arguments are live for preceding opcodes. */
2999 for (i
= 0; i
< nb_iargs
; i
++) {
3000 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
3001 if (ts
&& ts
->state
& TS_DEAD
) {
3002 /* For those arguments that die, and will be allocated
3003 * in registers, clear the register set for that arg,
3004 * to be filled in below. For args that will be on
3005 * the stack, reset to any available reg.
3008 = (i
< nb_call_regs
? 0 :
3009 tcg_target_available_regs
[ts
->type
]);
3010 ts
->state
&= ~TS_DEAD
;
3014 /* For each input argument, add its input register to prefs.
3015 If a temp is used once, this produces a single set bit. */
3016 for (i
= 0; i
< MIN(nb_call_regs
, nb_iargs
); i
++) {
3017 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
3019 tcg_regset_set_reg(*la_temp_pref(ts
),
3020 tcg_target_call_iarg_regs
[i
]);
3025 case INDEX_op_insn_start
:
3027 case INDEX_op_discard
:
3028 /* mark the temporary as dead */
3029 ts
= arg_temp(op
->args
[0]);
3030 ts
->state
= TS_DEAD
;
3034 case INDEX_op_add2_i32
:
3035 opc_new
= INDEX_op_add_i32
;
3037 case INDEX_op_sub2_i32
:
3038 opc_new
= INDEX_op_sub_i32
;
3040 case INDEX_op_add2_i64
:
3041 opc_new
= INDEX_op_add_i64
;
3043 case INDEX_op_sub2_i64
:
3044 opc_new
= INDEX_op_sub_i64
;
3048 /* Test if the high part of the operation is dead, but not
3049 the low part. The result can be optimized to a simple
3050 add or sub. This happens often for x86_64 guest when the
3051 cpu mode is set to 32 bit. */
3052 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
3053 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
3056 /* Replace the opcode and adjust the args in place,
3057 leaving 3 unused args at the end. */
3058 op
->opc
= opc
= opc_new
;
3059 op
->args
[1] = op
->args
[2];
3060 op
->args
[2] = op
->args
[4];
3061 /* Fall through and mark the single-word operation live. */
3067 case INDEX_op_mulu2_i32
:
3068 opc_new
= INDEX_op_mul_i32
;
3069 opc_new2
= INDEX_op_muluh_i32
;
3070 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
3072 case INDEX_op_muls2_i32
:
3073 opc_new
= INDEX_op_mul_i32
;
3074 opc_new2
= INDEX_op_mulsh_i32
;
3075 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
3077 case INDEX_op_mulu2_i64
:
3078 opc_new
= INDEX_op_mul_i64
;
3079 opc_new2
= INDEX_op_muluh_i64
;
3080 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
3082 case INDEX_op_muls2_i64
:
3083 opc_new
= INDEX_op_mul_i64
;
3084 opc_new2
= INDEX_op_mulsh_i64
;
3085 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
3090 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
3091 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
3092 /* Both parts of the operation are dead. */
3095 /* The high part of the operation is dead; generate the low. */
3096 op
->opc
= opc
= opc_new
;
3097 op
->args
[1] = op
->args
[2];
3098 op
->args
[2] = op
->args
[3];
3099 } else if (arg_temp(op
->args
[0])->state
== TS_DEAD
&& have_opc_new2
) {
3100 /* The low part of the operation is dead; generate the high. */
3101 op
->opc
= opc
= opc_new2
;
3102 op
->args
[0] = op
->args
[1];
3103 op
->args
[1] = op
->args
[2];
3104 op
->args
[2] = op
->args
[3];
3108 /* Mark the single-word operation live. */
3113 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3114 nb_iargs
= def
->nb_iargs
;
3115 nb_oargs
= def
->nb_oargs
;
3117 /* Test if the operation can be removed because all
3118 its outputs are dead. We assume that nb_oargs == 0
3119 implies side effects */
3120 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
3121 for (i
= 0; i
< nb_oargs
; i
++) {
3122 if (arg_temp(op
->args
[i
])->state
!= TS_DEAD
) {
3131 tcg_op_remove(s
, op
);
3135 for (i
= 0; i
< nb_oargs
; i
++) {
3136 ts
= arg_temp(op
->args
[i
]);
3138 /* Remember the preference of the uses that followed. */
3139 op
->output_pref
[i
] = *la_temp_pref(ts
);
3141 /* Output args are dead. */
3142 if (ts
->state
& TS_DEAD
) {
3143 arg_life
|= DEAD_ARG
<< i
;
3145 if (ts
->state
& TS_MEM
) {
3146 arg_life
|= SYNC_ARG
<< i
;
3148 ts
->state
= TS_DEAD
;
3152 /* If end of basic block, update. */
3153 if (def
->flags
& TCG_OPF_BB_EXIT
) {
3154 la_func_end(s
, nb_globals
, nb_temps
);
3155 } else if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3156 la_bb_sync(s
, nb_globals
, nb_temps
);
3157 } else if (def
->flags
& TCG_OPF_BB_END
) {
3158 la_bb_end(s
, nb_globals
, nb_temps
);
3159 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3160 la_global_sync(s
, nb_globals
);
3161 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
3162 la_cross_call(s
, nb_temps
);
3166 /* Record arguments that die in this opcode. */
3167 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3168 ts
= arg_temp(op
->args
[i
]);
3169 if (ts
->state
& TS_DEAD
) {
3170 arg_life
|= DEAD_ARG
<< i
;
3174 /* Input arguments are live for preceding opcodes. */
3175 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3176 ts
= arg_temp(op
->args
[i
]);
3177 if (ts
->state
& TS_DEAD
) {
3178 /* For operands that were dead, initially allow
3179 all regs for the type. */
3180 *la_temp_pref(ts
) = tcg_target_available_regs
[ts
->type
];
3181 ts
->state
&= ~TS_DEAD
;
3185 /* Incorporate constraints for this operand. */
3187 case INDEX_op_mov_i32
:
3188 case INDEX_op_mov_i64
:
3189 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3190 have proper constraints. That said, special case
3191 moves to propagate preferences backward. */
3192 if (IS_DEAD_ARG(1)) {
3193 *la_temp_pref(arg_temp(op
->args
[0]))
3194 = *la_temp_pref(arg_temp(op
->args
[1]));
3199 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3200 const TCGArgConstraint
*ct
= &def
->args_ct
[i
];
3201 TCGRegSet set
, *pset
;
3203 ts
= arg_temp(op
->args
[i
]);
3204 pset
= la_temp_pref(ts
);
3209 set
&= op
->output_pref
[ct
->alias_index
];
3211 /* If the combination is not possible, restart. */
3221 op
->life
= arg_life
;
3225 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3226 static bool liveness_pass_2(TCGContext
*s
)
3228 int nb_globals
= s
->nb_globals
;
3230 bool changes
= false;
3231 TCGOp
*op
, *op_next
;
3233 /* Create a temporary for each indirect global. */
3234 for (i
= 0; i
< nb_globals
; ++i
) {
3235 TCGTemp
*its
= &s
->temps
[i
];
3236 if (its
->indirect_reg
) {
3237 TCGTemp
*dts
= tcg_temp_alloc(s
);
3238 dts
->type
= its
->type
;
3239 dts
->base_type
= its
->base_type
;
3240 its
->state_ptr
= dts
;
3242 its
->state_ptr
= NULL
;
3244 /* All globals begin dead. */
3245 its
->state
= TS_DEAD
;
3247 for (nb_temps
= s
->nb_temps
; i
< nb_temps
; ++i
) {
3248 TCGTemp
*its
= &s
->temps
[i
];
3249 its
->state_ptr
= NULL
;
3250 its
->state
= TS_DEAD
;
3253 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
3254 TCGOpcode opc
= op
->opc
;
3255 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
3256 TCGLifeData arg_life
= op
->life
;
3257 int nb_iargs
, nb_oargs
, call_flags
;
3258 TCGTemp
*arg_ts
, *dir_ts
;
3260 if (opc
== INDEX_op_call
) {
3261 nb_oargs
= TCGOP_CALLO(op
);
3262 nb_iargs
= TCGOP_CALLI(op
);
3263 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
3265 nb_iargs
= def
->nb_iargs
;
3266 nb_oargs
= def
->nb_oargs
;
3268 /* Set flags similar to how calls require. */
3269 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3270 /* Like reading globals: sync_globals */
3271 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3272 } else if (def
->flags
& TCG_OPF_BB_END
) {
3273 /* Like writing globals: save_globals */
3275 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3276 /* Like reading globals: sync_globals */
3277 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3279 /* No effect on globals. */
3280 call_flags
= (TCG_CALL_NO_READ_GLOBALS
|
3281 TCG_CALL_NO_WRITE_GLOBALS
);
3285 /* Make sure that input arguments are available. */
3286 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3287 arg_ts
= arg_temp(op
->args
[i
]);
3289 dir_ts
= arg_ts
->state_ptr
;
3290 if (dir_ts
&& arg_ts
->state
== TS_DEAD
) {
3291 TCGOpcode lopc
= (arg_ts
->type
== TCG_TYPE_I32
3294 TCGOp
*lop
= tcg_op_insert_before(s
, op
, lopc
);
3296 lop
->args
[0] = temp_arg(dir_ts
);
3297 lop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3298 lop
->args
[2] = arg_ts
->mem_offset
;
3300 /* Loaded, but synced with memory. */
3301 arg_ts
->state
= TS_MEM
;
3306 /* Perform input replacement, and mark inputs that became dead.
3307 No action is required except keeping temp_state up to date
3308 so that we reload when needed. */
3309 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3310 arg_ts
= arg_temp(op
->args
[i
]);
3312 dir_ts
= arg_ts
->state_ptr
;
3314 op
->args
[i
] = temp_arg(dir_ts
);
3316 if (IS_DEAD_ARG(i
)) {
3317 arg_ts
->state
= TS_DEAD
;
3323 /* Liveness analysis should ensure that the following are
3324 all correct, for call sites and basic block end points. */
3325 if (call_flags
& TCG_CALL_NO_READ_GLOBALS
) {
3327 } else if (call_flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
3328 for (i
= 0; i
< nb_globals
; ++i
) {
3329 /* Liveness should see that globals are synced back,
3330 that is, either TS_DEAD or TS_MEM. */
3331 arg_ts
= &s
->temps
[i
];
3332 tcg_debug_assert(arg_ts
->state_ptr
== 0
3333 || arg_ts
->state
!= 0);
3336 for (i
= 0; i
< nb_globals
; ++i
) {
3337 /* Liveness should see that globals are saved back,
3338 that is, TS_DEAD, waiting to be reloaded. */
3339 arg_ts
= &s
->temps
[i
];
3340 tcg_debug_assert(arg_ts
->state_ptr
== 0
3341 || arg_ts
->state
== TS_DEAD
);
3345 /* Outputs become available. */
3346 if (opc
== INDEX_op_mov_i32
|| opc
== INDEX_op_mov_i64
) {
3347 arg_ts
= arg_temp(op
->args
[0]);
3348 dir_ts
= arg_ts
->state_ptr
;
3350 op
->args
[0] = temp_arg(dir_ts
);
3353 /* The output is now live and modified. */
3356 if (NEED_SYNC_ARG(0)) {
3357 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3360 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3361 TCGTemp
*out_ts
= dir_ts
;
3363 if (IS_DEAD_ARG(0)) {
3364 out_ts
= arg_temp(op
->args
[1]);
3365 arg_ts
->state
= TS_DEAD
;
3366 tcg_op_remove(s
, op
);
3368 arg_ts
->state
= TS_MEM
;
3371 sop
->args
[0] = temp_arg(out_ts
);
3372 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3373 sop
->args
[2] = arg_ts
->mem_offset
;
3375 tcg_debug_assert(!IS_DEAD_ARG(0));
3379 for (i
= 0; i
< nb_oargs
; i
++) {
3380 arg_ts
= arg_temp(op
->args
[i
]);
3381 dir_ts
= arg_ts
->state_ptr
;
3385 op
->args
[i
] = temp_arg(dir_ts
);
3388 /* The output is now live and modified. */
3391 /* Sync outputs upon their last write. */
3392 if (NEED_SYNC_ARG(i
)) {
3393 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3396 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3398 sop
->args
[0] = temp_arg(dir_ts
);
3399 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3400 sop
->args
[2] = arg_ts
->mem_offset
;
3402 arg_ts
->state
= TS_MEM
;
3404 /* Drop outputs that are dead. */
3405 if (IS_DEAD_ARG(i
)) {
3406 arg_ts
->state
= TS_DEAD
;
3415 #ifdef CONFIG_DEBUG_TCG
3416 static void dump_regs(TCGContext
*s
)
3422 for(i
= 0; i
< s
->nb_temps
; i
++) {
3424 printf(" %10s: ", tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3425 switch(ts
->val_type
) {
3427 printf("%s", tcg_target_reg_names
[ts
->reg
]);
3430 printf("%d(%s)", (int)ts
->mem_offset
,
3431 tcg_target_reg_names
[ts
->mem_base
->reg
]);
3433 case TEMP_VAL_CONST
:
3434 printf("$0x%" PRIx64
, ts
->val
);
3446 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3447 if (s
->reg_to_temp
[i
] != NULL
) {
3449 tcg_target_reg_names
[i
],
3450 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
3455 static void check_regs(TCGContext
*s
)
3462 for (reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
3463 ts
= s
->reg_to_temp
[reg
];
3465 if (ts
->val_type
!= TEMP_VAL_REG
|| ts
->reg
!= reg
) {
3466 printf("Inconsistency for register %s:\n",
3467 tcg_target_reg_names
[reg
]);
3472 for (k
= 0; k
< s
->nb_temps
; k
++) {
3474 if (ts
->val_type
== TEMP_VAL_REG
3475 && ts
->kind
!= TEMP_FIXED
3476 && s
->reg_to_temp
[ts
->reg
] != ts
) {
3477 printf("Inconsistency for temp %s:\n",
3478 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3480 printf("reg state:\n");
/*
 * NOTE(review): mangled extraction -- the frame-overflow check's
 * comparison target and abort branch were dropped, as were the #endif
 * and braces.  Fragments preserved byte-for-byte; restore from
 * upstream QEMU before editing.
 */
3488 static void temp_allocate_frame(TCGContext
*s
, TCGTemp
*ts
)
3490 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3491 /* Sparc64 stack is accessed with offset of 2047 */
/* Round the current frame offset up to the natural word alignment. */
3492 s
->current_frame_offset
= (s
->current_frame_offset
+
3493 (tcg_target_long
)sizeof(tcg_target_long
) - 1) &
3494 ~(sizeof(tcg_target_long
) - 1);
/* Overflow check against the end of the frame (branch body dropped). */
3496 if (s
->current_frame_offset
+ (tcg_target_long
)sizeof(tcg_target_long
) >
/* Record the stack slot assigned to this temp. */
3500 ts
->mem_offset
= s
->current_frame_offset
;
3501 ts
->mem_base
= s
->frame_temp
;
3502 ts
->mem_allocated
= 1;
/* Bump the allocator past the slot just handed out. */
3503 s
->current_frame_offset
+= sizeof(tcg_target_long
);
3506 static void temp_load(TCGContext
*, TCGTemp
*, TCGRegSet
, TCGRegSet
, TCGRegSet
);
3508 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3509 mark it free; otherwise mark it dead. */
/*
 * NOTE(review): mangled extraction -- the switch over the temp kind
 * that picks new_type is missing its case labels and braces.
 * Fragments preserved byte-for-byte; restore from upstream QEMU
 * before editing.
 */
3510 static void temp_free_or_dead(TCGContext
*s
, TCGTemp
*ts
, int free_or_dead
)
3512 TCGTempVal new_type
;
/* Globals/locals always retain a memory home. */
3519 new_type
= TEMP_VAL_MEM
;
/* Plain temps: free -> memory, dead -> gone. */
3522 new_type
= free_or_dead
< 0 ? TEMP_VAL_MEM
: TEMP_VAL_DEAD
;
/* Constant temps revert to their constant value. */
3525 new_type
= TEMP_VAL_CONST
;
3528 g_assert_not_reached();
/* Release the register the temp occupied, if any. */
3530 if (ts
->val_type
== TEMP_VAL_REG
) {
3531 s
->reg_to_temp
[ts
->reg
] = NULL
;
3533 ts
->val_type
= new_type
;
3536 /* Mark a temporary as dead. */
3537 static inline void temp_dead(TCGContext
*s
, TCGTemp
*ts
)
3539 temp_free_or_dead(s
, ts
, 1);
3542 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3543 registers needs to be allocated to store a constant. If 'free_or_dead'
3544 is non-zero, subsequently release the temporary; if it is positive, the
3545 temp is dead; if it is negative, the temp is free. */
/*
 * NOTE(review): mangled extraction -- the switch's case labels, the
 * condition guarding tcg_out_sti, fallthrough/break statements and
 * braces were dropped.  Fragments preserved byte-for-byte; restore
 * from upstream QEMU before editing.
 */
3546 static void temp_sync(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
,
3547 TCGRegSet preferred_regs
, int free_or_dead
)
/* Nothing to store for read-only or already-coherent temps. */
3549 if (!temp_readonly(ts
) && !ts
->mem_coherent
) {
3550 if (!ts
->mem_allocated
) {
3551 temp_allocate_frame(s
, ts
);
3553 switch (ts
->val_type
) {
3554 case TEMP_VAL_CONST
:
3555 /* If we're going to free the temp immediately, then we won't
3556 require it later in a register, so attempt to store the
3557 constant to memory directly. */
3559 && tcg_out_sti(s
, ts
->type
, ts
->val
,
3560 ts
->mem_base
->reg
, ts
->mem_offset
)) {
/* Otherwise materialize the value in a register first. */
3563 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3564 allocated_regs
, preferred_regs
);
/* Store the register back to the temp's memory slot. */
3568 tcg_out_st(s
, ts
->type
, ts
->reg
,
3569 ts
->mem_base
->reg
, ts
->mem_offset
);
3579 ts
->mem_coherent
= 1;
/* Optionally release the temp per free_or_dead. */
3582 temp_free_or_dead(s
, ts
, free_or_dead
);
3586 /* free register 'reg' by spilling the corresponding temporary if necessary */
3587 static void tcg_reg_free(TCGContext
*s
, TCGReg reg
, TCGRegSet allocated_regs
)
3589 TCGTemp
*ts
= s
->reg_to_temp
[reg
];
3591 temp_sync(s
, ts
, allocated_regs
, 0, -1);
3597 * @required_regs: Set of registers in which we must allocate.
3598 * @allocated_regs: Set of registers which must be avoided.
3599 * @preferred_regs: Set of registers we should prefer.
3600 * @rev: True if we search the registers in "indirect" order.
3602 * The allocated register must be in @required_regs & ~@allocated_regs,
3603 * but if we can put it in @preferred_regs we may save a move later.
3605 static TCGReg
tcg_reg_alloc(TCGContext
*s
, TCGRegSet required_regs
,
3606 TCGRegSet allocated_regs
,
3607 TCGRegSet preferred_regs
, bool rev
)
3609 int i
, j
, f
, n
= ARRAY_SIZE(tcg_target_reg_alloc_order
);
3610 TCGRegSet reg_ct
[2];
3613 reg_ct
[1] = required_regs
& ~allocated_regs
;
3614 tcg_debug_assert(reg_ct
[1] != 0);
3615 reg_ct
[0] = reg_ct
[1] & preferred_regs
;
3617 /* Skip the preferred_regs option if it cannot be satisfied,
3618 or if the preference made no difference. */
3619 f
= reg_ct
[0] == 0 || reg_ct
[0] == reg_ct
[1];
3621 order
= rev
? indirect_reg_alloc_order
: tcg_target_reg_alloc_order
;
3623 /* Try free registers, preferences first. */
3624 for (j
= f
; j
< 2; j
++) {
3625 TCGRegSet set
= reg_ct
[j
];
3627 if (tcg_regset_single(set
)) {
3628 /* One register in the set. */
3629 TCGReg reg
= tcg_regset_first(set
);
3630 if (s
->reg_to_temp
[reg
] == NULL
) {
3634 for (i
= 0; i
< n
; i
++) {
3635 TCGReg reg
= order
[i
];
3636 if (s
->reg_to_temp
[reg
] == NULL
&&
3637 tcg_regset_test_reg(set
, reg
)) {
3644 /* We must spill something. */
3645 for (j
= f
; j
< 2; j
++) {
3646 TCGRegSet set
= reg_ct
[j
];
3648 if (tcg_regset_single(set
)) {
3649 /* One register in the set. */
3650 TCGReg reg
= tcg_regset_first(set
);
3651 tcg_reg_free(s
, reg
, allocated_regs
);
3654 for (i
= 0; i
< n
; i
++) {
3655 TCGReg reg
= order
[i
];
3656 if (tcg_regset_test_reg(set
, reg
)) {
3657 tcg_reg_free(s
, reg
, allocated_regs
);
3667 /* Make sure the temporary is in a register. If needed, allocate the register
3668 from DESIRED while avoiding ALLOCATED. */
3669 static void temp_load(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet desired_regs
,
3670 TCGRegSet allocated_regs
, TCGRegSet preferred_regs
)
3674 switch (ts
->val_type
) {
3677 case TEMP_VAL_CONST
:
3678 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3679 preferred_regs
, ts
->indirect_base
);
3680 if (ts
->type
<= TCG_TYPE_I64
) {
3681 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
3683 uint64_t val
= ts
->val
;
3687 * Find the minimal vector element that matches the constant.
3688 * The targets will, in general, have to do this search anyway,
3689 * do this generically.
3691 if (val
== dup_const(MO_8
, val
)) {
3693 } else if (val
== dup_const(MO_16
, val
)) {
3695 } else if (val
== dup_const(MO_32
, val
)) {
3699 tcg_out_dupi_vec(s
, ts
->type
, vece
, reg
, ts
->val
);
3701 ts
->mem_coherent
= 0;
3704 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3705 preferred_regs
, ts
->indirect_base
);
3706 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
3707 ts
->mem_coherent
= 1;
3714 ts
->val_type
= TEMP_VAL_REG
;
3715 s
->reg_to_temp
[reg
] = ts
;
3718 /* Save a temporary to memory. 'allocated_regs' is used in case a
3719 temporary registers needs to be allocated to store a constant. */
3720 static void temp_save(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
)
3722 /* The liveness analysis already ensures that globals are back
3723 in memory. Keep an tcg_debug_assert for safety. */
3724 tcg_debug_assert(ts
->val_type
== TEMP_VAL_MEM
|| temp_readonly(ts
));
3727 /* save globals to their canonical location and assume they can be
3728 modified be the following code. 'allocated_regs' is used in case a
3729 temporary registers needs to be allocated to store a constant. */
3730 static void save_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3734 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3735 temp_save(s
, &s
->temps
[i
], allocated_regs
);
3739 /* sync globals to their canonical location and assume they can be
3740 read by the following code. 'allocated_regs' is used in case a
3741 temporary registers needs to be allocated to store a constant. */
3742 static void sync_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3746 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3747 TCGTemp
*ts
= &s
->temps
[i
];
3748 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
3749 || ts
->kind
== TEMP_FIXED
3750 || ts
->mem_coherent
);
3754 /* at the end of a basic block, we assume all temporaries are dead and
3755 all globals are stored at their canonical location. */
/*
 * NOTE(review): mangled extraction -- the switch over the temp kind
 * (local / normal / const) was dropped along with its case labels.
 * Fragments preserved byte-for-byte; restore from upstream QEMU
 * before editing.
 */
3756 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
/* Handle every non-global temp. */
3760 for (i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3761 TCGTemp
*ts
= &s
->temps
[i
];
/* Local temps are saved to their canonical memory slot. */
3765 temp_save(s
, ts
, allocated_regs
);
3768 /* The liveness analysis already ensures that temps are dead.
3769 Keep an tcg_debug_assert for safety. */
3770 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3773 /* Similarly, we should have freed any allocated register. */
3774 tcg_debug_assert(ts
->val_type
== TEMP_VAL_CONST
);
3777 g_assert_not_reached();
/* Finally flush the globals to their canonical locations. */
3781 save_globals(s
, allocated_regs
);
3785 * At a conditional branch, we assume all temporaries are dead and
3786 * all globals and local temps are synced to their location.
/*
 * NOTE(review): mangled extraction -- the switch over the temp kind
 * and its case labels were dropped.  Fragments preserved
 * byte-for-byte; restore from upstream QEMU before editing.
 */
3788 static void tcg_reg_alloc_cbranch(TCGContext
*s
, TCGRegSet allocated_regs
)
/* Globals are synced (not killed) across the branch. */
3790 sync_globals(s
, allocated_regs
);
3792 for (int i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3793 TCGTemp
*ts
= &s
->temps
[i
];
3795 * The liveness analysis already ensures that temps are dead.
3796 * Keep tcg_debug_asserts for safety.
/* Synced temps may stay in a register only if coherent with memory. */
3800 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
|| ts
->mem_coherent
);
/* Plain temps must already be dead here. */
3803 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3808 g_assert_not_reached();
3814 * Specialized code generation for INDEX_op_mov_* with a constant.
/*
 * NOTE(review): mangled extraction -- braces and the tail of the
 * function (the IS_DEAD_ARG branch body) were dropped.  Fragments
 * preserved byte-for-byte; restore from upstream QEMU before editing.
 */
3816 static void tcg_reg_alloc_do_movi(TCGContext
*s
, TCGTemp
*ots
,
3817 tcg_target_ulong val
, TCGLifeData arg_life
,
3818 TCGRegSet preferred_regs
)
3820 /* ENV should not be modified. */
3821 tcg_debug_assert(!temp_readonly(ots
));
3823 /* The movi is not explicitly generated here. */
/* Release any register the destination currently occupies. */
3824 if (ots
->val_type
== TEMP_VAL_REG
) {
3825 s
->reg_to_temp
[ots
->reg
] = NULL
;
/* The destination now simply carries the constant. */
3827 ots
->val_type
= TEMP_VAL_CONST
;
3829 ots
->mem_coherent
= 0;
/* Sync and/or kill per the liveness data for output 0. */
3830 if (NEED_SYNC_ARG(0)) {
3831 temp_sync(s
, ots
, s
->reserved_regs
, preferred_regs
, IS_DEAD_ARG(0));
3832 } else if (IS_DEAD_ARG(0)) {
3838 * Specialized code generation for INDEX_op_mov_*.
3840 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOp
*op
)
3842 const TCGLifeData arg_life
= op
->life
;
3843 TCGRegSet allocated_regs
, preferred_regs
;
3845 TCGType otype
, itype
;
3847 allocated_regs
= s
->reserved_regs
;
3848 preferred_regs
= op
->output_pref
[0];
3849 ots
= arg_temp(op
->args
[0]);
3850 ts
= arg_temp(op
->args
[1]);
3852 /* ENV should not be modified. */
3853 tcg_debug_assert(!temp_readonly(ots
));
3855 /* Note that otype != itype for no-op truncation. */
3859 if (ts
->val_type
== TEMP_VAL_CONST
) {
3860 /* propagate constant or generate sti */
3861 tcg_target_ulong val
= ts
->val
;
3862 if (IS_DEAD_ARG(1)) {
3865 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, preferred_regs
);
3869 /* If the source value is in memory we're going to be forced
3870 to have it in a register in order to perform the copy. Copy
3871 the SOURCE value into its own register first, that way we
3872 don't have to reload SOURCE the next time it is used. */
3873 if (ts
->val_type
== TEMP_VAL_MEM
) {
3874 temp_load(s
, ts
, tcg_target_available_regs
[itype
],
3875 allocated_regs
, preferred_regs
);
3878 tcg_debug_assert(ts
->val_type
== TEMP_VAL_REG
);
3879 if (IS_DEAD_ARG(0)) {
3880 /* mov to a non-saved dead register makes no sense (even with
3881 liveness analysis disabled). */
3882 tcg_debug_assert(NEED_SYNC_ARG(0));
3883 if (!ots
->mem_allocated
) {
3884 temp_allocate_frame(s
, ots
);
3886 tcg_out_st(s
, otype
, ts
->reg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3887 if (IS_DEAD_ARG(1)) {
3892 if (IS_DEAD_ARG(1) && ts
->kind
!= TEMP_FIXED
) {
3893 /* the mov can be suppressed */
3894 if (ots
->val_type
== TEMP_VAL_REG
) {
3895 s
->reg_to_temp
[ots
->reg
] = NULL
;
3900 if (ots
->val_type
!= TEMP_VAL_REG
) {
3901 /* When allocating a new register, make sure to not spill the
3903 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
3904 ots
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
3905 allocated_regs
, preferred_regs
,
3906 ots
->indirect_base
);
3908 if (!tcg_out_mov(s
, otype
, ots
->reg
, ts
->reg
)) {
3910 * Cross register class move not supported.
3911 * Store the source register into the destination slot
3912 * and leave the destination temp as TEMP_VAL_MEM.
3914 assert(!temp_readonly(ots
));
3915 if (!ts
->mem_allocated
) {
3916 temp_allocate_frame(s
, ots
);
3918 tcg_out_st(s
, ts
->type
, ts
->reg
,
3919 ots
->mem_base
->reg
, ots
->mem_offset
);
3920 ots
->mem_coherent
= 1;
3921 temp_free_or_dead(s
, ots
, -1);
3925 ots
->val_type
= TEMP_VAL_REG
;
3926 ots
->mem_coherent
= 0;
3927 s
->reg_to_temp
[ots
->reg
] = ots
;
3928 if (NEED_SYNC_ARG(0)) {
3929 temp_sync(s
, ots
, allocated_regs
, 0, 0);
3935 * Specialized code generation for INDEX_op_dup_vec.
3937 static void tcg_reg_alloc_dup(TCGContext
*s
, const TCGOp
*op
)
3939 const TCGLifeData arg_life
= op
->life
;
3940 TCGRegSet dup_out_regs
, dup_in_regs
;
3942 TCGType itype
, vtype
;
3943 intptr_t endian_fixup
;
3947 ots
= arg_temp(op
->args
[0]);
3948 its
= arg_temp(op
->args
[1]);
3950 /* ENV should not be modified. */
3951 tcg_debug_assert(!temp_readonly(ots
));
3954 vece
= TCGOP_VECE(op
);
3955 vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3957 if (its
->val_type
== TEMP_VAL_CONST
) {
3958 /* Propagate constant via movi -> dupi. */
3959 tcg_target_ulong val
= its
->val
;
3960 if (IS_DEAD_ARG(1)) {
3963 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, op
->output_pref
[0]);
3967 dup_out_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3968 dup_in_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[1].regs
;
3970 /* Allocate the output register now. */
3971 if (ots
->val_type
!= TEMP_VAL_REG
) {
3972 TCGRegSet allocated_regs
= s
->reserved_regs
;
3974 if (!IS_DEAD_ARG(1) && its
->val_type
== TEMP_VAL_REG
) {
3975 /* Make sure to not spill the input register. */
3976 tcg_regset_set_reg(allocated_regs
, its
->reg
);
3978 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
3979 op
->output_pref
[0], ots
->indirect_base
);
3980 ots
->val_type
= TEMP_VAL_REG
;
3981 ots
->mem_coherent
= 0;
3982 s
->reg_to_temp
[ots
->reg
] = ots
;
3985 switch (its
->val_type
) {
3988 * The dup constriaints must be broad, covering all possible VECE.
3989 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3990 * to fail, indicating that extra moves are required for that case.
3992 if (tcg_regset_test_reg(dup_in_regs
, its
->reg
)) {
3993 if (tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, its
->reg
)) {
3996 /* Try again from memory or a vector input register. */
3998 if (!its
->mem_coherent
) {
4000 * The input register is not synced, and so an extra store
4001 * would be required to use memory. Attempt an integer-vector
4002 * register move first. We do not have a TCGRegSet for this.
4004 if (tcg_out_mov(s
, itype
, ots
->reg
, its
->reg
)) {
4007 /* Sync the temp back to its slot and load from there. */
4008 temp_sync(s
, its
, s
->reserved_regs
, 0, 0);
4013 #ifdef HOST_WORDS_BIGENDIAN
4014 endian_fixup
= itype
== TCG_TYPE_I32
? 4 : 8;
4015 endian_fixup
-= 1 << vece
;
4019 if (tcg_out_dupm_vec(s
, vtype
, vece
, ots
->reg
, its
->mem_base
->reg
,
4020 its
->mem_offset
+ endian_fixup
)) {
4023 tcg_out_ld(s
, itype
, ots
->reg
, its
->mem_base
->reg
, its
->mem_offset
);
4027 g_assert_not_reached();
4030 /* We now have a vector input register, so dup must succeed. */
4031 ok
= tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, ots
->reg
);
4032 tcg_debug_assert(ok
);
4035 if (IS_DEAD_ARG(1)) {
4038 if (NEED_SYNC_ARG(0)) {
4039 temp_sync(s
, ots
, s
->reserved_regs
, 0, 0);
4041 if (IS_DEAD_ARG(0)) {
4046 static void tcg_reg_alloc_op(TCGContext
*s
, const TCGOp
*op
)
4048 const TCGLifeData arg_life
= op
->life
;
4049 const TCGOpDef
* const def
= &tcg_op_defs
[op
->opc
];
4050 TCGRegSet i_allocated_regs
;
4051 TCGRegSet o_allocated_regs
;
4052 int i
, k
, nb_iargs
, nb_oargs
;
4055 const TCGArgConstraint
*arg_ct
;
4057 TCGArg new_args
[TCG_MAX_OP_ARGS
];
4058 int const_args
[TCG_MAX_OP_ARGS
];
4060 nb_oargs
= def
->nb_oargs
;
4061 nb_iargs
= def
->nb_iargs
;
4063 /* copy constants */
4064 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
4065 op
->args
+ nb_oargs
+ nb_iargs
,
4066 sizeof(TCGArg
) * def
->nb_cargs
);
4068 i_allocated_regs
= s
->reserved_regs
;
4069 o_allocated_regs
= s
->reserved_regs
;
4071 /* satisfy input constraints */
4072 for (k
= 0; k
< nb_iargs
; k
++) {
4073 TCGRegSet i_preferred_regs
, o_preferred_regs
;
4075 i
= def
->args_ct
[nb_oargs
+ k
].sort_index
;
4077 arg_ct
= &def
->args_ct
[i
];
4080 if (ts
->val_type
== TEMP_VAL_CONST
4081 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
)) {
4082 /* constant is OK for instruction */
4084 new_args
[i
] = ts
->val
;
4088 i_preferred_regs
= o_preferred_regs
= 0;
4089 if (arg_ct
->ialias
) {
4090 o_preferred_regs
= op
->output_pref
[arg_ct
->alias_index
];
4093 * If the input is readonly, then it cannot also be an
4094 * output and aliased to itself. If the input is not
4095 * dead after the instruction, we must allocate a new
4096 * register and move it.
4098 if (temp_readonly(ts
) || !IS_DEAD_ARG(i
)) {
4099 goto allocate_in_reg
;
4103 * Check if the current register has already been allocated
4104 * for another input aliased to an output.
4106 if (ts
->val_type
== TEMP_VAL_REG
) {
4108 for (int k2
= 0; k2
< k
; k2
++) {
4109 int i2
= def
->args_ct
[nb_oargs
+ k2
].sort_index
;
4110 if (def
->args_ct
[i2
].ialias
&& reg
== new_args
[i2
]) {
4111 goto allocate_in_reg
;
4115 i_preferred_regs
= o_preferred_regs
;
4118 temp_load(s
, ts
, arg_ct
->regs
, i_allocated_regs
, i_preferred_regs
);
4121 if (!tcg_regset_test_reg(arg_ct
->regs
, reg
)) {
4124 * Allocate a new register matching the constraint
4125 * and move the temporary register into it.
4127 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4128 i_allocated_regs
, 0);
4129 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, i_allocated_regs
,
4130 o_preferred_regs
, ts
->indirect_base
);
4131 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4133 * Cross register class move not supported. Sync the
4134 * temp back to its slot and load from there.
4136 temp_sync(s
, ts
, i_allocated_regs
, 0, 0);
4137 tcg_out_ld(s
, ts
->type
, reg
,
4138 ts
->mem_base
->reg
, ts
->mem_offset
);
4143 tcg_regset_set_reg(i_allocated_regs
, reg
);
4146 /* mark dead temporaries and free the associated registers */
4147 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
4148 if (IS_DEAD_ARG(i
)) {
4149 temp_dead(s
, arg_temp(op
->args
[i
]));
4153 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
4154 tcg_reg_alloc_cbranch(s
, i_allocated_regs
);
4155 } else if (def
->flags
& TCG_OPF_BB_END
) {
4156 tcg_reg_alloc_bb_end(s
, i_allocated_regs
);
4158 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
4159 /* XXX: permit generic clobber register list ? */
4160 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4161 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4162 tcg_reg_free(s
, i
, i_allocated_regs
);
4166 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
4167 /* sync globals if the op has side effects and might trigger
4169 sync_globals(s
, i_allocated_regs
);
4172 /* satisfy the output constraints */
4173 for(k
= 0; k
< nb_oargs
; k
++) {
4174 i
= def
->args_ct
[k
].sort_index
;
4176 arg_ct
= &def
->args_ct
[i
];
4179 /* ENV should not be modified. */
4180 tcg_debug_assert(!temp_readonly(ts
));
4182 if (arg_ct
->oalias
&& !const_args
[arg_ct
->alias_index
]) {
4183 reg
= new_args
[arg_ct
->alias_index
];
4184 } else if (arg_ct
->newreg
) {
4185 reg
= tcg_reg_alloc(s
, arg_ct
->regs
,
4186 i_allocated_regs
| o_allocated_regs
,
4187 op
->output_pref
[k
], ts
->indirect_base
);
4189 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, o_allocated_regs
,
4190 op
->output_pref
[k
], ts
->indirect_base
);
4192 tcg_regset_set_reg(o_allocated_regs
, reg
);
4193 if (ts
->val_type
== TEMP_VAL_REG
) {
4194 s
->reg_to_temp
[ts
->reg
] = NULL
;
4196 ts
->val_type
= TEMP_VAL_REG
;
4199 * Temp value is modified, so the value kept in memory is
4200 * potentially not the same.
4202 ts
->mem_coherent
= 0;
4203 s
->reg_to_temp
[reg
] = ts
;
4208 /* emit instruction */
4209 if (def
->flags
& TCG_OPF_VECTOR
) {
4210 tcg_out_vec_op(s
, op
->opc
, TCGOP_VECL(op
), TCGOP_VECE(op
),
4211 new_args
, const_args
);
4213 tcg_out_op(s
, op
->opc
, new_args
, const_args
);
4216 /* move the outputs in the correct register if needed */
4217 for(i
= 0; i
< nb_oargs
; i
++) {
4218 ts
= arg_temp(op
->args
[i
]);
4220 /* ENV should not be modified. */
4221 tcg_debug_assert(!temp_readonly(ts
));
4223 if (NEED_SYNC_ARG(i
)) {
4224 temp_sync(s
, ts
, o_allocated_regs
, 0, IS_DEAD_ARG(i
));
4225 } else if (IS_DEAD_ARG(i
)) {
4231 static bool tcg_reg_alloc_dup2(TCGContext
*s
, const TCGOp
*op
)
4233 const TCGLifeData arg_life
= op
->life
;
4234 TCGTemp
*ots
, *itsl
, *itsh
;
4235 TCGType vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
4237 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4238 tcg_debug_assert(TCG_TARGET_REG_BITS
== 32);
4239 tcg_debug_assert(TCGOP_VECE(op
) == MO_64
);
4241 ots
= arg_temp(op
->args
[0]);
4242 itsl
= arg_temp(op
->args
[1]);
4243 itsh
= arg_temp(op
->args
[2]);
4245 /* ENV should not be modified. */
4246 tcg_debug_assert(!temp_readonly(ots
));
4248 /* Allocate the output register now. */
4249 if (ots
->val_type
!= TEMP_VAL_REG
) {
4250 TCGRegSet allocated_regs
= s
->reserved_regs
;
4251 TCGRegSet dup_out_regs
=
4252 tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
4254 /* Make sure to not spill the input registers. */
4255 if (!IS_DEAD_ARG(1) && itsl
->val_type
== TEMP_VAL_REG
) {
4256 tcg_regset_set_reg(allocated_regs
, itsl
->reg
);
4258 if (!IS_DEAD_ARG(2) && itsh
->val_type
== TEMP_VAL_REG
) {
4259 tcg_regset_set_reg(allocated_regs
, itsh
->reg
);
4262 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
4263 op
->output_pref
[0], ots
->indirect_base
);
4264 ots
->val_type
= TEMP_VAL_REG
;
4265 ots
->mem_coherent
= 0;
4266 s
->reg_to_temp
[ots
->reg
] = ots
;
4269 /* Promote dup2 of immediates to dupi_vec. */
4270 if (itsl
->val_type
== TEMP_VAL_CONST
&& itsh
->val_type
== TEMP_VAL_CONST
) {
4271 uint64_t val
= deposit64(itsl
->val
, 32, 32, itsh
->val
);
4274 if (val
== dup_const(MO_8
, val
)) {
4276 } else if (val
== dup_const(MO_16
, val
)) {
4278 } else if (val
== dup_const(MO_32
, val
)) {
4282 tcg_out_dupi_vec(s
, vtype
, vece
, ots
->reg
, val
);
4286 /* If the two inputs form one 64-bit value, try dupm_vec. */
4287 if (itsl
+ 1 == itsh
&& itsl
->base_type
== TCG_TYPE_I64
) {
4288 if (!itsl
->mem_coherent
) {
4289 temp_sync(s
, itsl
, s
->reserved_regs
, 0, 0);
4291 if (!itsh
->mem_coherent
) {
4292 temp_sync(s
, itsh
, s
->reserved_regs
, 0, 0);
4294 #ifdef HOST_WORDS_BIGENDIAN
4295 TCGTemp
*its
= itsh
;
4297 TCGTemp
*its
= itsl
;
4299 if (tcg_out_dupm_vec(s
, vtype
, MO_64
, ots
->reg
,
4300 its
->mem_base
->reg
, its
->mem_offset
)) {
4305 /* Fall back to generic expansion. */
4309 if (IS_DEAD_ARG(1)) {
4312 if (IS_DEAD_ARG(2)) {
4315 if (NEED_SYNC_ARG(0)) {
4316 temp_sync(s
, ots
, s
->reserved_regs
, 0, IS_DEAD_ARG(0));
4317 } else if (IS_DEAD_ARG(0)) {
/* Direction of stack growth for call-argument slots on the host. */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
4329 static void tcg_reg_alloc_call(TCGContext
*s
, TCGOp
*op
)
4331 const int nb_oargs
= TCGOP_CALLO(op
);
4332 const int nb_iargs
= TCGOP_CALLI(op
);
4333 const TCGLifeData arg_life
= op
->life
;
4334 int flags
, nb_regs
, i
;
4338 intptr_t stack_offset
;
4339 size_t call_stack_size
;
4340 tcg_insn_unit
*func_addr
;
4342 TCGRegSet allocated_regs
;
4344 func_addr
= (tcg_insn_unit
*)(intptr_t)op
->args
[nb_oargs
+ nb_iargs
];
4345 flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
4347 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
4348 if (nb_regs
> nb_iargs
) {
4352 /* assign stack slots first */
4353 call_stack_size
= (nb_iargs
- nb_regs
) * sizeof(tcg_target_long
);
4354 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
4355 ~(TCG_TARGET_STACK_ALIGN
- 1);
4356 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
4357 if (allocate_args
) {
4358 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4359 preallocate call stack */
4363 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
4364 for (i
= nb_regs
; i
< nb_iargs
; i
++) {
4365 arg
= op
->args
[nb_oargs
+ i
];
4366 #ifdef TCG_TARGET_STACK_GROWSUP
4367 stack_offset
-= sizeof(tcg_target_long
);
4369 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4371 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4372 s
->reserved_regs
, 0);
4373 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
4375 #ifndef TCG_TARGET_STACK_GROWSUP
4376 stack_offset
+= sizeof(tcg_target_long
);
4380 /* assign input registers */
4381 allocated_regs
= s
->reserved_regs
;
4382 for (i
= 0; i
< nb_regs
; i
++) {
4383 arg
= op
->args
[nb_oargs
+ i
];
4384 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4386 reg
= tcg_target_call_iarg_regs
[i
];
4388 if (ts
->val_type
== TEMP_VAL_REG
) {
4389 if (ts
->reg
!= reg
) {
4390 tcg_reg_free(s
, reg
, allocated_regs
);
4391 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4393 * Cross register class move not supported. Sync the
4394 * temp back to its slot and load from there.
4396 temp_sync(s
, ts
, allocated_regs
, 0, 0);
4397 tcg_out_ld(s
, ts
->type
, reg
,
4398 ts
->mem_base
->reg
, ts
->mem_offset
);
4402 TCGRegSet arg_set
= 0;
4404 tcg_reg_free(s
, reg
, allocated_regs
);
4405 tcg_regset_set_reg(arg_set
, reg
);
4406 temp_load(s
, ts
, arg_set
, allocated_regs
, 0);
4409 tcg_regset_set_reg(allocated_regs
, reg
);
4413 /* mark dead temporaries and free the associated registers */
4414 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
4415 if (IS_DEAD_ARG(i
)) {
4416 temp_dead(s
, arg_temp(op
->args
[i
]));
4420 /* clobber call registers */
4421 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4422 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4423 tcg_reg_free(s
, i
, allocated_regs
);
4427 /* Save globals if they might be written by the helper, sync them if
4428 they might be read. */
4429 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
4431 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
4432 sync_globals(s
, allocated_regs
);
4434 save_globals(s
, allocated_regs
);
4437 tcg_out_call(s
, func_addr
);
4439 /* assign output registers and emit moves if needed */
4440 for(i
= 0; i
< nb_oargs
; i
++) {
4444 /* ENV should not be modified. */
4445 tcg_debug_assert(!temp_readonly(ts
));
4447 reg
= tcg_target_call_oarg_regs
[i
];
4448 tcg_debug_assert(s
->reg_to_temp
[reg
] == NULL
);
4449 if (ts
->val_type
== TEMP_VAL_REG
) {
4450 s
->reg_to_temp
[ts
->reg
] = NULL
;
4452 ts
->val_type
= TEMP_VAL_REG
;
4454 ts
->mem_coherent
= 0;
4455 s
->reg_to_temp
[reg
] = ts
;
4456 if (NEED_SYNC_ARG(i
)) {
4457 temp_sync(s
, ts
, allocated_regs
, 0, IS_DEAD_ARG(i
));
4458 } else if (IS_DEAD_ARG(i
)) {
#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                               \
    do {                                                        \
        (to)->field += qatomic_read(&((from)->field));          \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zero'ed @prof */
static void
tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    /* Accumulate the per-context profiles into @prof. */
    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int j;

            for (j = 0; j < NB_OPS; j++) {
                PROF_ADD(prof, orig, table_op_count[j]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(void)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif
4573 int tcg_gen_code(TCGContext
*s
, TranslationBlock
*tb
)
4575 #ifdef CONFIG_PROFILER
4576 TCGProfile
*prof
= &s
->prof
;
4581 #ifdef CONFIG_PROFILER
4585 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4588 qatomic_set(&prof
->op_count
, prof
->op_count
+ n
);
4589 if (n
> prof
->op_count_max
) {
4590 qatomic_set(&prof
->op_count_max
, n
);
4594 qatomic_set(&prof
->temp_count
, prof
->temp_count
+ n
);
4595 if (n
> prof
->temp_count_max
) {
4596 qatomic_set(&prof
->temp_count_max
, n
);
4602 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
)
4603 && qemu_log_in_addr_range(tb
->pc
))) {
4604 FILE *logfile
= qemu_log_lock();
4606 tcg_dump_ops(s
, false);
4608 qemu_log_unlock(logfile
);
4612 #ifdef CONFIG_DEBUG_TCG
4613 /* Ensure all labels referenced have been emitted. */
4618 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
4619 if (unlikely(!l
->present
) && l
->refs
) {
4620 qemu_log_mask(CPU_LOG_TB_OP
,
4621 "$L%d referenced but not present.\n", l
->id
);
4629 #ifdef CONFIG_PROFILER
4630 qatomic_set(&prof
->opt_time
, prof
->opt_time
- profile_getclock());
4633 #ifdef USE_TCG_OPTIMIZATIONS
4637 #ifdef CONFIG_PROFILER
4638 qatomic_set(&prof
->opt_time
, prof
->opt_time
+ profile_getclock());
4639 qatomic_set(&prof
->la_time
, prof
->la_time
- profile_getclock());
4642 reachable_code_pass(s
);
4645 if (s
->nb_indirects
> 0) {
4647 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND
)
4648 && qemu_log_in_addr_range(tb
->pc
))) {
4649 FILE *logfile
= qemu_log_lock();
4650 qemu_log("OP before indirect lowering:\n");
4651 tcg_dump_ops(s
, false);
4653 qemu_log_unlock(logfile
);
4656 /* Replace indirect temps with direct temps. */
4657 if (liveness_pass_2(s
)) {
4658 /* If changes were made, re-run liveness. */
4663 #ifdef CONFIG_PROFILER
4664 qatomic_set(&prof
->la_time
, prof
->la_time
+ profile_getclock());
4668 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
)
4669 && qemu_log_in_addr_range(tb
->pc
))) {
4670 FILE *logfile
= qemu_log_lock();
4671 qemu_log("OP after optimization and liveness analysis:\n");
4672 tcg_dump_ops(s
, true);
4674 qemu_log_unlock(logfile
);
4678 tcg_reg_alloc_start(s
);
4681 * Reset the buffer pointers when restarting after overflow.
4682 * TODO: Move this into translate-all.c with the rest of the
4683 * buffer management. Having only this done here is confusing.
4685 s
->code_buf
= tcg_splitwx_to_rw(tb
->tc
.ptr
);
4686 s
->code_ptr
= s
->code_buf
;
4688 #ifdef TCG_TARGET_NEED_LDST_LABELS
4689 QSIMPLEQ_INIT(&s
->ldst_labels
);
4691 #ifdef TCG_TARGET_NEED_POOL_LABELS
4692 s
->pool_labels
= NULL
;
4696 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4697 TCGOpcode opc
= op
->opc
;
4699 #ifdef CONFIG_PROFILER
4700 qatomic_set(&prof
->table_op_count
[opc
], prof
->table_op_count
[opc
] + 1);
4704 case INDEX_op_mov_i32
:
4705 case INDEX_op_mov_i64
:
4706 case INDEX_op_mov_vec
:
4707 tcg_reg_alloc_mov(s
, op
);
4709 case INDEX_op_dup_vec
:
4710 tcg_reg_alloc_dup(s
, op
);
4712 case INDEX_op_insn_start
:
4713 if (num_insns
>= 0) {
4714 size_t off
= tcg_current_code_size(s
);
4715 s
->gen_insn_end_off
[num_insns
] = off
;
4716 /* Assert that we do not overflow our stored offset. */
4717 assert(s
->gen_insn_end_off
[num_insns
] == off
);
4720 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
4722 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4723 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
4727 s
->gen_insn_data
[num_insns
][i
] = a
;
4730 case INDEX_op_discard
:
4731 temp_dead(s
, arg_temp(op
->args
[0]));
4733 case INDEX_op_set_label
:
4734 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
4735 tcg_out_label(s
, arg_label(op
->args
[0]));
4738 tcg_reg_alloc_call(s
, op
);
4740 case INDEX_op_dup2_vec
:
4741 if (tcg_reg_alloc_dup2(s
, op
)) {
4746 /* Sanity check that we've not introduced any unhandled opcodes. */
4747 tcg_debug_assert(tcg_op_supported(opc
));
4748 /* Note: in order to speed up the code, it would be much
4749 faster to have specialized register allocator functions for
4750 some common argument patterns */
4751 tcg_reg_alloc_op(s
, op
);
4754 #ifdef CONFIG_DEBUG_TCG
4757 /* Test for (pending) buffer overflow. The assumption is that any
4758 one operation beginning below the high water mark cannot overrun
4759 the buffer completely. Thus we can test for overflow after
4760 generating code without having to check during generation. */
4761 if (unlikely((void *)s
->code_ptr
> s
->code_gen_highwater
)) {
4764 /* Test for TB overflow, as seen by gen_insn_end_off. */
4765 if (unlikely(tcg_current_code_size(s
) > UINT16_MAX
)) {
4769 tcg_debug_assert(num_insns
>= 0);
4770 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
4772 /* Generate TB finalization at the end of block */
4773 #ifdef TCG_TARGET_NEED_LDST_LABELS
4774 i
= tcg_out_ldst_finalize(s
);
4779 #ifdef TCG_TARGET_NEED_POOL_LABELS
4780 i
= tcg_out_pool_finalize(s
);
4785 if (!tcg_resolve_relocs(s
)) {
4789 #ifndef CONFIG_TCG_INTERPRETER
4790 /* flush instruction cache */
4791 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
4792 (uintptr_t)s
->code_buf
,
4793 tcg_ptr_byte_diff(s
->code_ptr
, s
->code_buf
));
4796 return tcg_current_code_size(s
);
#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte     %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count
                ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif
4864 #ifdef ELF_HOST_MACHINE
4865 /* In order to use this feature, the backend needs to do three things:
4867 (1) Define ELF_HOST_MACHINE to indicate both what value to
4868 put into the ELF image and to indicate support for the feature.
4870 (2) Define tcg_register_jit. This should create a buffer containing
4871 the contents of a .debug_frame section that describes the post-
4872 prologue unwind info for the tcg machine.
4874 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4877 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4884 struct jit_code_entry
{
4885 struct jit_code_entry
*next_entry
;
4886 struct jit_code_entry
*prev_entry
;
4887 const void *symfile_addr
;
4888 uint64_t symfile_size
;
4891 struct jit_descriptor
{
4893 uint32_t action_flag
;
4894 struct jit_code_entry
*relevant_entry
;
4895 struct jit_code_entry
*first_entry
;
4898 void __jit_debug_register_code(void) __attribute__((noinline
));
4899 void __jit_debug_register_code(void)
4904 /* Must statically initialize the version, because GDB may check
4905 the version before we can set it. */
4906 struct jit_descriptor __jit_debug_descriptor
= { 1, 0, 0, 0 };
4908 /* End GDB interface. */
/*
 * Return the byte offset of @str inside the NUL-separated string table
 * @strtab (which begins with an empty string).  @str must be present;
 * the loop does not terminate otherwise.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Skip the leading empty string at offset 0. */
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
4922 static void tcg_register_jit_int(const void *buf_ptr
, size_t buf_size
,
4923 const void *debug_frame
,
4924 size_t debug_frame_size
)
4926 struct __attribute__((packed
)) DebugInfo
{
4933 uintptr_t cu_low_pc
;
4934 uintptr_t cu_high_pc
;
4937 uintptr_t fn_low_pc
;
4938 uintptr_t fn_high_pc
;
4947 struct DebugInfo di
;
4952 struct ElfImage
*img
;
4954 static const struct ElfImage img_template
= {
4956 .e_ident
[EI_MAG0
] = ELFMAG0
,
4957 .e_ident
[EI_MAG1
] = ELFMAG1
,
4958 .e_ident
[EI_MAG2
] = ELFMAG2
,
4959 .e_ident
[EI_MAG3
] = ELFMAG3
,
4960 .e_ident
[EI_CLASS
] = ELF_CLASS
,
4961 .e_ident
[EI_DATA
] = ELF_DATA
,
4962 .e_ident
[EI_VERSION
] = EV_CURRENT
,
4964 .e_machine
= ELF_HOST_MACHINE
,
4965 .e_version
= EV_CURRENT
,
4966 .e_phoff
= offsetof(struct ElfImage
, phdr
),
4967 .e_shoff
= offsetof(struct ElfImage
, shdr
),
4968 .e_ehsize
= sizeof(ElfW(Shdr
)),
4969 .e_phentsize
= sizeof(ElfW(Phdr
)),
4971 .e_shentsize
= sizeof(ElfW(Shdr
)),
4972 .e_shnum
= ARRAY_SIZE(img
->shdr
),
4973 .e_shstrndx
= ARRAY_SIZE(img
->shdr
) - 1,
4974 #ifdef ELF_HOST_FLAGS
4975 .e_flags
= ELF_HOST_FLAGS
,
4978 .e_ident
[EI_OSABI
] = ELF_OSABI
,
4986 [0] = { .sh_type
= SHT_NULL
},
4987 /* Trick: The contents of code_gen_buffer are not present in
4988 this fake ELF file; that got allocated elsewhere. Therefore
4989 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4990 will not look for contents. We can record any address. */
4992 .sh_type
= SHT_NOBITS
,
4993 .sh_flags
= SHF_EXECINSTR
| SHF_ALLOC
,
4995 [2] = { /* .debug_info */
4996 .sh_type
= SHT_PROGBITS
,
4997 .sh_offset
= offsetof(struct ElfImage
, di
),
4998 .sh_size
= sizeof(struct DebugInfo
),
5000 [3] = { /* .debug_abbrev */
5001 .sh_type
= SHT_PROGBITS
,
5002 .sh_offset
= offsetof(struct ElfImage
, da
),
5003 .sh_size
= sizeof(img
->da
),
5005 [4] = { /* .debug_frame */
5006 .sh_type
= SHT_PROGBITS
,
5007 .sh_offset
= sizeof(struct ElfImage
),
5009 [5] = { /* .symtab */
5010 .sh_type
= SHT_SYMTAB
,
5011 .sh_offset
= offsetof(struct ElfImage
, sym
),
5012 .sh_size
= sizeof(img
->sym
),
5014 .sh_link
= ARRAY_SIZE(img
->shdr
) - 1,
5015 .sh_entsize
= sizeof(ElfW(Sym
)),
5017 [6] = { /* .strtab */
5018 .sh_type
= SHT_STRTAB
,
5019 .sh_offset
= offsetof(struct ElfImage
, str
),
5020 .sh_size
= sizeof(img
->str
),
5024 [1] = { /* code_gen_buffer */
5025 .st_info
= ELF_ST_INFO(STB_GLOBAL
, STT_FUNC
),
5030 .len
= sizeof(struct DebugInfo
) - 4,
5032 .ptr_size
= sizeof(void *),
5034 .cu_lang
= 0x8001, /* DW_LANG_Mips_Assembler */
5036 .fn_name
= "code_gen_buffer"
5039 1, /* abbrev number (the cu) */
5040 0x11, 1, /* DW_TAG_compile_unit, has children */
5041 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
5042 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5043 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5044 0, 0, /* end of abbrev */
5045 2, /* abbrev number (the fn) */
5046 0x2e, 0, /* DW_TAG_subprogram, no children */
5047 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
5048 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5049 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5050 0, 0, /* end of abbrev */
5051 0 /* no more abbrev */
5053 .str
= "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5054 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5057 /* We only need a single jit entry; statically allocate it. */
5058 static struct jit_code_entry one_entry
;
5060 uintptr_t buf
= (uintptr_t)buf_ptr
;
5061 size_t img_size
= sizeof(struct ElfImage
) + debug_frame_size
;
5062 DebugFrameHeader
*dfh
;
5064 img
= g_malloc(img_size
);
5065 *img
= img_template
;
5067 img
->phdr
.p_vaddr
= buf
;
5068 img
->phdr
.p_paddr
= buf
;
5069 img
->phdr
.p_memsz
= buf_size
;
5071 img
->shdr
[1].sh_name
= find_string(img
->str
, ".text");
5072 img
->shdr
[1].sh_addr
= buf
;
5073 img
->shdr
[1].sh_size
= buf_size
;
5075 img
->shdr
[2].sh_name
= find_string(img
->str
, ".debug_info");
5076 img
->shdr
[3].sh_name
= find_string(img
->str
, ".debug_abbrev");
5078 img
->shdr
[4].sh_name
= find_string(img
->str
, ".debug_frame");
5079 img
->shdr
[4].sh_size
= debug_frame_size
;
5081 img
->shdr
[5].sh_name
= find_string(img
->str
, ".symtab");
5082 img
->shdr
[6].sh_name
= find_string(img
->str
, ".strtab");
5084 img
->sym
[1].st_name
= find_string(img
->str
, "code_gen_buffer");
5085 img
->sym
[1].st_value
= buf
;
5086 img
->sym
[1].st_size
= buf_size
;
5088 img
->di
.cu_low_pc
= buf
;
5089 img
->di
.cu_high_pc
= buf
+ buf_size
;
5090 img
->di
.fn_low_pc
= buf
;
5091 img
->di
.fn_high_pc
= buf
+ buf_size
;
5093 dfh
= (DebugFrameHeader
*)(img
+ 1);
5094 memcpy(dfh
, debug_frame
, debug_frame_size
);
5095 dfh
->fde
.func_start
= buf
;
5096 dfh
->fde
.func_len
= buf_size
;
5099 /* Enable this block to be able to debug the ELF image file creation.
5100 One can use readelf, objdump, or other inspection utilities. */
5102 FILE *f
= fopen("/tmp/qemu.jit", "w+b");
5104 if (fwrite(img
, img_size
, 1, f
) != img_size
) {
5105 /* Avoid stupid unused return value warning for fwrite. */
5112 one_entry
.symfile_addr
= img
;
5113 one_entry
.symfile_size
= img_size
;
5115 __jit_debug_descriptor
.action_flag
= JIT_REGISTER_FN
;
5116 __jit_debug_descriptor
.relevant_entry
= &one_entry
;
5117 __jit_debug_descriptor
.first_entry
= &one_entry
;
5118 __jit_debug_register_code();
5121 /* No support for the feature. Provide the entry point expected by exec.c,
5122 and implement the internal function we declared earlier. */
5124 static void tcg_register_jit_int(const void *buf
, size_t size
,
5125 const void *debug_frame
,
5126 size_t debug_frame_size
)
5130 void tcg_register_jit(const void *buf
, size_t buf_size
)
5133 #endif /* ELF_HOST_MACHINE */
5135 #if !TCG_TARGET_MAYBE_vec
5136 void tcg_expand_vec_op(TCGOpcode o
, TCGType t
, unsigned e
, TCGArg a0
, ...)
5138 g_assert_not_reached();