/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "sysemu/sysemu.h"
/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
/* The CIE and FDE header definitions will be common to all hosts. */
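/*
 * Note: these QEMU_PACKED structs mirror the DWARF .debug_frame CIE/FDE
 * layout; tcg_register_jit_int() below hands a buffer built from them to
 * the GDB JIT interface so a debugger can unwind through generated code.
 */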
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
#define TCG_HIGHWATER 1024
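/*
 * TCG_HIGHWATER is the slack, in bytes, kept free at the end of each region:
 * code_gen_highwater is set that far below the end of the buffer, and a
 * translation that crosses it triggers allocation of a new region (see
 * tcg_region_alloc(), tcg_tb_alloc() and tcg_prologue_init() below).
 */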
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size;          /* size of one region */
    size_t stride;        /* .size + guard size */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
/* label relocation processing */
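/*
 * tcg_out_reloc() queues a (code_ptr, type, addend) record on a label's
 * relocs list while the label's value is still unknown; tcg_out_label()
 * later fixes the value, and tcg_resolve_relocs() walks every label and
 * applies patch_reloc() to each pending record.
 */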
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
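/*
 * For example, C_PFX3(c_o1_i2_, r, r, ri) pastes into the single token
 * c_o1_i2_r_r_ri; this is what C_O1_I2(r, r, ri) below expands to (plus a
 * trailing comma when building the enum).  The letters r/ri here are just
 * typical per-target constraint strings used for illustration.
 */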
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
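/*
 * With the three expansions above, a single line in tcg-target-con-set.h
 * such as C_O1_I2(r, r, ri) yields (1) an enumerator c_o1_i2_r_r_ri,
 * (2) a constraint_sets[] entry { .args_ct_str = { "r", "r", "ri" } } at
 * that same index, and (3) a value that tcg_target_op_def() can return.
 * The exact constraint letters are whatever the target's con-set header
 * actually uses; r/ri above are only an illustration.
 */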
#include "tcg-target.c.inc"
/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
    void *p = tcg_splitwx_to_rw(cp);
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}
/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}
static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}
/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}
#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
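/*
 * Worked example (softmmu, MTTCG): with a 256 MB code_gen_buffer and
 * max_cpus = 8, the loop above first tries 8 regions per vCPU thread,
 * i.e. 64 regions of 4 MB each; since 4 MB >= 2 MB it returns 64.  With a
 * much smaller buffer the loop keeps lowering regions_per_thread and can
 * fall back to one region per vCPU thread.
 */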
/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */
/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}
/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}
/* pool based memory allocation */
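/*
 * Allocations of up to TCG_POOL_CHUNK_SIZE bytes are carved out of a chain
 * of fixed-size pool chunks that is reused after tcg_pool_reset(); anything
 * larger gets its own pool inserted on the pool_first_large list and is
 * freed wholesale on reset.
 */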
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;

    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        /* Signal overflow, starting over with fewer guest insns. */
        siglongjmp(s->jmp_trans, -2);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}
TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
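/*
 * For example, INDEX_op_ctpop_i32 may appear only when the backend defines
 * TCG_TARGET_HAS_ctpop_i32 to a non-zero value, which is exactly what the
 * switch below reports.
 */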
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return false;
    }
}
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
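/*
 * As used below: bit (i+1)*2 of sizemask is set when argument i is 64-bit,
 * bit (i+1)*2 + 1 when it is signed, and bit 0 when the return value
 * itself is 64-bit.
 */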
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
*s
, char *buf
, int buf_size
,
2149 int idx
= temp_idx(ts
);
2154 pstrcpy(buf
, buf_size
, ts
->name
);
2157 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
2160 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
2165 snprintf(buf
, buf_size
, "$0x%x", (int32_t)ts
->val
);
2167 #if TCG_TARGET_REG_BITS > 32
2169 snprintf(buf
, buf_size
, "$0x%" PRIx64
, ts
->val
);
2175 snprintf(buf
, buf_size
, "v%d$0x%" PRIx64
,
2176 64 << (ts
->type
- TCG_TYPE_V64
), ts
->val
);
2179 g_assert_not_reached();
2186 static char *tcg_get_arg_str(TCGContext
*s
, char *buf
,
2187 int buf_size
, TCGArg arg
)
2189 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, arg_temp(arg
));
2192 /* Find helper name. */
2193 static inline const char *tcg_find_helper(TCGContext
*s
, uintptr_t val
)
2195 const char *ret
= NULL
;
2197 TCGHelperInfo
*info
= g_hash_table_lookup(helper_table
, (gpointer
)val
);
2205 static const char * const cond_name
[] =
2207 [TCG_COND_NEVER
] = "never",
2208 [TCG_COND_ALWAYS
] = "always",
2209 [TCG_COND_EQ
] = "eq",
2210 [TCG_COND_NE
] = "ne",
2211 [TCG_COND_LT
] = "lt",
2212 [TCG_COND_GE
] = "ge",
2213 [TCG_COND_LE
] = "le",
2214 [TCG_COND_GT
] = "gt",
2215 [TCG_COND_LTU
] = "ltu",
2216 [TCG_COND_GEU
] = "geu",
2217 [TCG_COND_LEU
] = "leu",
2218 [TCG_COND_GTU
] = "gtu"
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
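/*
 * Illustrative note (not part of the original table definitions): when
 * tcg_dump_ops() prints a qemu_ld/st operation below, it decodes the MemOp
 * through these two tables, e.g. for MO_LEUL | MO_ALIGN_4:
 *
 *     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];   -> "al4+"
 *     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];          -> "leul"
 *
 * which shows up as "al4+leul" in the opcode dump.
 */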
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}
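/*
 * Illustrative example of the bit tricks above: with d = 0x20 (only bit 5
 * set), d & (d - 1) == 0x20 & 0x1f == 0, so tcg_regset_single() reports a
 * single-register set, and tcg_regset_first() returns 5 via the
 * count-trailing-zeros helpers.
 */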
2267 static void tcg_dump_ops(TCGContext
*s
, bool have_prefs
)
2272 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
2273 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
2274 const TCGOpDef
*def
;
2279 def
= &tcg_op_defs
[c
];
2281 if (c
== INDEX_op_insn_start
) {
2283 col
+= qemu_log("\n ----");
2285 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
2287 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2288 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
2292 col
+= qemu_log(" " TARGET_FMT_lx
, a
);
2294 } else if (c
== INDEX_op_call
) {
2295 /* variable number of arguments */
2296 nb_oargs
= TCGOP_CALLO(op
);
2297 nb_iargs
= TCGOP_CALLI(op
);
2298 nb_cargs
= def
->nb_cargs
;
2300 /* function name, flags, out args */
2301 col
+= qemu_log(" %s %s,$0x%" TCG_PRIlx
",$%d", def
->name
,
2302 tcg_find_helper(s
, op
->args
[nb_oargs
+ nb_iargs
]),
2303 op
->args
[nb_oargs
+ nb_iargs
+ 1], nb_oargs
);
2304 for (i
= 0; i
< nb_oargs
; i
++) {
2305 col
+= qemu_log(",%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2308 for (i
= 0; i
< nb_iargs
; i
++) {
2309 TCGArg arg
= op
->args
[nb_oargs
+ i
];
2310 const char *t
= "<dummy>";
2311 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2312 t
= tcg_get_arg_str(s
, buf
, sizeof(buf
), arg
);
2314 col
+= qemu_log(",%s", t
);
2317 col
+= qemu_log(" %s ", def
->name
);
2319 nb_oargs
= def
->nb_oargs
;
2320 nb_iargs
= def
->nb_iargs
;
2321 nb_cargs
= def
->nb_cargs
;
2323 if (def
->flags
& TCG_OPF_VECTOR
) {
2324 col
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op
),
2325 8 << TCGOP_VECE(op
));
2329 for (i
= 0; i
< nb_oargs
; i
++) {
2331 col
+= qemu_log(",");
2333 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2336 for (i
= 0; i
< nb_iargs
; i
++) {
2338 col
+= qemu_log(",");
2340 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2344 case INDEX_op_brcond_i32
:
2345 case INDEX_op_setcond_i32
:
2346 case INDEX_op_movcond_i32
:
2347 case INDEX_op_brcond2_i32
:
2348 case INDEX_op_setcond2_i32
:
2349 case INDEX_op_brcond_i64
:
2350 case INDEX_op_setcond_i64
:
2351 case INDEX_op_movcond_i64
:
2352 case INDEX_op_cmp_vec
:
2353 case INDEX_op_cmpsel_vec
:
2354 if (op
->args
[k
] < ARRAY_SIZE(cond_name
)
2355 && cond_name
[op
->args
[k
]]) {
2356 col
+= qemu_log(",%s", cond_name
[op
->args
[k
++]]);
2358 col
+= qemu_log(",$0x%" TCG_PRIlx
, op
->args
[k
++]);
2362 case INDEX_op_qemu_ld_i32
:
2363 case INDEX_op_qemu_st_i32
:
2364 case INDEX_op_qemu_st8_i32
:
2365 case INDEX_op_qemu_ld_i64
:
2366 case INDEX_op_qemu_st_i64
:
2368 TCGMemOpIdx oi
= op
->args
[k
++];
2369 MemOp op
= get_memop(oi
);
2370 unsigned ix
= get_mmuidx(oi
);
2372 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
2373 col
+= qemu_log(",$0x%x,%u", op
, ix
);
2375 const char *s_al
, *s_op
;
2376 s_al
= alignment_name
[(op
& MO_AMASK
) >> MO_ASHIFT
];
2377 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
2378 col
+= qemu_log(",%s%s,%u", s_al
, s_op
, ix
);
2388 case INDEX_op_set_label
:
2390 case INDEX_op_brcond_i32
:
2391 case INDEX_op_brcond_i64
:
2392 case INDEX_op_brcond2_i32
:
2393 col
+= qemu_log("%s$L%d", k
? "," : "",
2394 arg_label(op
->args
[k
])->id
);
2400 for (; i
< nb_cargs
; i
++, k
++) {
2401 col
+= qemu_log("%s$0x%" TCG_PRIlx
, k
? "," : "", op
->args
[k
]);
2405 if (have_prefs
|| op
->life
) {
2407 QemuLogFile
*logfile
;
2410 logfile
= qatomic_rcu_read(&qemu_logfile
);
2412 for (; col
< 40; ++col
) {
2413 putc(' ', logfile
->fd
);
2420 unsigned life
= op
->life
;
2422 if (life
& (SYNC_ARG
* 3)) {
2424 for (i
= 0; i
< 2; ++i
) {
2425 if (life
& (SYNC_ARG
<< i
)) {
2433 for (i
= 0; life
; ++i
, life
>>= 1) {
2442 for (i
= 0; i
< nb_oargs
; ++i
) {
2443 TCGRegSet set
= op
->output_pref
[i
];
2452 } else if (set
== MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS
)) {
2454 #ifdef CONFIG_DEBUG_TCG
2455 } else if (tcg_regset_single(set
)) {
2456 TCGReg reg
= tcg_regset_first(set
);
2457 qemu_log("%s", tcg_target_reg_names
[reg
]);
2459 } else if (TCG_TARGET_NB_REGS
<= 32) {
2460 qemu_log("%#x", (uint32_t)set
);
2462 qemu_log("%#" PRIx64
, (uint64_t)set
);
/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n;

    if (arg_ct->oalias) {
        /* an alias is equivalent to a single register */
        n = 1;
    } else {
        n = ctpop64(arg_ct->regs);
    }
    return TCG_TARGET_NB_REGS - n + 1;
}
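/*
 * Worked example (assuming a hypothetical host with 16 allocatable
 * registers): a constraint accepting all 16 registers gets priority
 * 16 - 16 + 1 = 1, while one tied to a single register gets 16 - 1 + 1 = 16,
 * so tighter constraints are handled first by the allocator.
 */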
/* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j;
    TCGArgConstraint *a = def->args_ct;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(def, a[start + i].sort_index);
            int p2 = get_constraint_priority(def, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}
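/*
 * Note: the selection sort above only permutes the sort_index values;
 * args_ct[] itself stays in opcode-operand order.  E.g. with one output and
 * two inputs where the second input has the tighter constraint, the indices
 * end up as { 0, 2, 1 } and the allocator visits the operands in that order.
 */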
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch (*ct_str) {
                case '0' ... '9':
                    {
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias. */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
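/*
 * Illustrative example (the actual strings are target-specific and come from
 * the backend's constraint sets, not from this file): an entry such as
 *
 *     { .args_ct_str = { "r", "0", "ri" } }
 *
 * is parsed by the loop above as: output 0 in any general register ('r'),
 * input 1 aliased to output 0 ('0', setting oalias/ialias), and input 2
 * accepted either in a register or as an immediate constant ("ri").
 */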
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}
static TCGOp *tcg_op_alloc(TCGOpcode opc)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op;

    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
        op = tcg_malloc(sizeof(TCGOp));
    } else {
        op = QTAILQ_FIRST(&s->free_ops);
        QTAILQ_REMOVE(&s->free_ops, op, link);
    }
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    s->nb_ops++;

    return op;
}

TCGOp *tcg_emit_op(TCGOpcode opc)
{
    TCGOp *op = tcg_op_alloc(opc);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}

TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}
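/*
 * Usage sketch: liveness_pass_2() below uses tcg_op_insert_before() to
 * materialize an indirect global right in front of the op that reads it,
 * e.g. for a 32-bit temp:
 *
 *     TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld_i32);
 *     lop->args[0] = temp_arg(dir_ts);
 *     lop->args[1] = temp_arg(arg_ts->mem_base);
 *     lop->args[2] = arg_ts->mem_offset;
 */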
2668 /* Reachable analysis : remove unreachable code. */
2669 static void reachable_code_pass(TCGContext
*s
)
2671 TCGOp
*op
, *op_next
;
2674 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2680 case INDEX_op_set_label
:
2681 label
= arg_label(op
->args
[0]);
2682 if (label
->refs
== 0) {
2684 * While there is an occasional backward branch, virtually
2685 * all branches generated by the translators are forward.
2686 * Which means that generally we will have already removed
2687 * all references to the label that will be, and there is
2688 * little to be gained by iterating.
2692 /* Once we see a label, insns become live again. */
2697 * Optimization can fold conditional branches to unconditional.
2698 * If we find a label with one reference which is preceded by
2699 * an unconditional branch to it, remove both. This needed to
2700 * wait until the dead code in between them was removed.
2702 if (label
->refs
== 1) {
2703 TCGOp
*op_prev
= QTAILQ_PREV(op
, link
);
2704 if (op_prev
->opc
== INDEX_op_br
&&
2705 label
== arg_label(op_prev
->args
[0])) {
2706 tcg_op_remove(s
, op_prev
);
2714 case INDEX_op_exit_tb
:
2715 case INDEX_op_goto_ptr
:
2716 /* Unconditional branches; everything following is dead. */
2721 /* Notice noreturn helper calls, raising exceptions. */
2722 call_flags
= op
->args
[TCGOP_CALLO(op
) + TCGOP_CALLI(op
) + 1];
2723 if (call_flags
& TCG_CALL_NO_RETURN
) {
2728 case INDEX_op_insn_start
:
2729 /* Never remove -- we need to keep these for unwind. */
2738 tcg_op_remove(s
, op
);
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}
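/*
 * For reference: arg_life carries one "dead" bit and one "sync" bit per
 * operand index.  IS_DEAD_ARG(i) tests whether operand i sees its final use
 * at this op; NEED_SYNC_ARG(i) tests whether output i must also be written
 * back to its canonical memory slot.
 */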
/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}
/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_LOCAL:
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_NORMAL:
        case TEMP_CONST:
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}
/* liveness analysis: sync globals back to memory.  */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs.  */
            la_reset_pref(&s->temps[i]);
        }
    }
}
/*
 * liveness analysis: conditional branch: all temps are dead,
 * globals and local temps should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_LOCAL:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;
            }
            break;
        case TEMP_NORMAL:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}
/* liveness analysis: sync globals back to memory and kill.  */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}
/* liveness analysis: note live globals crossing calls.  */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}
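/*
 * Illustrative example: if a temp is live across a helper call and its
 * current preference set contains only call-clobbered registers, the
 * "set &= mask" above would leave it empty, so the preference restarts
 * from all call-saved registers that are valid for the temp's type.
 */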
2888 /* Liveness analysis : update the opc_arg_life array to tell if a
2889 given input arguments is dead. Instructions updating dead
2890 temporaries are removed. */
2891 static void liveness_pass_1(TCGContext
*s
)
2893 int nb_globals
= s
->nb_globals
;
2894 int nb_temps
= s
->nb_temps
;
2895 TCGOp
*op
, *op_prev
;
2899 prefs
= tcg_malloc(sizeof(TCGRegSet
) * nb_temps
);
2900 for (i
= 0; i
< nb_temps
; ++i
) {
2901 s
->temps
[i
].state_ptr
= prefs
+ i
;
2904 /* ??? Should be redundant with the exit_tb that ends the TB. */
2905 la_func_end(s
, nb_globals
, nb_temps
);
2907 QTAILQ_FOREACH_REVERSE_SAFE(op
, &s
->ops
, link
, op_prev
) {
2908 int nb_iargs
, nb_oargs
;
2909 TCGOpcode opc_new
, opc_new2
;
2911 TCGLifeData arg_life
= 0;
2913 TCGOpcode opc
= op
->opc
;
2914 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2922 nb_oargs
= TCGOP_CALLO(op
);
2923 nb_iargs
= TCGOP_CALLI(op
);
2924 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2926 /* pure functions can be removed if their result is unused */
2927 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
2928 for (i
= 0; i
< nb_oargs
; i
++) {
2929 ts
= arg_temp(op
->args
[i
]);
2930 if (ts
->state
!= TS_DEAD
) {
2931 goto do_not_remove_call
;
2938 /* Output args are dead. */
2939 for (i
= 0; i
< nb_oargs
; i
++) {
2940 ts
= arg_temp(op
->args
[i
]);
2941 if (ts
->state
& TS_DEAD
) {
2942 arg_life
|= DEAD_ARG
<< i
;
2944 if (ts
->state
& TS_MEM
) {
2945 arg_life
|= SYNC_ARG
<< i
;
2947 ts
->state
= TS_DEAD
;
2950 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2951 op
->output_pref
[i
] = 0;
2954 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
2955 TCG_CALL_NO_READ_GLOBALS
))) {
2956 la_global_kill(s
, nb_globals
);
2957 } else if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
2958 la_global_sync(s
, nb_globals
);
2961 /* Record arguments that die in this helper. */
2962 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2963 ts
= arg_temp(op
->args
[i
]);
2964 if (ts
&& ts
->state
& TS_DEAD
) {
2965 arg_life
|= DEAD_ARG
<< i
;
2969 /* For all live registers, remove call-clobbered prefs. */
2970 la_cross_call(s
, nb_temps
);
2972 nb_call_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2974 /* Input arguments are live for preceding opcodes. */
2975 for (i
= 0; i
< nb_iargs
; i
++) {
2976 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2977 if (ts
&& ts
->state
& TS_DEAD
) {
2978 /* For those arguments that die, and will be allocated
2979 * in registers, clear the register set for that arg,
2980 * to be filled in below. For args that will be on
2981 * the stack, reset to any available reg.
2984 = (i
< nb_call_regs
? 0 :
2985 tcg_target_available_regs
[ts
->type
]);
2986 ts
->state
&= ~TS_DEAD
;
2990 /* For each input argument, add its input register to prefs.
2991 If a temp is used once, this produces a single set bit. */
2992 for (i
= 0; i
< MIN(nb_call_regs
, nb_iargs
); i
++) {
2993 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2995 tcg_regset_set_reg(*la_temp_pref(ts
),
2996 tcg_target_call_iarg_regs
[i
]);
3001 case INDEX_op_insn_start
:
3003 case INDEX_op_discard
:
3004 /* mark the temporary as dead */
3005 ts
= arg_temp(op
->args
[0]);
3006 ts
->state
= TS_DEAD
;
3010 case INDEX_op_add2_i32
:
3011 opc_new
= INDEX_op_add_i32
;
3013 case INDEX_op_sub2_i32
:
3014 opc_new
= INDEX_op_sub_i32
;
3016 case INDEX_op_add2_i64
:
3017 opc_new
= INDEX_op_add_i64
;
3019 case INDEX_op_sub2_i64
:
3020 opc_new
= INDEX_op_sub_i64
;
3024 /* Test if the high part of the operation is dead, but not
3025 the low part. The result can be optimized to a simple
3026 add or sub. This happens often for x86_64 guest when the
3027 cpu mode is set to 32 bit. */
3028 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
3029 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
3032 /* Replace the opcode and adjust the args in place,
3033 leaving 3 unused args at the end. */
3034 op
->opc
= opc
= opc_new
;
3035 op
->args
[1] = op
->args
[2];
3036 op
->args
[2] = op
->args
[4];
3037 /* Fall through and mark the single-word operation live. */
3043 case INDEX_op_mulu2_i32
:
3044 opc_new
= INDEX_op_mul_i32
;
3045 opc_new2
= INDEX_op_muluh_i32
;
3046 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
3048 case INDEX_op_muls2_i32
:
3049 opc_new
= INDEX_op_mul_i32
;
3050 opc_new2
= INDEX_op_mulsh_i32
;
3051 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
3053 case INDEX_op_mulu2_i64
:
3054 opc_new
= INDEX_op_mul_i64
;
3055 opc_new2
= INDEX_op_muluh_i64
;
3056 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
3058 case INDEX_op_muls2_i64
:
3059 opc_new
= INDEX_op_mul_i64
;
3060 opc_new2
= INDEX_op_mulsh_i64
;
3061 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
3066 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
3067 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
3068 /* Both parts of the operation are dead. */
3071 /* The high part of the operation is dead; generate the low. */
3072 op
->opc
= opc
= opc_new
;
3073 op
->args
[1] = op
->args
[2];
3074 op
->args
[2] = op
->args
[3];
3075 } else if (arg_temp(op
->args
[0])->state
== TS_DEAD
&& have_opc_new2
) {
3076 /* The low part of the operation is dead; generate the high. */
3077 op
->opc
= opc
= opc_new2
;
3078 op
->args
[0] = op
->args
[1];
3079 op
->args
[1] = op
->args
[2];
3080 op
->args
[2] = op
->args
[3];
3084 /* Mark the single-word operation live. */
3089 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3090 nb_iargs
= def
->nb_iargs
;
3091 nb_oargs
= def
->nb_oargs
;
3093 /* Test if the operation can be removed because all
3094 its outputs are dead. We assume that nb_oargs == 0
3095 implies side effects */
3096 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
3097 for (i
= 0; i
< nb_oargs
; i
++) {
3098 if (arg_temp(op
->args
[i
])->state
!= TS_DEAD
) {
3107 tcg_op_remove(s
, op
);
3111 for (i
= 0; i
< nb_oargs
; i
++) {
3112 ts
= arg_temp(op
->args
[i
]);
3114 /* Remember the preference of the uses that followed. */
3115 op
->output_pref
[i
] = *la_temp_pref(ts
);
3117 /* Output args are dead. */
3118 if (ts
->state
& TS_DEAD
) {
3119 arg_life
|= DEAD_ARG
<< i
;
3121 if (ts
->state
& TS_MEM
) {
3122 arg_life
|= SYNC_ARG
<< i
;
3124 ts
->state
= TS_DEAD
;
3128 /* If end of basic block, update. */
3129 if (def
->flags
& TCG_OPF_BB_EXIT
) {
3130 la_func_end(s
, nb_globals
, nb_temps
);
3131 } else if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3132 la_bb_sync(s
, nb_globals
, nb_temps
);
3133 } else if (def
->flags
& TCG_OPF_BB_END
) {
3134 la_bb_end(s
, nb_globals
, nb_temps
);
3135 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3136 la_global_sync(s
, nb_globals
);
3137 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
3138 la_cross_call(s
, nb_temps
);
3142 /* Record arguments that die in this opcode. */
3143 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3144 ts
= arg_temp(op
->args
[i
]);
3145 if (ts
->state
& TS_DEAD
) {
3146 arg_life
|= DEAD_ARG
<< i
;
3150 /* Input arguments are live for preceding opcodes. */
3151 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3152 ts
= arg_temp(op
->args
[i
]);
3153 if (ts
->state
& TS_DEAD
) {
3154 /* For operands that were dead, initially allow
3155 all regs for the type. */
3156 *la_temp_pref(ts
) = tcg_target_available_regs
[ts
->type
];
3157 ts
->state
&= ~TS_DEAD
;
3161 /* Incorporate constraints for this operand. */
3163 case INDEX_op_mov_i32
:
3164 case INDEX_op_mov_i64
:
3165 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3166 have proper constraints. That said, special case
3167 moves to propagate preferences backward. */
3168 if (IS_DEAD_ARG(1)) {
3169 *la_temp_pref(arg_temp(op
->args
[0]))
3170 = *la_temp_pref(arg_temp(op
->args
[1]));
3175 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3176 const TCGArgConstraint
*ct
= &def
->args_ct
[i
];
3177 TCGRegSet set
, *pset
;
3179 ts
= arg_temp(op
->args
[i
]);
3180 pset
= la_temp_pref(ts
);
3185 set
&= op
->output_pref
[ct
->alias_index
];
3187 /* If the combination is not possible, restart. */
3197 op
->life
= arg_life
;
3201 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3202 static bool liveness_pass_2(TCGContext
*s
)
3204 int nb_globals
= s
->nb_globals
;
3206 bool changes
= false;
3207 TCGOp
*op
, *op_next
;
3209 /* Create a temporary for each indirect global. */
3210 for (i
= 0; i
< nb_globals
; ++i
) {
3211 TCGTemp
*its
= &s
->temps
[i
];
3212 if (its
->indirect_reg
) {
3213 TCGTemp
*dts
= tcg_temp_alloc(s
);
3214 dts
->type
= its
->type
;
3215 dts
->base_type
= its
->base_type
;
3216 its
->state_ptr
= dts
;
3218 its
->state_ptr
= NULL
;
3220 /* All globals begin dead. */
3221 its
->state
= TS_DEAD
;
3223 for (nb_temps
= s
->nb_temps
; i
< nb_temps
; ++i
) {
3224 TCGTemp
*its
= &s
->temps
[i
];
3225 its
->state_ptr
= NULL
;
3226 its
->state
= TS_DEAD
;
3229 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
3230 TCGOpcode opc
= op
->opc
;
3231 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
3232 TCGLifeData arg_life
= op
->life
;
3233 int nb_iargs
, nb_oargs
, call_flags
;
3234 TCGTemp
*arg_ts
, *dir_ts
;
3236 if (opc
== INDEX_op_call
) {
3237 nb_oargs
= TCGOP_CALLO(op
);
3238 nb_iargs
= TCGOP_CALLI(op
);
3239 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
3241 nb_iargs
= def
->nb_iargs
;
3242 nb_oargs
= def
->nb_oargs
;
3244 /* Set flags similar to how calls require. */
3245 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3246 /* Like reading globals: sync_globals */
3247 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3248 } else if (def
->flags
& TCG_OPF_BB_END
) {
3249 /* Like writing globals: save_globals */
3251 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3252 /* Like reading globals: sync_globals */
3253 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3255 /* No effect on globals. */
3256 call_flags
= (TCG_CALL_NO_READ_GLOBALS
|
3257 TCG_CALL_NO_WRITE_GLOBALS
);
3261 /* Make sure that input arguments are available. */
3262 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3263 arg_ts
= arg_temp(op
->args
[i
]);
3265 dir_ts
= arg_ts
->state_ptr
;
3266 if (dir_ts
&& arg_ts
->state
== TS_DEAD
) {
3267 TCGOpcode lopc
= (arg_ts
->type
== TCG_TYPE_I32
3270 TCGOp
*lop
= tcg_op_insert_before(s
, op
, lopc
);
3272 lop
->args
[0] = temp_arg(dir_ts
);
3273 lop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3274 lop
->args
[2] = arg_ts
->mem_offset
;
3276 /* Loaded, but synced with memory. */
3277 arg_ts
->state
= TS_MEM
;
3282 /* Perform input replacement, and mark inputs that became dead.
3283 No action is required except keeping temp_state up to date
3284 so that we reload when needed. */
3285 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3286 arg_ts
= arg_temp(op
->args
[i
]);
3288 dir_ts
= arg_ts
->state_ptr
;
3290 op
->args
[i
] = temp_arg(dir_ts
);
3292 if (IS_DEAD_ARG(i
)) {
3293 arg_ts
->state
= TS_DEAD
;
3299 /* Liveness analysis should ensure that the following are
3300 all correct, for call sites and basic block end points. */
3301 if (call_flags
& TCG_CALL_NO_READ_GLOBALS
) {
3303 } else if (call_flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
3304 for (i
= 0; i
< nb_globals
; ++i
) {
3305 /* Liveness should see that globals are synced back,
3306 that is, either TS_DEAD or TS_MEM. */
3307 arg_ts
= &s
->temps
[i
];
3308 tcg_debug_assert(arg_ts
->state_ptr
== 0
3309 || arg_ts
->state
!= 0);
3312 for (i
= 0; i
< nb_globals
; ++i
) {
3313 /* Liveness should see that globals are saved back,
3314 that is, TS_DEAD, waiting to be reloaded. */
3315 arg_ts
= &s
->temps
[i
];
3316 tcg_debug_assert(arg_ts
->state_ptr
== 0
3317 || arg_ts
->state
== TS_DEAD
);
3321 /* Outputs become available. */
3322 if (opc
== INDEX_op_mov_i32
|| opc
== INDEX_op_mov_i64
) {
3323 arg_ts
= arg_temp(op
->args
[0]);
3324 dir_ts
= arg_ts
->state_ptr
;
3326 op
->args
[0] = temp_arg(dir_ts
);
3329 /* The output is now live and modified. */
3332 if (NEED_SYNC_ARG(0)) {
3333 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3336 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3337 TCGTemp
*out_ts
= dir_ts
;
3339 if (IS_DEAD_ARG(0)) {
3340 out_ts
= arg_temp(op
->args
[1]);
3341 arg_ts
->state
= TS_DEAD
;
3342 tcg_op_remove(s
, op
);
3344 arg_ts
->state
= TS_MEM
;
3347 sop
->args
[0] = temp_arg(out_ts
);
3348 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3349 sop
->args
[2] = arg_ts
->mem_offset
;
3351 tcg_debug_assert(!IS_DEAD_ARG(0));
3355 for (i
= 0; i
< nb_oargs
; i
++) {
3356 arg_ts
= arg_temp(op
->args
[i
]);
3357 dir_ts
= arg_ts
->state_ptr
;
3361 op
->args
[i
] = temp_arg(dir_ts
);
3364 /* The output is now live and modified. */
3367 /* Sync outputs upon their last write. */
3368 if (NEED_SYNC_ARG(i
)) {
3369 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3372 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3374 sop
->args
[0] = temp_arg(dir_ts
);
3375 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3376 sop
->args
[2] = arg_ts
->mem_offset
;
3378 arg_ts
->state
= TS_MEM
;
3380 /* Drop outputs that are dead. */
3381 if (IS_DEAD_ARG(i
)) {
3382 arg_ts
->state
= TS_DEAD
;
3391 #ifdef CONFIG_DEBUG_TCG
3392 static void dump_regs(TCGContext
*s
)
3398 for(i
= 0; i
< s
->nb_temps
; i
++) {
3400 printf(" %10s: ", tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3401 switch(ts
->val_type
) {
3403 printf("%s", tcg_target_reg_names
[ts
->reg
]);
3406 printf("%d(%s)", (int)ts
->mem_offset
,
3407 tcg_target_reg_names
[ts
->mem_base
->reg
]);
3409 case TEMP_VAL_CONST
:
3410 printf("$0x%" PRIx64
, ts
->val
);
3422 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3423 if (s
->reg_to_temp
[i
] != NULL
) {
3425 tcg_target_reg_names
[i
],
3426 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
3431 static void check_regs(TCGContext
*s
)
3438 for (reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
3439 ts
= s
->reg_to_temp
[reg
];
3441 if (ts
->val_type
!= TEMP_VAL_REG
|| ts
->reg
!= reg
) {
3442 printf("Inconsistency for register %s:\n",
3443 tcg_target_reg_names
[reg
]);
3448 for (k
= 0; k
< s
->nb_temps
; k
++) {
3450 if (ts
->val_type
== TEMP_VAL_REG
3451 && ts
->kind
!= TEMP_FIXED
3452 && s
->reg_to_temp
[ts
->reg
] != ts
) {
3453 printf("Inconsistency for temp %s:\n",
3454 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3456 printf("reg state:\n");
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_LOCAL:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_NORMAL:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    if (ts->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ts->reg] = NULL;
    }
    ts->val_type = new_type;
}

/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
3518 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3519 registers needs to be allocated to store a constant. If 'free_or_dead'
3520 is non-zero, subsequently release the temporary; if it is positive, the
3521 temp is dead; if it is negative, the temp is free. */
3522 static void temp_sync(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
,
3523 TCGRegSet preferred_regs
, int free_or_dead
)
3525 if (!temp_readonly(ts
) && !ts
->mem_coherent
) {
3526 if (!ts
->mem_allocated
) {
3527 temp_allocate_frame(s
, ts
);
3529 switch (ts
->val_type
) {
3530 case TEMP_VAL_CONST
:
3531 /* If we're going to free the temp immediately, then we won't
3532 require it later in a register, so attempt to store the
3533 constant to memory directly. */
3535 && tcg_out_sti(s
, ts
->type
, ts
->val
,
3536 ts
->mem_base
->reg
, ts
->mem_offset
)) {
3539 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3540 allocated_regs
, preferred_regs
);
3544 tcg_out_st(s
, ts
->type
, ts
->reg
,
3545 ts
->mem_base
->reg
, ts
->mem_offset
);
3555 ts
->mem_coherent
= 1;
3558 temp_free_or_dead(s
, ts
, free_or_dead
);
/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}
3573 * @required_regs: Set of registers in which we must allocate.
3574 * @allocated_regs: Set of registers which must be avoided.
3575 * @preferred_regs: Set of registers we should prefer.
3576 * @rev: True if we search the registers in "indirect" order.
3578 * The allocated register must be in @required_regs & ~@allocated_regs,
3579 * but if we can put it in @preferred_regs we may save a move later.
3581 static TCGReg
tcg_reg_alloc(TCGContext
*s
, TCGRegSet required_regs
,
3582 TCGRegSet allocated_regs
,
3583 TCGRegSet preferred_regs
, bool rev
)
3585 int i
, j
, f
, n
= ARRAY_SIZE(tcg_target_reg_alloc_order
);
3586 TCGRegSet reg_ct
[2];
3589 reg_ct
[1] = required_regs
& ~allocated_regs
;
3590 tcg_debug_assert(reg_ct
[1] != 0);
3591 reg_ct
[0] = reg_ct
[1] & preferred_regs
;
3593 /* Skip the preferred_regs option if it cannot be satisfied,
3594 or if the preference made no difference. */
3595 f
= reg_ct
[0] == 0 || reg_ct
[0] == reg_ct
[1];
3597 order
= rev
? indirect_reg_alloc_order
: tcg_target_reg_alloc_order
;
3599 /* Try free registers, preferences first. */
3600 for (j
= f
; j
< 2; j
++) {
3601 TCGRegSet set
= reg_ct
[j
];
3603 if (tcg_regset_single(set
)) {
3604 /* One register in the set. */
3605 TCGReg reg
= tcg_regset_first(set
);
3606 if (s
->reg_to_temp
[reg
] == NULL
) {
3610 for (i
= 0; i
< n
; i
++) {
3611 TCGReg reg
= order
[i
];
3612 if (s
->reg_to_temp
[reg
] == NULL
&&
3613 tcg_regset_test_reg(set
, reg
)) {
3620 /* We must spill something. */
3621 for (j
= f
; j
< 2; j
++) {
3622 TCGRegSet set
= reg_ct
[j
];
3624 if (tcg_regset_single(set
)) {
3625 /* One register in the set. */
3626 TCGReg reg
= tcg_regset_first(set
);
3627 tcg_reg_free(s
, reg
, allocated_regs
);
3630 for (i
= 0; i
< n
; i
++) {
3631 TCGReg reg
= order
[i
];
3632 if (tcg_regset_test_reg(set
, reg
)) {
3633 tcg_reg_free(s
, reg
, allocated_regs
);
3643 /* Make sure the temporary is in a register. If needed, allocate the register
3644 from DESIRED while avoiding ALLOCATED. */
3645 static void temp_load(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet desired_regs
,
3646 TCGRegSet allocated_regs
, TCGRegSet preferred_regs
)
3650 switch (ts
->val_type
) {
3653 case TEMP_VAL_CONST
:
3654 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3655 preferred_regs
, ts
->indirect_base
);
3656 if (ts
->type
<= TCG_TYPE_I64
) {
3657 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
3659 uint64_t val
= ts
->val
;
3663 * Find the minimal vector element that matches the constant.
3664 * The targets will, in general, have to do this search anyway,
3665 * do this generically.
3667 if (val
== dup_const(MO_8
, val
)) {
3669 } else if (val
== dup_const(MO_16
, val
)) {
3671 } else if (val
== dup_const(MO_32
, val
)) {
3675 tcg_out_dupi_vec(s
, ts
->type
, vece
, reg
, ts
->val
);
3677 ts
->mem_coherent
= 0;
3680 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3681 preferred_regs
, ts
->indirect_base
);
3682 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
3683 ts
->mem_coherent
= 1;
3690 ts
->val_type
= TEMP_VAL_REG
;
3691 s
->reg_to_temp
[reg
] = ts
;
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}
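/*
 * Note (summarizing the comments above): by the time these helpers run, the
 * liveness pass has already arranged for globals to be written back at their
 * last write, so both save_globals() and sync_globals() reduce to
 * tcg_debug_assert() consistency checks rather than emitting stores here.
 */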
3730 /* at the end of a basic block, we assume all temporaries are dead and
3731 all globals are stored at their canonical location. */
3732 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
3736 for (i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3737 TCGTemp
*ts
= &s
->temps
[i
];
3741 temp_save(s
, ts
, allocated_regs
);
3744 /* The liveness analysis already ensures that temps are dead.
3745 Keep an tcg_debug_assert for safety. */
3746 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3749 /* Similarly, we should have freed any allocated register. */
3750 tcg_debug_assert(ts
->val_type
== TEMP_VAL_CONST
);
3753 g_assert_not_reached();
3757 save_globals(s
, allocated_regs
);
3761 * At a conditional branch, we assume all temporaries are dead and
3762 * all globals and local temps are synced to their location.
3764 static void tcg_reg_alloc_cbranch(TCGContext
*s
, TCGRegSet allocated_regs
)
3766 sync_globals(s
, allocated_regs
);
3768 for (int i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3769 TCGTemp
*ts
= &s
->temps
[i
];
3771 * The liveness analysis already ensures that temps are dead.
3772 * Keep tcg_debug_asserts for safety.
3776 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
|| ts
->mem_coherent
);
3779 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3784 g_assert_not_reached();
/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here.  */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3814 * Specialized code generation for INDEX_op_mov_*.
3816 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOp
*op
)
3818 const TCGLifeData arg_life
= op
->life
;
3819 TCGRegSet allocated_regs
, preferred_regs
;
3821 TCGType otype
, itype
;
3823 allocated_regs
= s
->reserved_regs
;
3824 preferred_regs
= op
->output_pref
[0];
3825 ots
= arg_temp(op
->args
[0]);
3826 ts
= arg_temp(op
->args
[1]);
3828 /* ENV should not be modified. */
3829 tcg_debug_assert(!temp_readonly(ots
));
3831 /* Note that otype != itype for no-op truncation. */
3835 if (ts
->val_type
== TEMP_VAL_CONST
) {
3836 /* propagate constant or generate sti */
3837 tcg_target_ulong val
= ts
->val
;
3838 if (IS_DEAD_ARG(1)) {
3841 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, preferred_regs
);
3845 /* If the source value is in memory we're going to be forced
3846 to have it in a register in order to perform the copy. Copy
3847 the SOURCE value into its own register first, that way we
3848 don't have to reload SOURCE the next time it is used. */
3849 if (ts
->val_type
== TEMP_VAL_MEM
) {
3850 temp_load(s
, ts
, tcg_target_available_regs
[itype
],
3851 allocated_regs
, preferred_regs
);
3854 tcg_debug_assert(ts
->val_type
== TEMP_VAL_REG
);
3855 if (IS_DEAD_ARG(0)) {
3856 /* mov to a non-saved dead register makes no sense (even with
3857 liveness analysis disabled). */
3858 tcg_debug_assert(NEED_SYNC_ARG(0));
3859 if (!ots
->mem_allocated
) {
3860 temp_allocate_frame(s
, ots
);
3862 tcg_out_st(s
, otype
, ts
->reg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3863 if (IS_DEAD_ARG(1)) {
3868 if (IS_DEAD_ARG(1) && ts
->kind
!= TEMP_FIXED
) {
3869 /* the mov can be suppressed */
3870 if (ots
->val_type
== TEMP_VAL_REG
) {
3871 s
->reg_to_temp
[ots
->reg
] = NULL
;
3876 if (ots
->val_type
!= TEMP_VAL_REG
) {
3877 /* When allocating a new register, make sure to not spill the
3879 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
3880 ots
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
3881 allocated_regs
, preferred_regs
,
3882 ots
->indirect_base
);
3884 if (!tcg_out_mov(s
, otype
, ots
->reg
, ts
->reg
)) {
3886 * Cross register class move not supported.
3887 * Store the source register into the destination slot
3888 * and leave the destination temp as TEMP_VAL_MEM.
3890 assert(!temp_readonly(ots
));
3891 if (!ts
->mem_allocated
) {
3892 temp_allocate_frame(s
, ots
);
3894 tcg_out_st(s
, ts
->type
, ts
->reg
,
3895 ots
->mem_base
->reg
, ots
->mem_offset
);
3896 ots
->mem_coherent
= 1;
3897 temp_free_or_dead(s
, ots
, -1);
3901 ots
->val_type
= TEMP_VAL_REG
;
3902 ots
->mem_coherent
= 0;
3903 s
->reg_to_temp
[ots
->reg
] = ots
;
3904 if (NEED_SYNC_ARG(0)) {
3905 temp_sync(s
, ots
, allocated_regs
, 0, 0);
3911 * Specialized code generation for INDEX_op_dup_vec.
3913 static void tcg_reg_alloc_dup(TCGContext
*s
, const TCGOp
*op
)
3915 const TCGLifeData arg_life
= op
->life
;
3916 TCGRegSet dup_out_regs
, dup_in_regs
;
3918 TCGType itype
, vtype
;
3919 intptr_t endian_fixup
;
3923 ots
= arg_temp(op
->args
[0]);
3924 its
= arg_temp(op
->args
[1]);
3926 /* ENV should not be modified. */
3927 tcg_debug_assert(!temp_readonly(ots
));
3930 vece
= TCGOP_VECE(op
);
3931 vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3933 if (its
->val_type
== TEMP_VAL_CONST
) {
3934 /* Propagate constant via movi -> dupi. */
3935 tcg_target_ulong val
= its
->val
;
3936 if (IS_DEAD_ARG(1)) {
3939 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, op
->output_pref
[0]);
3943 dup_out_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3944 dup_in_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[1].regs
;
3946 /* Allocate the output register now. */
3947 if (ots
->val_type
!= TEMP_VAL_REG
) {
3948 TCGRegSet allocated_regs
= s
->reserved_regs
;
3950 if (!IS_DEAD_ARG(1) && its
->val_type
== TEMP_VAL_REG
) {
3951 /* Make sure to not spill the input register. */
3952 tcg_regset_set_reg(allocated_regs
, its
->reg
);
3954 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
3955 op
->output_pref
[0], ots
->indirect_base
);
3956 ots
->val_type
= TEMP_VAL_REG
;
3957 ots
->mem_coherent
= 0;
3958 s
->reg_to_temp
[ots
->reg
] = ots
;
3961 switch (its
->val_type
) {
3964 * The dup constriaints must be broad, covering all possible VECE.
3965 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3966 * to fail, indicating that extra moves are required for that case.
3968 if (tcg_regset_test_reg(dup_in_regs
, its
->reg
)) {
3969 if (tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, its
->reg
)) {
3972 /* Try again from memory or a vector input register. */
3974 if (!its
->mem_coherent
) {
3976 * The input register is not synced, and so an extra store
3977 * would be required to use memory. Attempt an integer-vector
3978 * register move first. We do not have a TCGRegSet for this.
3980 if (tcg_out_mov(s
, itype
, ots
->reg
, its
->reg
)) {
3983 /* Sync the temp back to its slot and load from there. */
3984 temp_sync(s
, its
, s
->reserved_regs
, 0, 0);
3989 #ifdef HOST_WORDS_BIGENDIAN
3990 endian_fixup
= itype
== TCG_TYPE_I32
? 4 : 8;
3991 endian_fixup
-= 1 << vece
;
3995 if (tcg_out_dupm_vec(s
, vtype
, vece
, ots
->reg
, its
->mem_base
->reg
,
3996 its
->mem_offset
+ endian_fixup
)) {
3999 tcg_out_ld(s
, itype
, ots
->reg
, its
->mem_base
->reg
, its
->mem_offset
);
4003 g_assert_not_reached();
4006 /* We now have a vector input register, so dup must succeed. */
4007 ok
= tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, ots
->reg
);
4008 tcg_debug_assert(ok
);
4011 if (IS_DEAD_ARG(1)) {
4014 if (NEED_SYNC_ARG(0)) {
4015 temp_sync(s
, ots
, s
->reserved_regs
, 0, 0);
4017 if (IS_DEAD_ARG(0)) {
4022 static void tcg_reg_alloc_op(TCGContext
*s
, const TCGOp
*op
)
4024 const TCGLifeData arg_life
= op
->life
;
4025 const TCGOpDef
* const def
= &tcg_op_defs
[op
->opc
];
4026 TCGRegSet i_allocated_regs
;
4027 TCGRegSet o_allocated_regs
;
4028 int i
, k
, nb_iargs
, nb_oargs
;
4031 const TCGArgConstraint
*arg_ct
;
4033 TCGArg new_args
[TCG_MAX_OP_ARGS
];
4034 int const_args
[TCG_MAX_OP_ARGS
];
4036 nb_oargs
= def
->nb_oargs
;
4037 nb_iargs
= def
->nb_iargs
;
4039 /* copy constants */
4040 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
4041 op
->args
+ nb_oargs
+ nb_iargs
,
4042 sizeof(TCGArg
) * def
->nb_cargs
);
4044 i_allocated_regs
= s
->reserved_regs
;
4045 o_allocated_regs
= s
->reserved_regs
;
4047 /* satisfy input constraints */
4048 for (k
= 0; k
< nb_iargs
; k
++) {
4049 TCGRegSet i_preferred_regs
, o_preferred_regs
;
4051 i
= def
->args_ct
[nb_oargs
+ k
].sort_index
;
4053 arg_ct
= &def
->args_ct
[i
];
4056 if (ts
->val_type
== TEMP_VAL_CONST
4057 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
)) {
4058 /* constant is OK for instruction */
4060 new_args
[i
] = ts
->val
;
4064 i_preferred_regs
= o_preferred_regs
= 0;
4065 if (arg_ct
->ialias
) {
4066 o_preferred_regs
= op
->output_pref
[arg_ct
->alias_index
];
4069 * If the input is readonly, then it cannot also be an
4070 * output and aliased to itself. If the input is not
4071 * dead after the instruction, we must allocate a new
4072 * register and move it.
4074 if (temp_readonly(ts
) || !IS_DEAD_ARG(i
)) {
4075 goto allocate_in_reg
;
4079 * Check if the current register has already been allocated
4080 * for another input aliased to an output.
4082 if (ts
->val_type
== TEMP_VAL_REG
) {
4084 for (int k2
= 0; k2
< k
; k2
++) {
4085 int i2
= def
->args_ct
[nb_oargs
+ k2
].sort_index
;
4086 if (def
->args_ct
[i2
].ialias
&& reg
== new_args
[i2
]) {
4087 goto allocate_in_reg
;
4091 i_preferred_regs
= o_preferred_regs
;
4094 temp_load(s
, ts
, arg_ct
->regs
, i_allocated_regs
, i_preferred_regs
);
4097 if (!tcg_regset_test_reg(arg_ct
->regs
, reg
)) {
4100 * Allocate a new register matching the constraint
4101 * and move the temporary register into it.
4103 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4104 i_allocated_regs
, 0);
4105 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, i_allocated_regs
,
4106 o_preferred_regs
, ts
->indirect_base
);
4107 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4109 * Cross register class move not supported. Sync the
4110 * temp back to its slot and load from there.
4112 temp_sync(s
, ts
, i_allocated_regs
, 0, 0);
4113 tcg_out_ld(s
, ts
->type
, reg
,
4114 ts
->mem_base
->reg
, ts
->mem_offset
);
4119 tcg_regset_set_reg(i_allocated_regs
, reg
);
4122 /* mark dead temporaries and free the associated registers */
4123 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
4124 if (IS_DEAD_ARG(i
)) {
4125 temp_dead(s
, arg_temp(op
->args
[i
]));
4129 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
4130 tcg_reg_alloc_cbranch(s
, i_allocated_regs
);
4131 } else if (def
->flags
& TCG_OPF_BB_END
) {
4132 tcg_reg_alloc_bb_end(s
, i_allocated_regs
);
4134 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
4135 /* XXX: permit generic clobber register list ? */
4136 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4137 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4138 tcg_reg_free(s
, i
, i_allocated_regs
);
4142 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
4143 /* sync globals if the op has side effects and might trigger
4145 sync_globals(s
, i_allocated_regs
);
4148 /* satisfy the output constraints */
4149 for(k
= 0; k
< nb_oargs
; k
++) {
4150 i
= def
->args_ct
[k
].sort_index
;
4152 arg_ct
= &def
->args_ct
[i
];
4155 /* ENV should not be modified. */
4156 tcg_debug_assert(!temp_readonly(ts
));
4158 if (arg_ct
->oalias
&& !const_args
[arg_ct
->alias_index
]) {
4159 reg
= new_args
[arg_ct
->alias_index
];
4160 } else if (arg_ct
->newreg
) {
4161 reg
= tcg_reg_alloc(s
, arg_ct
->regs
,
4162 i_allocated_regs
| o_allocated_regs
,
4163 op
->output_pref
[k
], ts
->indirect_base
);
4165 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, o_allocated_regs
,
4166 op
->output_pref
[k
], ts
->indirect_base
);
4168 tcg_regset_set_reg(o_allocated_regs
, reg
);
4169 if (ts
->val_type
== TEMP_VAL_REG
) {
4170 s
->reg_to_temp
[ts
->reg
] = NULL
;
4172 ts
->val_type
= TEMP_VAL_REG
;
4175 * Temp value is modified, so the value kept in memory is
4176 * potentially not the same.
4178 ts
->mem_coherent
= 0;
4179 s
->reg_to_temp
[reg
] = ts
;
4184 /* emit instruction */
4185 if (def
->flags
& TCG_OPF_VECTOR
) {
4186 tcg_out_vec_op(s
, op
->opc
, TCGOP_VECL(op
), TCGOP_VECE(op
),
4187 new_args
, const_args
);
4189 tcg_out_op(s
, op
->opc
, new_args
, const_args
);
4192 /* move the outputs in the correct register if needed */
4193 for(i
= 0; i
< nb_oargs
; i
++) {
4194 ts
= arg_temp(op
->args
[i
]);
4196 /* ENV should not be modified. */
4197 tcg_debug_assert(!temp_readonly(ts
));
4199 if (NEED_SYNC_ARG(i
)) {
4200 temp_sync(s
, ts
, o_allocated_regs
, 0, IS_DEAD_ARG(i
));
4201 } else if (IS_DEAD_ARG(i
)) {
4207 static bool tcg_reg_alloc_dup2(TCGContext
*s
, const TCGOp
*op
)
4209 const TCGLifeData arg_life
= op
->life
;
4210 TCGTemp
*ots
, *itsl
, *itsh
;
4211 TCGType vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
4213 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4214 tcg_debug_assert(TCG_TARGET_REG_BITS
== 32);
4215 tcg_debug_assert(TCGOP_VECE(op
) == MO_64
);
4217 ots
= arg_temp(op
->args
[0]);
4218 itsl
= arg_temp(op
->args
[1]);
4219 itsh
= arg_temp(op
->args
[2]);
4221 /* ENV should not be modified. */
4222 tcg_debug_assert(!temp_readonly(ots
));
4224 /* Allocate the output register now. */
4225 if (ots
->val_type
!= TEMP_VAL_REG
) {
4226 TCGRegSet allocated_regs
= s
->reserved_regs
;
4227 TCGRegSet dup_out_regs
=
4228 tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
4230 /* Make sure to not spill the input registers. */
4231 if (!IS_DEAD_ARG(1) && itsl
->val_type
== TEMP_VAL_REG
) {
4232 tcg_regset_set_reg(allocated_regs
, itsl
->reg
);
4234 if (!IS_DEAD_ARG(2) && itsh
->val_type
== TEMP_VAL_REG
) {
4235 tcg_regset_set_reg(allocated_regs
, itsh
->reg
);
4238 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
4239 op
->output_pref
[0], ots
->indirect_base
);
4240 ots
->val_type
= TEMP_VAL_REG
;
4241 ots
->mem_coherent
= 0;
4242 s
->reg_to_temp
[ots
->reg
] = ots
;
4245 /* Promote dup2 of immediates to dupi_vec. */
4246 if (itsl
->val_type
== TEMP_VAL_CONST
&& itsh
->val_type
== TEMP_VAL_CONST
) {
4247 uint64_t val
= deposit64(itsl
->val
, 32, 32, itsh
->val
);
4250 if (val
== dup_const(MO_8
, val
)) {
4252 } else if (val
== dup_const(MO_16
, val
)) {
4254 } else if (val
== dup_const(MO_32
, val
)) {
4258 tcg_out_dupi_vec(s
, vtype
, vece
, ots
->reg
, val
);
4262 /* If the two inputs form one 64-bit value, try dupm_vec. */
4263 if (itsl
+ 1 == itsh
&& itsl
->base_type
== TCG_TYPE_I64
) {
4264 if (!itsl
->mem_coherent
) {
4265 temp_sync(s
, itsl
, s
->reserved_regs
, 0, 0);
4267 if (!itsh
->mem_coherent
) {
4268 temp_sync(s
, itsh
, s
->reserved_regs
, 0, 0);
4270 #ifdef HOST_WORDS_BIGENDIAN
4271 TCGTemp
*its
= itsh
;
4273 TCGTemp
*its
= itsl
;
4275 if (tcg_out_dupm_vec(s
, vtype
, MO_64
, ots
->reg
,
4276 its
->mem_base
->reg
, its
->mem_offset
)) {
4281 /* Fall back to generic expansion. */
4285 if (IS_DEAD_ARG(1)) {
4288 if (IS_DEAD_ARG(2)) {
4291 if (NEED_SYNC_ARG(0)) {
4292 temp_sync(s
, ots
, s
->reserved_regs
, 0, IS_DEAD_ARG(0));
4293 } else if (IS_DEAD_ARG(0)) {
4299 #ifdef TCG_TARGET_STACK_GROWSUP
4300 #define STACK_DIR(x) (-(x))
4302 #define STACK_DIR(x) (x)
4305 static void tcg_reg_alloc_call(TCGContext
*s
, TCGOp
*op
)
4307 const int nb_oargs
= TCGOP_CALLO(op
);
4308 const int nb_iargs
= TCGOP_CALLI(op
);
4309 const TCGLifeData arg_life
= op
->life
;
4310 int flags
, nb_regs
, i
;
4314 intptr_t stack_offset
;
4315 size_t call_stack_size
;
4316 tcg_insn_unit
*func_addr
;
4318 TCGRegSet allocated_regs
;
4320 func_addr
= (tcg_insn_unit
*)(intptr_t)op
->args
[nb_oargs
+ nb_iargs
];
4321 flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
4323 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
4324 if (nb_regs
> nb_iargs
) {
4328 /* assign stack slots first */
4329 call_stack_size
= (nb_iargs
- nb_regs
) * sizeof(tcg_target_long
);
4330 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
4331 ~(TCG_TARGET_STACK_ALIGN
- 1);
4332 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
4333 if (allocate_args
) {
4334 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4335 preallocate call stack */
4339 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
4340 for (i
= nb_regs
; i
< nb_iargs
; i
++) {
4341 arg
= op
->args
[nb_oargs
+ i
];
4342 #ifdef TCG_TARGET_STACK_GROWSUP
4343 stack_offset
-= sizeof(tcg_target_long
);
4345 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4347 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4348 s
->reserved_regs
, 0);
4349 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
4351 #ifndef TCG_TARGET_STACK_GROWSUP
4352 stack_offset
+= sizeof(tcg_target_long
);
4356 /* assign input registers */
4357 allocated_regs
= s
->reserved_regs
;
4358 for (i
= 0; i
< nb_regs
; i
++) {
4359 arg
= op
->args
[nb_oargs
+ i
];
4360 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4362 reg
= tcg_target_call_iarg_regs
[i
];
4364 if (ts
->val_type
== TEMP_VAL_REG
) {
4365 if (ts
->reg
!= reg
) {
4366 tcg_reg_free(s
, reg
, allocated_regs
);
4367 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4369 * Cross register class move not supported. Sync the
4370 * temp back to its slot and load from there.
4372 temp_sync(s
, ts
, allocated_regs
, 0, 0);
4373 tcg_out_ld(s
, ts
->type
, reg
,
4374 ts
->mem_base
->reg
, ts
->mem_offset
);
4378 TCGRegSet arg_set
= 0;
4380 tcg_reg_free(s
, reg
, allocated_regs
);
4381 tcg_regset_set_reg(arg_set
, reg
);
4382 temp_load(s
, ts
, arg_set
, allocated_regs
, 0);
4385 tcg_regset_set_reg(allocated_regs
, reg
);
4389 /* mark dead temporaries and free the associated registers */
4390 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
4391 if (IS_DEAD_ARG(i
)) {
4392 temp_dead(s
, arg_temp(op
->args
[i
]));
4396 /* clobber call registers */
4397 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4398 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4399 tcg_reg_free(s
, i
, allocated_regs
);
4403 /* Save globals if they might be written by the helper, sync them if
4404 they might be read. */
4405 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
4407 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
4408 sync_globals(s
, allocated_regs
);
4410 save_globals(s
, allocated_regs
);
4413 tcg_out_call(s
, func_addr
);
4415 /* assign output registers and emit moves if needed */
4416 for(i
= 0; i
< nb_oargs
; i
++) {
4420 /* ENV should not be modified. */
4421 tcg_debug_assert(!temp_readonly(ts
));
4423 reg
= tcg_target_call_oarg_regs
[i
];
4424 tcg_debug_assert(s
->reg_to_temp
[reg
] == NULL
);
4425 if (ts
->val_type
== TEMP_VAL_REG
) {
4426 s
->reg_to_temp
[ts
->reg
] = NULL
;
4428 ts
->val_type
= TEMP_VAL_REG
;
4430 ts
->mem_coherent
= 0;
4431 s
->reg_to_temp
[reg
] = ts
;
4432 if (NEED_SYNC_ARG(i
)) {
4433 temp_sync(s
, ts
, allocated_regs
, 0, IS_DEAD_ARG(i
));
4434 } else if (IS_DEAD_ARG(i
)) {
4440 #ifdef CONFIG_PROFILER
4442 /* avoid copy/paste errors */
4443 #define PROF_ADD(to, from, field) \
4445 (to)->field += qatomic_read(&((from)->field)); \
4448 #define PROF_MAX(to, from, field) \
4450 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4451 if (val__ > (to)->field) { \
4452 (to)->field = val__; \
4456 /* Pass in a zero'ed @prof */
4458 void tcg_profile_snapshot(TCGProfile
*prof
, bool counters
, bool table
)
4460 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4463 for (i
= 0; i
< n_ctxs
; i
++) {
4464 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4465 const TCGProfile
*orig
= &s
->prof
;
4468 PROF_ADD(prof
, orig
, cpu_exec_time
);
4469 PROF_ADD(prof
, orig
, tb_count1
);
4470 PROF_ADD(prof
, orig
, tb_count
);
4471 PROF_ADD(prof
, orig
, op_count
);
4472 PROF_MAX(prof
, orig
, op_count_max
);
4473 PROF_ADD(prof
, orig
, temp_count
);
4474 PROF_MAX(prof
, orig
, temp_count_max
);
4475 PROF_ADD(prof
, orig
, del_op_count
);
4476 PROF_ADD(prof
, orig
, code_in_len
);
4477 PROF_ADD(prof
, orig
, code_out_len
);
4478 PROF_ADD(prof
, orig
, search_out_len
);
4479 PROF_ADD(prof
, orig
, interm_time
);
4480 PROF_ADD(prof
, orig
, code_time
);
4481 PROF_ADD(prof
, orig
, la_time
);
4482 PROF_ADD(prof
, orig
, opt_time
);
4483 PROF_ADD(prof
, orig
, restore_count
);
4484 PROF_ADD(prof
, orig
, restore_time
);
4489 for (i
= 0; i
< NB_OPS
; i
++) {
4490 PROF_ADD(prof
, orig
, table_op_count
[i
]);
4499 static void tcg_profile_snapshot_counters(TCGProfile
*prof
)
4501 tcg_profile_snapshot(prof
, true, false);
4504 static void tcg_profile_snapshot_table(TCGProfile
*prof
)
4506 tcg_profile_snapshot(prof
, false, true);
4509 void tcg_dump_op_count(void)
4511 TCGProfile prof
= {};
4514 tcg_profile_snapshot_table(&prof
);
4515 for (i
= 0; i
< NB_OPS
; i
++) {
4516 qemu_printf("%s %" PRId64
"\n", tcg_op_defs
[i
].name
,
4517 prof
.table_op_count
[i
]);
4521 int64_t tcg_cpu_exec_time(void)
4523 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4527 for (i
= 0; i
< n_ctxs
; i
++) {
4528 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4529 const TCGProfile
*prof
= &s
->prof
;
4531 ret
+= qatomic_read(&prof
->cpu_exec_time
);
4536 void tcg_dump_op_count(void)
4538 qemu_printf("[TCG profiler not compiled]\n");
4541 int64_t tcg_cpu_exec_time(void)
4543 error_report("%s: TCG profiler not compiled", __func__
);
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf(" gen_interm time %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf(" gen_code time %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count %" PRId64 "\n",
                s->restore_count);
    qemu_printf(" avg cycles %0.1f\n",
                s->restore_count
                ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
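/*
 * As a rough sketch (the real definitions live in each tcg-target.c.inc,
 * not here), a backend typically builds a static DebugFrame structure whose
 * FDE describes the frame set up by tcg_target_qemu_prologue(), and then
 * simply forwards it together with the code buffer:
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 *
 * where debug_frame is the backend's own static unwind description.
 */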
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
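/*
 * How the protocol is consumed: a debugger implementing the GDB JIT
 * interface places a breakpoint on __jit_debug_register_code() and, when it
 * fires, inspects __jit_debug_descriptor.  action_flag says whether
 * relevant_entry was just registered or unregistered, and the debugger can
 * walk the doubly-linked list from first_entry to find every in-memory
 * symbol file (here, the fake ELF image built below).
 */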
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
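/*
 * The offset returned by find_string() is relative to the start of the
 * string table blob, which is the form expected by the ELF sh_name and
 * st_name fields filled in below.
 */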
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}

#endif /* ELF_HOST_MACHINE */
#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif