2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_LIVENESS_ANALYSIS
27 #define USE_TCG_OPTIMIZATIONS
29 #include "qemu/osdep.h"
31 /* Define to dump the ELF file used to communicate with GDB. */
34 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
35 /* define it to suppress various consistency checks (faster) */
39 #include "qemu/cutils.h"
40 #include "qemu/host-utils.h"
41 #include "qemu/timer.h"
43 /* Note: the long term plan is to reduce the dependencies on the QEMU
44 CPU definitions. Currently they are used for qemu_ld/st
46 #define NO_CPU_IO_DEFS
51 #if UINTPTR_MAX == UINT32_MAX
52 # define ELF_CLASS ELFCLASS32
54 # define ELF_CLASS ELFCLASS64
56 #ifdef HOST_WORDS_BIGENDIAN
57 # define ELF_DATA ELFDATA2MSB
59 # define ELF_DATA ELFDATA2LSB
65 /* Forward declarations for functions declared in tcg-target.inc.c and
67 static void tcg_target_init(TCGContext
*s
);
68 static void tcg_target_qemu_prologue(TCGContext
*s
);
69 static void patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
70 intptr_t value
, intptr_t addend
);
72 /* The CIE and FDE header definitions will be common to all hosts. */
74 uint32_t len
__attribute__((aligned((sizeof(void *)))));
80 uint8_t return_column
;
83 typedef struct QEMU_PACKED
{
84 uint32_t len
__attribute__((aligned((sizeof(void *)))));
88 } DebugFrameFDEHeader
;
90 typedef struct QEMU_PACKED
{
92 DebugFrameFDEHeader fde
;
95 static void tcg_register_jit_int(void *buf
, size_t size
,
96 const void *debug_frame
,
97 size_t debug_frame_size
)
98 __attribute__((unused
));
100 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
101 static int target_parse_constraint(TCGArgConstraint
*ct
, const char **pct_str
);
102 static void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg1
,
104 static void tcg_out_mov(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg
);
105 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
106 TCGReg ret
, tcg_target_long arg
);
107 static void tcg_out_op(TCGContext
*s
, TCGOpcode opc
, const TCGArg
*args
,
108 const int *const_args
);
109 static void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
, TCGReg arg1
,
111 static void tcg_out_call(TCGContext
*s
, tcg_insn_unit
*target
);
112 static int tcg_target_const_match(tcg_target_long val
, TCGType type
,
113 const TCGArgConstraint
*arg_ct
);
114 static void tcg_out_tb_init(TCGContext
*s
);
115 static bool tcg_out_tb_finalize(TCGContext
*s
);
119 static TCGRegSet tcg_target_available_regs
[2];
120 static TCGRegSet tcg_target_call_clobber_regs
;
122 #if TCG_TARGET_INSN_UNIT_SIZE == 1
123 static __attribute__((unused
)) inline void tcg_out8(TCGContext
*s
, uint8_t v
)
128 static __attribute__((unused
)) inline void tcg_patch8(tcg_insn_unit
*p
,
135 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
136 static __attribute__((unused
)) inline void tcg_out16(TCGContext
*s
, uint16_t v
)
138 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
141 tcg_insn_unit
*p
= s
->code_ptr
;
142 memcpy(p
, &v
, sizeof(v
));
143 s
->code_ptr
= p
+ (2 / TCG_TARGET_INSN_UNIT_SIZE
);
147 static __attribute__((unused
)) inline void tcg_patch16(tcg_insn_unit
*p
,
150 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
153 memcpy(p
, &v
, sizeof(v
));
158 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
159 static __attribute__((unused
)) inline void tcg_out32(TCGContext
*s
, uint32_t v
)
161 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
164 tcg_insn_unit
*p
= s
->code_ptr
;
165 memcpy(p
, &v
, sizeof(v
));
166 s
->code_ptr
= p
+ (4 / TCG_TARGET_INSN_UNIT_SIZE
);
170 static __attribute__((unused
)) inline void tcg_patch32(tcg_insn_unit
*p
,
173 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
176 memcpy(p
, &v
, sizeof(v
));
181 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
182 static __attribute__((unused
)) inline void tcg_out64(TCGContext
*s
, uint64_t v
)
184 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
187 tcg_insn_unit
*p
= s
->code_ptr
;
188 memcpy(p
, &v
, sizeof(v
));
189 s
->code_ptr
= p
+ (8 / TCG_TARGET_INSN_UNIT_SIZE
);
193 static __attribute__((unused
)) inline void tcg_patch64(tcg_insn_unit
*p
,
196 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
199 memcpy(p
, &v
, sizeof(v
));
204 /* label relocation processing */
206 static void tcg_out_reloc(TCGContext
*s
, tcg_insn_unit
*code_ptr
, int type
,
207 TCGLabel
*l
, intptr_t addend
)
212 /* FIXME: This may break relocations on RISC targets that
213 modify instruction fields in place. The caller may not have
214 written the initial value. */
215 patch_reloc(code_ptr
, type
, l
->u
.value
, addend
);
217 /* add a new relocation entry */
218 r
= tcg_malloc(sizeof(TCGRelocation
));
222 r
->next
= l
->u
.first_reloc
;
223 l
->u
.first_reloc
= r
;
227 static void tcg_out_label(TCGContext
*s
, TCGLabel
*l
, tcg_insn_unit
*ptr
)
229 intptr_t value
= (intptr_t)ptr
;
232 assert(!l
->has_value
);
234 for (r
= l
->u
.first_reloc
; r
!= NULL
; r
= r
->next
) {
235 patch_reloc(r
->ptr
, r
->type
, value
, r
->addend
);
239 l
->u
.value_ptr
= ptr
;
242 TCGLabel
*gen_new_label(void)
244 TCGContext
*s
= &tcg_ctx
;
245 TCGLabel
*l
= tcg_malloc(sizeof(TCGLabel
));
254 #include "tcg-target.inc.c"
256 /* pool based memory allocation */
257 void *tcg_malloc_internal(TCGContext
*s
, int size
)
262 if (size
> TCG_POOL_CHUNK_SIZE
) {
263 /* big malloc: insert a new pool (XXX: could optimize) */
264 p
= g_malloc(sizeof(TCGPool
) + size
);
266 p
->next
= s
->pool_first_large
;
267 s
->pool_first_large
= p
;
278 pool_size
= TCG_POOL_CHUNK_SIZE
;
279 p
= g_malloc(sizeof(TCGPool
) + pool_size
);
283 s
->pool_current
->next
= p
;
292 s
->pool_cur
= p
->data
+ size
;
293 s
->pool_end
= p
->data
+ p
->size
;
297 void tcg_pool_reset(TCGContext
*s
)
300 for (p
= s
->pool_first_large
; p
; p
= t
) {
304 s
->pool_first_large
= NULL
;
305 s
->pool_cur
= s
->pool_end
= NULL
;
306 s
->pool_current
= NULL
;
309 typedef struct TCGHelperInfo
{
316 #include "exec/helper-proto.h"
318 static const TCGHelperInfo all_helpers
[] = {
319 #include "exec/helper-tcg.h"
322 static int indirect_reg_alloc_order
[ARRAY_SIZE(tcg_target_reg_alloc_order
)];
324 void tcg_context_init(TCGContext
*s
)
326 int op
, total_args
, n
, i
;
328 TCGArgConstraint
*args_ct
;
330 GHashTable
*helper_table
;
332 memset(s
, 0, sizeof(*s
));
335 /* Count total number of arguments and allocate the corresponding
338 for(op
= 0; op
< NB_OPS
; op
++) {
339 def
= &tcg_op_defs
[op
];
340 n
= def
->nb_iargs
+ def
->nb_oargs
;
344 args_ct
= g_malloc(sizeof(TCGArgConstraint
) * total_args
);
345 sorted_args
= g_malloc(sizeof(int) * total_args
);
347 for(op
= 0; op
< NB_OPS
; op
++) {
348 def
= &tcg_op_defs
[op
];
349 def
->args_ct
= args_ct
;
350 def
->sorted_args
= sorted_args
;
351 n
= def
->nb_iargs
+ def
->nb_oargs
;
356 /* Register helpers. */
357 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
358 s
->helpers
= helper_table
= g_hash_table_new(NULL
, NULL
);
360 for (i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
361 g_hash_table_insert(helper_table
, (gpointer
)all_helpers
[i
].func
,
362 (gpointer
)&all_helpers
[i
]);
367 /* Reverse the order of the saved registers, assuming they're all at
368 the start of tcg_target_reg_alloc_order. */
369 for (n
= 0; n
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++n
) {
370 int r
= tcg_target_reg_alloc_order
[n
];
371 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, r
)) {
375 for (i
= 0; i
< n
; ++i
) {
376 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[n
- 1 - i
];
378 for (; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++i
) {
379 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[i
];
383 void tcg_prologue_init(TCGContext
*s
)
385 size_t prologue_size
, total_size
;
388 /* Put the prologue at the beginning of code_gen_buffer. */
389 buf0
= s
->code_gen_buffer
;
392 s
->code_gen_prologue
= buf0
;
394 /* Generate the prologue. */
395 tcg_target_qemu_prologue(s
);
397 flush_icache_range((uintptr_t)buf0
, (uintptr_t)buf1
);
399 /* Deduct the prologue from the buffer. */
400 prologue_size
= tcg_current_code_size(s
);
401 s
->code_gen_ptr
= buf1
;
402 s
->code_gen_buffer
= buf1
;
404 total_size
= s
->code_gen_buffer_size
- prologue_size
;
405 s
->code_gen_buffer_size
= total_size
;
407 /* Compute a high-water mark, at which we voluntarily flush the buffer
408 and start over. The size here is arbitrary, significantly larger
409 than we expect the code generation for any one opcode to require. */
410 s
->code_gen_highwater
= s
->code_gen_buffer
+ (total_size
- 1024);
412 tcg_register_jit(s
->code_gen_buffer
, total_size
);
415 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
416 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size
);
417 log_disas(buf0
, prologue_size
);
424 void tcg_func_start(TCGContext
*s
)
427 s
->nb_temps
= s
->nb_globals
;
429 /* No temps have been previously allocated for size or locality. */
430 memset(s
->free_temps
, 0, sizeof(s
->free_temps
));
433 s
->current_frame_offset
= s
->frame_start
;
435 #ifdef CONFIG_DEBUG_TCG
436 s
->goto_tb_issue_mask
= 0;
439 s
->gen_first_op_idx
= 0;
440 s
->gen_last_op_idx
= -1;
441 s
->gen_next_op_idx
= 0;
442 s
->gen_next_parm_idx
= 0;
444 s
->be
= tcg_malloc(sizeof(TCGBackendData
));
447 static inline int temp_idx(TCGContext
*s
, TCGTemp
*ts
)
449 ptrdiff_t n
= ts
- s
->temps
;
450 tcg_debug_assert(n
>= 0 && n
< s
->nb_temps
);
454 static inline TCGTemp
*tcg_temp_alloc(TCGContext
*s
)
456 int n
= s
->nb_temps
++;
457 tcg_debug_assert(n
< TCG_MAX_TEMPS
);
458 return memset(&s
->temps
[n
], 0, sizeof(TCGTemp
));
461 static inline TCGTemp
*tcg_global_alloc(TCGContext
*s
)
463 tcg_debug_assert(s
->nb_globals
== s
->nb_temps
);
465 return tcg_temp_alloc(s
);
468 static int tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
469 TCGReg reg
, const char *name
)
473 if (TCG_TARGET_REG_BITS
== 32 && type
!= TCG_TYPE_I32
) {
477 ts
= tcg_global_alloc(s
);
478 ts
->base_type
= type
;
483 tcg_regset_set_reg(s
->reserved_regs
, reg
);
485 return temp_idx(s
, ts
);
488 void tcg_set_frame(TCGContext
*s
, TCGReg reg
, intptr_t start
, intptr_t size
)
491 s
->frame_start
= start
;
492 s
->frame_end
= start
+ size
;
493 idx
= tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, reg
, "_frame");
494 s
->frame_temp
= &s
->temps
[idx
];
497 TCGv_i32
tcg_global_reg_new_i32(TCGReg reg
, const char *name
)
499 TCGContext
*s
= &tcg_ctx
;
502 if (tcg_regset_test_reg(s
->reserved_regs
, reg
)) {
505 idx
= tcg_global_reg_new_internal(s
, TCG_TYPE_I32
, reg
, name
);
506 return MAKE_TCGV_I32(idx
);
509 TCGv_i64
tcg_global_reg_new_i64(TCGReg reg
, const char *name
)
511 TCGContext
*s
= &tcg_ctx
;
514 if (tcg_regset_test_reg(s
->reserved_regs
, reg
)) {
517 idx
= tcg_global_reg_new_internal(s
, TCG_TYPE_I64
, reg
, name
);
518 return MAKE_TCGV_I64(idx
);
521 int tcg_global_mem_new_internal(TCGType type
, TCGv_ptr base
,
522 intptr_t offset
, const char *name
)
524 TCGContext
*s
= &tcg_ctx
;
525 TCGTemp
*base_ts
= &s
->temps
[GET_TCGV_PTR(base
)];
526 TCGTemp
*ts
= tcg_global_alloc(s
);
527 int indirect_reg
= 0, bigendian
= 0;
528 #ifdef HOST_WORDS_BIGENDIAN
532 if (!base_ts
->fixed_reg
) {
534 base_ts
->indirect_base
= 1;
537 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
538 TCGTemp
*ts2
= tcg_global_alloc(s
);
541 ts
->base_type
= TCG_TYPE_I64
;
542 ts
->type
= TCG_TYPE_I32
;
543 ts
->indirect_reg
= indirect_reg
;
544 ts
->mem_allocated
= 1;
545 ts
->mem_base
= base_ts
;
546 ts
->mem_offset
= offset
+ bigendian
* 4;
547 pstrcpy(buf
, sizeof(buf
), name
);
548 pstrcat(buf
, sizeof(buf
), "_0");
549 ts
->name
= strdup(buf
);
551 tcg_debug_assert(ts2
== ts
+ 1);
552 ts2
->base_type
= TCG_TYPE_I64
;
553 ts2
->type
= TCG_TYPE_I32
;
554 ts2
->indirect_reg
= indirect_reg
;
555 ts2
->mem_allocated
= 1;
556 ts2
->mem_base
= base_ts
;
557 ts2
->mem_offset
= offset
+ (1 - bigendian
) * 4;
558 pstrcpy(buf
, sizeof(buf
), name
);
559 pstrcat(buf
, sizeof(buf
), "_1");
560 ts
->name
= strdup(buf
);
562 ts
->base_type
= type
;
564 ts
->indirect_reg
= indirect_reg
;
565 ts
->mem_allocated
= 1;
566 ts
->mem_base
= base_ts
;
567 ts
->mem_offset
= offset
;
570 return temp_idx(s
, ts
);
573 static int tcg_temp_new_internal(TCGType type
, int temp_local
)
575 TCGContext
*s
= &tcg_ctx
;
579 k
= type
+ (temp_local
? TCG_TYPE_COUNT
: 0);
580 idx
= find_first_bit(s
->free_temps
[k
].l
, TCG_MAX_TEMPS
);
581 if (idx
< TCG_MAX_TEMPS
) {
582 /* There is already an available temp with the right type. */
583 clear_bit(idx
, s
->free_temps
[k
].l
);
586 ts
->temp_allocated
= 1;
587 tcg_debug_assert(ts
->base_type
== type
);
588 tcg_debug_assert(ts
->temp_local
== temp_local
);
590 ts
= tcg_temp_alloc(s
);
591 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
592 TCGTemp
*ts2
= tcg_temp_alloc(s
);
594 ts
->base_type
= type
;
595 ts
->type
= TCG_TYPE_I32
;
596 ts
->temp_allocated
= 1;
597 ts
->temp_local
= temp_local
;
599 tcg_debug_assert(ts2
== ts
+ 1);
600 ts2
->base_type
= TCG_TYPE_I64
;
601 ts2
->type
= TCG_TYPE_I32
;
602 ts2
->temp_allocated
= 1;
603 ts2
->temp_local
= temp_local
;
605 ts
->base_type
= type
;
607 ts
->temp_allocated
= 1;
608 ts
->temp_local
= temp_local
;
610 idx
= temp_idx(s
, ts
);
613 #if defined(CONFIG_DEBUG_TCG)
619 TCGv_i32
tcg_temp_new_internal_i32(int temp_local
)
623 idx
= tcg_temp_new_internal(TCG_TYPE_I32
, temp_local
);
624 return MAKE_TCGV_I32(idx
);
627 TCGv_i64
tcg_temp_new_internal_i64(int temp_local
)
631 idx
= tcg_temp_new_internal(TCG_TYPE_I64
, temp_local
);
632 return MAKE_TCGV_I64(idx
);
635 static void tcg_temp_free_internal(int idx
)
637 TCGContext
*s
= &tcg_ctx
;
641 #if defined(CONFIG_DEBUG_TCG)
643 if (s
->temps_in_use
< 0) {
644 fprintf(stderr
, "More temporaries freed than allocated!\n");
648 assert(idx
>= s
->nb_globals
&& idx
< s
->nb_temps
);
650 assert(ts
->temp_allocated
!= 0);
651 ts
->temp_allocated
= 0;
653 k
= ts
->base_type
+ (ts
->temp_local
? TCG_TYPE_COUNT
: 0);
654 set_bit(idx
, s
->free_temps
[k
].l
);
657 void tcg_temp_free_i32(TCGv_i32 arg
)
659 tcg_temp_free_internal(GET_TCGV_I32(arg
));
662 void tcg_temp_free_i64(TCGv_i64 arg
)
664 tcg_temp_free_internal(GET_TCGV_I64(arg
));
667 TCGv_i32
tcg_const_i32(int32_t val
)
670 t0
= tcg_temp_new_i32();
671 tcg_gen_movi_i32(t0
, val
);
675 TCGv_i64
tcg_const_i64(int64_t val
)
678 t0
= tcg_temp_new_i64();
679 tcg_gen_movi_i64(t0
, val
);
683 TCGv_i32
tcg_const_local_i32(int32_t val
)
686 t0
= tcg_temp_local_new_i32();
687 tcg_gen_movi_i32(t0
, val
);
691 TCGv_i64
tcg_const_local_i64(int64_t val
)
694 t0
= tcg_temp_local_new_i64();
695 tcg_gen_movi_i64(t0
, val
);
699 #if defined(CONFIG_DEBUG_TCG)
700 void tcg_clear_temp_count(void)
702 TCGContext
*s
= &tcg_ctx
;
706 int tcg_check_temp_count(void)
708 TCGContext
*s
= &tcg_ctx
;
709 if (s
->temps_in_use
) {
710 /* Clear the count so that we don't give another
711 * warning immediately next time around.
720 /* Note: we convert the 64 bit args to 32 bit and do some alignment
721 and endian swap. Maybe it would be better to do the alignment
722 and endian swap in tcg_reg_alloc_call(). */
723 void tcg_gen_callN(TCGContext
*s
, void *func
, TCGArg ret
,
724 int nargs
, TCGArg
*args
)
726 int i
, real_args
, nb_rets
, pi
, pi_first
;
727 unsigned sizemask
, flags
;
730 info
= g_hash_table_lookup(s
->helpers
, (gpointer
)func
);
732 sizemask
= info
->sizemask
;
734 #if defined(__sparc__) && !defined(__arch64__) \
735 && !defined(CONFIG_TCG_INTERPRETER)
736 /* We have 64-bit values in one register, but need to pass as two
737 separate parameters. Split them. */
738 int orig_sizemask
= sizemask
;
739 int orig_nargs
= nargs
;
742 TCGV_UNUSED_I64(retl
);
743 TCGV_UNUSED_I64(reth
);
745 TCGArg
*split_args
= __builtin_alloca(sizeof(TCGArg
) * nargs
* 2);
746 for (i
= real_args
= 0; i
< nargs
; ++i
) {
747 int is_64bit
= sizemask
& (1 << (i
+1)*2);
749 TCGv_i64 orig
= MAKE_TCGV_I64(args
[i
]);
750 TCGv_i32 h
= tcg_temp_new_i32();
751 TCGv_i32 l
= tcg_temp_new_i32();
752 tcg_gen_extr_i64_i32(l
, h
, orig
);
753 split_args
[real_args
++] = GET_TCGV_I32(h
);
754 split_args
[real_args
++] = GET_TCGV_I32(l
);
756 split_args
[real_args
++] = args
[i
];
763 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
764 for (i
= 0; i
< nargs
; ++i
) {
765 int is_64bit
= sizemask
& (1 << (i
+1)*2);
766 int is_signed
= sizemask
& (2 << (i
+1)*2);
768 TCGv_i64 temp
= tcg_temp_new_i64();
769 TCGv_i64 orig
= MAKE_TCGV_I64(args
[i
]);
771 tcg_gen_ext32s_i64(temp
, orig
);
773 tcg_gen_ext32u_i64(temp
, orig
);
775 args
[i
] = GET_TCGV_I64(temp
);
778 #endif /* TCG_TARGET_EXTEND_ARGS */
780 pi_first
= pi
= s
->gen_next_parm_idx
;
781 if (ret
!= TCG_CALL_DUMMY_ARG
) {
782 #if defined(__sparc__) && !defined(__arch64__) \
783 && !defined(CONFIG_TCG_INTERPRETER)
784 if (orig_sizemask
& 1) {
785 /* The 32-bit ABI is going to return the 64-bit value in
786 the %o0/%o1 register pair. Prepare for this by using
787 two return temporaries, and reassemble below. */
788 retl
= tcg_temp_new_i64();
789 reth
= tcg_temp_new_i64();
790 s
->gen_opparam_buf
[pi
++] = GET_TCGV_I64(reth
);
791 s
->gen_opparam_buf
[pi
++] = GET_TCGV_I64(retl
);
794 s
->gen_opparam_buf
[pi
++] = ret
;
798 if (TCG_TARGET_REG_BITS
< 64 && (sizemask
& 1)) {
799 #ifdef HOST_WORDS_BIGENDIAN
800 s
->gen_opparam_buf
[pi
++] = ret
+ 1;
801 s
->gen_opparam_buf
[pi
++] = ret
;
803 s
->gen_opparam_buf
[pi
++] = ret
;
804 s
->gen_opparam_buf
[pi
++] = ret
+ 1;
808 s
->gen_opparam_buf
[pi
++] = ret
;
816 for (i
= 0; i
< nargs
; i
++) {
817 int is_64bit
= sizemask
& (1 << (i
+1)*2);
818 if (TCG_TARGET_REG_BITS
< 64 && is_64bit
) {
819 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
820 /* some targets want aligned 64 bit args */
822 s
->gen_opparam_buf
[pi
++] = TCG_CALL_DUMMY_ARG
;
826 /* If stack grows up, then we will be placing successive
827 arguments at lower addresses, which means we need to
828 reverse the order compared to how we would normally
829 treat either big or little-endian. For those arguments
830 that will wind up in registers, this still works for
831 HPPA (the only current STACK_GROWSUP target) since the
832 argument registers are *also* allocated in decreasing
833 order. If another such target is added, this logic may
834 have to get more complicated to differentiate between
835 stack arguments and register arguments. */
836 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
837 s
->gen_opparam_buf
[pi
++] = args
[i
] + 1;
838 s
->gen_opparam_buf
[pi
++] = args
[i
];
840 s
->gen_opparam_buf
[pi
++] = args
[i
];
841 s
->gen_opparam_buf
[pi
++] = args
[i
] + 1;
847 s
->gen_opparam_buf
[pi
++] = args
[i
];
850 s
->gen_opparam_buf
[pi
++] = (uintptr_t)func
;
851 s
->gen_opparam_buf
[pi
++] = flags
;
853 i
= s
->gen_next_op_idx
;
854 tcg_debug_assert(i
< OPC_BUF_SIZE
);
855 tcg_debug_assert(pi
<= OPPARAM_BUF_SIZE
);
857 /* Set links for sequential allocation during translation. */
858 s
->gen_op_buf
[i
] = (TCGOp
){
859 .opc
= INDEX_op_call
,
867 /* Make sure the calli field didn't overflow. */
868 tcg_debug_assert(s
->gen_op_buf
[i
].calli
== real_args
);
870 s
->gen_last_op_idx
= i
;
871 s
->gen_next_op_idx
= i
+ 1;
872 s
->gen_next_parm_idx
= pi
;
874 #if defined(__sparc__) && !defined(__arch64__) \
875 && !defined(CONFIG_TCG_INTERPRETER)
876 /* Free all of the parts we allocated above. */
877 for (i
= real_args
= 0; i
< orig_nargs
; ++i
) {
878 int is_64bit
= orig_sizemask
& (1 << (i
+1)*2);
880 TCGv_i32 h
= MAKE_TCGV_I32(args
[real_args
++]);
881 TCGv_i32 l
= MAKE_TCGV_I32(args
[real_args
++]);
882 tcg_temp_free_i32(h
);
883 tcg_temp_free_i32(l
);
888 if (orig_sizemask
& 1) {
889 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
890 Note that describing these as TCGv_i64 eliminates an unnecessary
891 zero-extension that tcg_gen_concat_i32_i64 would create. */
892 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret
), retl
, reth
);
893 tcg_temp_free_i64(retl
);
894 tcg_temp_free_i64(reth
);
896 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
897 for (i
= 0; i
< nargs
; ++i
) {
898 int is_64bit
= sizemask
& (1 << (i
+1)*2);
900 TCGv_i64 temp
= MAKE_TCGV_I64(args
[i
]);
901 tcg_temp_free_i64(temp
);
904 #endif /* TCG_TARGET_EXTEND_ARGS */
907 static void tcg_reg_alloc_start(TCGContext
*s
)
911 for(i
= 0; i
< s
->nb_globals
; i
++) {
914 ts
->val_type
= TEMP_VAL_REG
;
916 ts
->val_type
= TEMP_VAL_MEM
;
919 for(i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
921 if (ts
->temp_local
) {
922 ts
->val_type
= TEMP_VAL_MEM
;
924 ts
->val_type
= TEMP_VAL_DEAD
;
926 ts
->mem_allocated
= 0;
930 memset(s
->reg_to_temp
, 0, sizeof(s
->reg_to_temp
));
933 static char *tcg_get_arg_str_ptr(TCGContext
*s
, char *buf
, int buf_size
,
936 int idx
= temp_idx(s
, ts
);
938 if (idx
< s
->nb_globals
) {
939 pstrcpy(buf
, buf_size
, ts
->name
);
940 } else if (ts
->temp_local
) {
941 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
943 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
948 static char *tcg_get_arg_str_idx(TCGContext
*s
, char *buf
,
949 int buf_size
, int idx
)
951 assert(idx
>= 0 && idx
< s
->nb_temps
);
952 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, &s
->temps
[idx
]);
955 /* Find helper name. */
956 static inline const char *tcg_find_helper(TCGContext
*s
, uintptr_t val
)
958 const char *ret
= NULL
;
960 TCGHelperInfo
*info
= g_hash_table_lookup(s
->helpers
, (gpointer
)val
);
968 static const char * const cond_name
[] =
970 [TCG_COND_NEVER
] = "never",
971 [TCG_COND_ALWAYS
] = "always",
972 [TCG_COND_EQ
] = "eq",
973 [TCG_COND_NE
] = "ne",
974 [TCG_COND_LT
] = "lt",
975 [TCG_COND_GE
] = "ge",
976 [TCG_COND_LE
] = "le",
977 [TCG_COND_GT
] = "gt",
978 [TCG_COND_LTU
] = "ltu",
979 [TCG_COND_GEU
] = "geu",
980 [TCG_COND_LEU
] = "leu",
981 [TCG_COND_GTU
] = "gtu"
984 static const char * const ldst_name
[] =
1000 void tcg_dump_ops(TCGContext
*s
)
1006 for (oi
= s
->gen_first_op_idx
; oi
>= 0; oi
= op
->next
) {
1007 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
1008 const TCGOpDef
*def
;
1012 op
= &s
->gen_op_buf
[oi
];
1014 def
= &tcg_op_defs
[c
];
1015 args
= &s
->gen_opparam_buf
[op
->args
];
1017 if (c
== INDEX_op_insn_start
) {
1018 qemu_log("%s ----", oi
!= s
->gen_first_op_idx
? "\n" : "");
1020 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
1022 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1023 a
= ((target_ulong
)args
[i
* 2 + 1] << 32) | args
[i
* 2];
1027 qemu_log(" " TARGET_FMT_lx
, a
);
1029 } else if (c
== INDEX_op_call
) {
1030 /* variable number of arguments */
1031 nb_oargs
= op
->callo
;
1032 nb_iargs
= op
->calli
;
1033 nb_cargs
= def
->nb_cargs
;
1035 /* function name, flags, out args */
1036 qemu_log(" %s %s,$0x%" TCG_PRIlx
",$%d", def
->name
,
1037 tcg_find_helper(s
, args
[nb_oargs
+ nb_iargs
]),
1038 args
[nb_oargs
+ nb_iargs
+ 1], nb_oargs
);
1039 for (i
= 0; i
< nb_oargs
; i
++) {
1040 qemu_log(",%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
1043 for (i
= 0; i
< nb_iargs
; i
++) {
1044 TCGArg arg
= args
[nb_oargs
+ i
];
1045 const char *t
= "<dummy>";
1046 if (arg
!= TCG_CALL_DUMMY_ARG
) {
1047 t
= tcg_get_arg_str_idx(s
, buf
, sizeof(buf
), arg
);
1052 qemu_log(" %s ", def
->name
);
1054 nb_oargs
= def
->nb_oargs
;
1055 nb_iargs
= def
->nb_iargs
;
1056 nb_cargs
= def
->nb_cargs
;
1059 for (i
= 0; i
< nb_oargs
; i
++) {
1063 qemu_log("%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
1066 for (i
= 0; i
< nb_iargs
; i
++) {
1070 qemu_log("%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
1074 case INDEX_op_brcond_i32
:
1075 case INDEX_op_setcond_i32
:
1076 case INDEX_op_movcond_i32
:
1077 case INDEX_op_brcond2_i32
:
1078 case INDEX_op_setcond2_i32
:
1079 case INDEX_op_brcond_i64
:
1080 case INDEX_op_setcond_i64
:
1081 case INDEX_op_movcond_i64
:
1082 if (args
[k
] < ARRAY_SIZE(cond_name
) && cond_name
[args
[k
]]) {
1083 qemu_log(",%s", cond_name
[args
[k
++]]);
1085 qemu_log(",$0x%" TCG_PRIlx
, args
[k
++]);
1089 case INDEX_op_qemu_ld_i32
:
1090 case INDEX_op_qemu_st_i32
:
1091 case INDEX_op_qemu_ld_i64
:
1092 case INDEX_op_qemu_st_i64
:
1094 TCGMemOpIdx oi
= args
[k
++];
1095 TCGMemOp op
= get_memop(oi
);
1096 unsigned ix
= get_mmuidx(oi
);
1098 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
1099 qemu_log(",$0x%x,%u", op
, ix
);
1101 const char *s_al
= "", *s_op
;
1102 if (op
& MO_AMASK
) {
1103 if ((op
& MO_AMASK
) == MO_ALIGN
) {
1109 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
1110 qemu_log(",%s%s,%u", s_al
, s_op
, ix
);
1120 case INDEX_op_set_label
:
1122 case INDEX_op_brcond_i32
:
1123 case INDEX_op_brcond_i64
:
1124 case INDEX_op_brcond2_i32
:
1125 qemu_log("%s$L%d", k
? "," : "", arg_label(args
[k
])->id
);
1131 for (; i
< nb_cargs
; i
++, k
++) {
1132 qemu_log("%s$0x%" TCG_PRIlx
, k
? "," : "", args
[k
]);
1139 /* we give more priority to constraints with fewer registers */
1140 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
1142 const TCGArgConstraint
*arg_ct
;
1145 arg_ct
= &def
->args_ct
[k
];
1146 if (arg_ct
->ct
& TCG_CT_ALIAS
) {
1147 /* an alias is equivalent to a single register */
1150 if (!(arg_ct
->ct
& TCG_CT_REG
))
1153 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
1154 if (tcg_regset_test_reg(arg_ct
->u
.regs
, i
))
1158 return TCG_TARGET_NB_REGS
- n
+ 1;
1161 /* sort from highest priority to lowest */
1162 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
1164 int i
, j
, p1
, p2
, tmp
;
1166 for(i
= 0; i
< n
; i
++)
1167 def
->sorted_args
[start
+ i
] = start
+ i
;
1170 for(i
= 0; i
< n
- 1; i
++) {
1171 for(j
= i
+ 1; j
< n
; j
++) {
1172 p1
= get_constraint_priority(def
, def
->sorted_args
[start
+ i
]);
1173 p2
= get_constraint_priority(def
, def
->sorted_args
[start
+ j
]);
1175 tmp
= def
->sorted_args
[start
+ i
];
1176 def
->sorted_args
[start
+ i
] = def
->sorted_args
[start
+ j
];
1177 def
->sorted_args
[start
+ j
] = tmp
;
1183 void tcg_add_target_add_op_defs(const TCGTargetOpDef
*tdefs
)
1191 if (tdefs
->op
== (TCGOpcode
)-1)
1194 assert((unsigned)op
< NB_OPS
);
1195 def
= &tcg_op_defs
[op
];
1196 #if defined(CONFIG_DEBUG_TCG)
1197 /* Duplicate entry in op definitions? */
1201 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
1202 for(i
= 0; i
< nb_args
; i
++) {
1203 ct_str
= tdefs
->args_ct_str
[i
];
1204 /* Incomplete TCGTargetOpDef entry? */
1205 assert(ct_str
!= NULL
);
1206 tcg_regset_clear(def
->args_ct
[i
].u
.regs
);
1207 def
->args_ct
[i
].ct
= 0;
1208 if (ct_str
[0] >= '0' && ct_str
[0] <= '9') {
1210 oarg
= ct_str
[0] - '0';
1211 assert(oarg
< def
->nb_oargs
);
1212 assert(def
->args_ct
[oarg
].ct
& TCG_CT_REG
);
1213 /* TCG_CT_ALIAS is for the output arguments. The input
1214 argument is tagged with TCG_CT_IALIAS. */
1215 def
->args_ct
[i
] = def
->args_ct
[oarg
];
1216 def
->args_ct
[oarg
].ct
= TCG_CT_ALIAS
;
1217 def
->args_ct
[oarg
].alias_index
= i
;
1218 def
->args_ct
[i
].ct
|= TCG_CT_IALIAS
;
1219 def
->args_ct
[i
].alias_index
= oarg
;
1222 if (*ct_str
== '\0')
1226 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
1230 if (target_parse_constraint(&def
->args_ct
[i
], &ct_str
) < 0) {
1231 fprintf(stderr
, "Invalid constraint '%s' for arg %d of operation '%s'\n",
1232 ct_str
, i
, def
->name
);
1240 /* TCGTargetOpDef entry with too much information? */
1241 assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
1243 /* sort the constraints (XXX: this is just a heuristic) */
1244 sort_constraints(def
, 0, def
->nb_oargs
);
1245 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
1251 printf("%s: sorted=", def
->name
);
1252 for(i
= 0; i
< def
->nb_oargs
+ def
->nb_iargs
; i
++)
1253 printf(" %d", def
->sorted_args
[i
]);
1260 #if defined(CONFIG_DEBUG_TCG)
1262 for (op
= 0; op
< tcg_op_defs_max
; op
++) {
1263 const TCGOpDef
*def
= &tcg_op_defs
[op
];
1264 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
1265 /* Wrong entry in op definitions? */
1267 fprintf(stderr
, "Invalid op definition for %s\n", def
->name
);
1271 /* Missing entry in op definitions? */
1273 fprintf(stderr
, "Missing op definition for %s\n", def
->name
);
1284 void tcg_op_remove(TCGContext
*s
, TCGOp
*op
)
1286 int next
= op
->next
;
1287 int prev
= op
->prev
;
1290 s
->gen_op_buf
[next
].prev
= prev
;
1292 s
->gen_last_op_idx
= prev
;
1295 s
->gen_op_buf
[prev
].next
= next
;
1297 s
->gen_first_op_idx
= next
;
1300 memset(op
, -1, sizeof(*op
));
1302 #ifdef CONFIG_PROFILER
1307 #ifdef USE_LIVENESS_ANALYSIS
1308 /* liveness analysis: end of function: all temps are dead, and globals
1309 should be in memory. */
1310 static inline void tcg_la_func_end(TCGContext
*s
, uint8_t *dead_temps
,
1313 memset(dead_temps
, 1, s
->nb_temps
);
1314 memset(mem_temps
, 1, s
->nb_globals
);
1315 memset(mem_temps
+ s
->nb_globals
, 0, s
->nb_temps
- s
->nb_globals
);
1318 /* liveness analysis: end of basic block: all temps are dead, globals
1319 and local temps should be in memory. */
1320 static inline void tcg_la_bb_end(TCGContext
*s
, uint8_t *dead_temps
,
1325 memset(dead_temps
, 1, s
->nb_temps
);
1326 memset(mem_temps
, 1, s
->nb_globals
);
1327 for(i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
1328 mem_temps
[i
] = s
->temps
[i
].temp_local
;
1332 /* Liveness analysis: update the op_dead_args array to tell if a
1333 given input argument is dead. Instructions updating dead
1334 temporaries are removed. */
1335 static void tcg_liveness_analysis(TCGContext
*s
)
1337 uint8_t *dead_temps
, *mem_temps
;
1338 int oi
, oi_prev
, nb_ops
;
1340 nb_ops
= s
->gen_next_op_idx
;
1341 s
->op_dead_args
= tcg_malloc(nb_ops
* sizeof(uint16_t));
1342 s
->op_sync_args
= tcg_malloc(nb_ops
* sizeof(uint8_t));
1344 dead_temps
= tcg_malloc(s
->nb_temps
);
1345 mem_temps
= tcg_malloc(s
->nb_temps
);
1346 tcg_la_func_end(s
, dead_temps
, mem_temps
);
1348 for (oi
= s
->gen_last_op_idx
; oi
>= 0; oi
= oi_prev
) {
1349 int i
, nb_iargs
, nb_oargs
;
1350 TCGOpcode opc_new
, opc_new2
;
1356 TCGOp
* const op
= &s
->gen_op_buf
[oi
];
1357 TCGArg
* const args
= &s
->gen_opparam_buf
[op
->args
];
1358 TCGOpcode opc
= op
->opc
;
1359 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
1368 nb_oargs
= op
->callo
;
1369 nb_iargs
= op
->calli
;
1370 call_flags
= args
[nb_oargs
+ nb_iargs
+ 1];
1372 /* pure functions can be removed if their result is unused */
1373 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
1374 for (i
= 0; i
< nb_oargs
; i
++) {
1376 if (!dead_temps
[arg
] || mem_temps
[arg
]) {
1377 goto do_not_remove_call
;
1384 /* output args are dead */
1387 for (i
= 0; i
< nb_oargs
; i
++) {
1389 if (dead_temps
[arg
]) {
1390 dead_args
|= (1 << i
);
1392 if (mem_temps
[arg
]) {
1393 sync_args
|= (1 << i
);
1395 dead_temps
[arg
] = 1;
1399 if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
1400 /* globals should be synced to memory */
1401 memset(mem_temps
, 1, s
->nb_globals
);
1403 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
1404 TCG_CALL_NO_READ_GLOBALS
))) {
1405 /* globals should go back to memory */
1406 memset(dead_temps
, 1, s
->nb_globals
);
1409 /* record arguments that die in this helper */
1410 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
1412 if (arg
!= TCG_CALL_DUMMY_ARG
) {
1413 if (dead_temps
[arg
]) {
1414 dead_args
|= (1 << i
);
1418 /* input arguments are live for preceding opcodes */
1419 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
1421 dead_temps
[arg
] = 0;
1423 s
->op_dead_args
[oi
] = dead_args
;
1424 s
->op_sync_args
[oi
] = sync_args
;
1428 case INDEX_op_insn_start
:
1430 case INDEX_op_discard
:
1431 /* mark the temporary as dead */
1432 dead_temps
[args
[0]] = 1;
1433 mem_temps
[args
[0]] = 0;
1436 case INDEX_op_add2_i32
:
1437 opc_new
= INDEX_op_add_i32
;
1439 case INDEX_op_sub2_i32
:
1440 opc_new
= INDEX_op_sub_i32
;
1442 case INDEX_op_add2_i64
:
1443 opc_new
= INDEX_op_add_i64
;
1445 case INDEX_op_sub2_i64
:
1446 opc_new
= INDEX_op_sub_i64
;
1450 /* Test if the high part of the operation is dead, but not
1451 the low part. The result can be optimized to a simple
1452 add or sub. This happens often for x86_64 guest when the
1453 cpu mode is set to 32 bit. */
1454 if (dead_temps
[args
[1]] && !mem_temps
[args
[1]]) {
1455 if (dead_temps
[args
[0]] && !mem_temps
[args
[0]]) {
1458 /* Replace the opcode and adjust the args in place,
1459 leaving 3 unused args at the end. */
1460 op
->opc
= opc
= opc_new
;
1463 /* Fall through and mark the single-word operation live. */
1469 case INDEX_op_mulu2_i32
:
1470 opc_new
= INDEX_op_mul_i32
;
1471 opc_new2
= INDEX_op_muluh_i32
;
1472 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
1474 case INDEX_op_muls2_i32
:
1475 opc_new
= INDEX_op_mul_i32
;
1476 opc_new2
= INDEX_op_mulsh_i32
;
1477 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
1479 case INDEX_op_mulu2_i64
:
1480 opc_new
= INDEX_op_mul_i64
;
1481 opc_new2
= INDEX_op_muluh_i64
;
1482 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
1484 case INDEX_op_muls2_i64
:
1485 opc_new
= INDEX_op_mul_i64
;
1486 opc_new2
= INDEX_op_mulsh_i64
;
1487 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
1492 if (dead_temps
[args
[1]] && !mem_temps
[args
[1]]) {
1493 if (dead_temps
[args
[0]] && !mem_temps
[args
[0]]) {
1494 /* Both parts of the operation are dead. */
1497 /* The high part of the operation is dead; generate the low. */
1498 op
->opc
= opc
= opc_new
;
1501 } else if (have_opc_new2
&& dead_temps
[args
[0]]
1502 && !mem_temps
[args
[0]]) {
1503 /* The low part of the operation is dead; generate the high. */
1504 op
->opc
= opc
= opc_new2
;
1511 /* Mark the single-word operation live. */
1516 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1517 nb_iargs
= def
->nb_iargs
;
1518 nb_oargs
= def
->nb_oargs
;
1520 /* Test if the operation can be removed because all
1521 its outputs are dead. We assume that nb_oargs == 0
1522 implies side effects */
1523 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
1524 for (i
= 0; i
< nb_oargs
; i
++) {
1526 if (!dead_temps
[arg
] || mem_temps
[arg
]) {
1531 tcg_op_remove(s
, op
);
1534 /* output args are dead */
1537 for (i
= 0; i
< nb_oargs
; i
++) {
1539 if (dead_temps
[arg
]) {
1540 dead_args
|= (1 << i
);
1542 if (mem_temps
[arg
]) {
1543 sync_args
|= (1 << i
);
1545 dead_temps
[arg
] = 1;
1549 /* if end of basic block, update */
1550 if (def
->flags
& TCG_OPF_BB_END
) {
1551 tcg_la_bb_end(s
, dead_temps
, mem_temps
);
1552 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
1553 /* globals should be synced to memory */
1554 memset(mem_temps
, 1, s
->nb_globals
);
1557 /* record arguments that die in this opcode */
1558 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
1560 if (dead_temps
[arg
]) {
1561 dead_args
|= (1 << i
);
1564 /* input arguments are live for preceding opcodes */
1565 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
1567 dead_temps
[arg
] = 0;
1569 s
->op_dead_args
[oi
] = dead_args
;
1570 s
->op_sync_args
[oi
] = sync_args
;
1577 /* Dummy liveness analysis: allocate op_dead_args and op_sync_args with one
   entry per op (gen_next_op_idx entries) and clear them, so that no
   argument is ever reported as dead or as needing a sync. */
1578 static void tcg_liveness_analysis(TCGContext
*s
)
1580 int nb_ops
= s
->gen_next_op_idx
;
1582 s
->op_dead_args
= tcg_malloc(nb_ops
* sizeof(uint16_t));
1583 memset(s
->op_dead_args
, 0, nb_ops
* sizeof(uint16_t));
1584 s
->op_sync_args
= tcg_malloc(nb_ops
* sizeof(uint8_t));
1585 memset(s
->op_sync_args
, 0, nb_ops
* sizeof(uint8_t));
/* Debug helper: print every temp (name from tcg_get_arg_str_idx) together
   with where its value currently lives - a host register name, an
   "offset(base register)" memory slot, or a "$0x..." constant - and then
   walk the host registers that have a non-NULL reg_to_temp[] entry. */
1590 static void dump_regs(TCGContext
*s
)
1596 for(i
= 0; i
< s
->nb_temps
; i
++) {
1598 printf(" %10s: ", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
), i
));
1599 switch(ts
->val_type
) {
1601 printf("%s", tcg_target_reg_names
[ts
->reg
]);
1604 printf("%d(%s)", (int)ts
->mem_offset
,
1605 tcg_target_reg_names
[ts
->mem_base
->reg
]);
1607 case TEMP_VAL_CONST
:
1608 printf("$0x%" TCG_PRIlx
, ts
->val
);
1620 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
1621 if (s
->reg_to_temp
[i
] != NULL
) {
1623 tcg_target_reg_names
[i
],
1624 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
/* Consistency check between reg_to_temp[] and the temps.
   First pass: the temp recorded for a host register must have val_type
   TEMP_VAL_REG and must name that same register.
   Second pass: every non-fixed temp whose val_type is TEMP_VAL_REG must be
   the one recorded in reg_to_temp[] for its register.
   Any mismatch is reported with printf. */
1629 static void check_regs(TCGContext
*s
)
1636 for (reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
1637 ts
= s
->reg_to_temp
[reg
];
1639 if (ts
->val_type
!= TEMP_VAL_REG
|| ts
->reg
!= reg
) {
1640 printf("Inconsistency for register %s:\n",
1641 tcg_target_reg_names
[reg
]);
1646 for (k
= 0; k
< s
->nb_temps
; k
++) {
1648 if (ts
->val_type
== TEMP_VAL_REG
&& !ts
->fixed_reg
1649 && s
->reg_to_temp
[ts
->reg
] != ts
) {
1650 printf("Inconsistency for temp %s:\n",
1651 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
1653 printf("reg state:\n");
/* Give temp index 'temp' a slot in the frame: the slot starts at
   current_frame_offset relative to frame_temp, the temp is flagged
   mem_allocated, and current_frame_offset advances by
   sizeof(tcg_target_long). Except on 64-bit sparc, the offset is first
   rounded up to a multiple of sizeof(tcg_target_long). */
1661 static void temp_allocate_frame(TCGContext
*s
, int temp
)
1664 ts
= &s
->temps
[temp
];
1665 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1666 /* Sparc64 stack is accessed with offset of 2047 */
1667 s
->current_frame_offset
= (s
->current_frame_offset
+
1668 (tcg_target_long
)sizeof(tcg_target_long
) - 1) &
1669 ~(sizeof(tcg_target_long
) - 1);
1671 if (s
->current_frame_offset
+ (tcg_target_long
)sizeof(tcg_target_long
) >
1675 ts
->mem_offset
= s
->current_frame_offset
;
1676 ts
->mem_base
= s
->frame_temp
;
1677 ts
->mem_allocated
= 1;
1678 s
->current_frame_offset
+= sizeof(tcg_target_long
);
/* Forward declaration: temp_load() is called by tcg_reg_sync() before its
   definition further down. */
1681 static void temp_load(TCGContext
*, TCGTemp
*, TCGRegSet
, TCGRegSet
);
1683 /* Sync register 'reg' by saving it to the memory slot of the temporary it
   holds. A store is only emitted when the temp is neither mem_coherent nor
   a fixed register; a frame slot is allocated first if the temp has none,
   and for an indirect_reg temp the base pointer is loaded first with 'reg'
   added to allocated_regs so it is not picked. The temp is marked
   mem_coherent afterwards. */
1684 static void tcg_reg_sync(TCGContext
*s
, TCGReg reg
, TCGRegSet allocated_regs
)
1686 TCGTemp
*ts
= s
->reg_to_temp
[reg
];
1688 assert(ts
->val_type
== TEMP_VAL_REG
);
1689 if (!ts
->mem_coherent
&& !ts
->fixed_reg
) {
1690 if (!ts
->mem_allocated
) {
1691 temp_allocate_frame(s
, temp_idx(s
, ts
));
1692 } else if (ts
->indirect_reg
) {
1693 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
1694 temp_load(s
, ts
->mem_base
,
1695 tcg_target_available_regs
[TCG_TYPE_PTR
],
1698 tcg_out_st(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
1700 ts
->mem_coherent
= 1;
1703 /* Free register 'reg' by spilling the corresponding temporary if necessary:
   the register is synced with tcg_reg_sync(), the temp becomes
   TEMP_VAL_MEM and the reg_to_temp[] entry is cleared. */
1704 static void tcg_reg_free(TCGContext
*s
, TCGReg reg
, TCGRegSet allocated_regs
)
1706 TCGTemp
*ts
= s
->reg_to_temp
[reg
];
1709 tcg_reg_sync(s
, reg
, allocated_regs
);
1710 ts
->val_type
= TEMP_VAL_MEM
;
1711 s
->reg_to_temp
[reg
] = NULL
;
1715 /* Allocate a register belonging to desired_regs & ~allocated_regs.
   Candidates are scanned in tcg_target_reg_alloc_order, or in
   indirect_reg_alloc_order when 'rev' is true. Registers with no temp
   attached are tried first; failing that, a candidate register is freed
   with tcg_reg_free(). */
1716 static TCGReg
tcg_reg_alloc(TCGContext
*s
, TCGRegSet desired_regs
,
1717 TCGRegSet allocated_regs
, bool rev
)
1719 int i
, n
= ARRAY_SIZE(tcg_target_reg_alloc_order
);
1724 tcg_regset_andnot(reg_ct
, desired_regs
, allocated_regs
);
1725 order
= rev
? indirect_reg_alloc_order
: tcg_target_reg_alloc_order
;
1727 /* first try free registers */
1728 for(i
= 0; i
< n
; i
++) {
1730 if (tcg_regset_test_reg(reg_ct
, reg
) && s
->reg_to_temp
[reg
] == NULL
)
1734 /* XXX: do better spill choice */
1735 for(i
= 0; i
< n
; i
++) {
1737 if (tcg_regset_test_reg(reg_ct
, reg
)) {
1738 tcg_reg_free(s
, reg
, allocated_regs
);
1746 /* Make sure the temporary is in a register. If needed, allocate the register
1747 from DESIRED while avoiding ALLOCATED.
   A constant is materialised with tcg_out_movi() and left non-coherent
   with memory; a value loaded with tcg_out_ld() is marked mem_coherent
   (for an indirect_reg temp the base pointer is loaded first). Finally the
   temp is set to TEMP_VAL_REG and recorded in reg_to_temp[]. */
1748 static void temp_load(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet desired_regs
,
1749 TCGRegSet allocated_regs
)
1753 switch (ts
->val_type
) {
1756 case TEMP_VAL_CONST
:
1757 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
, ts
->indirect_base
);
1758 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
1759 ts
->mem_coherent
= 0;
1762 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
, ts
->indirect_base
);
1763 if (ts
->indirect_reg
) {
1764 tcg_regset_set_reg(allocated_regs
, reg
);
1765 temp_load(s
, ts
->mem_base
,
1766 tcg_target_available_regs
[TCG_TYPE_PTR
],
1769 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
1770 ts
->mem_coherent
= 1;
1777 ts
->val_type
= TEMP_VAL_REG
;
1778 s
->reg_to_temp
[reg
] = ts
;
1781 /* Mark a temporary as dead. Fixed-register temps are special-cased first.
   Otherwise any register mapping is dropped from reg_to_temp[], and the
   temp becomes TEMP_VAL_MEM if it is a global or a local temp, or
   TEMP_VAL_DEAD for any other temp. */
1782 static inline void temp_dead(TCGContext
*s
, TCGTemp
*ts
)
1784 if (ts
->fixed_reg
) {
1787 if (ts
->val_type
== TEMP_VAL_REG
) {
1788 s
->reg_to_temp
[ts
->reg
] = NULL
;
1790 ts
->val_type
= (temp_idx(s
, ts
) < s
->nb_globals
|| ts
->temp_local
1791 ? TEMP_VAL_MEM
: TEMP_VAL_DEAD
);
1794 /* Sync a temporary to memory. 'allocated_regs' is used in case a
1795 temporary register needs to be allocated to store a constant.
   Fixed-register temps are special-cased first. A TEMP_VAL_CONST temp is
   loaded into a register with temp_load(); tcg_reg_sync() writes a
   register back to the temp's memory slot. */
1796 static void temp_sync(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
)
1798 if (ts
->fixed_reg
) {
1801 switch (ts
->val_type
) {
1802 case TEMP_VAL_CONST
:
1803 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
], allocated_regs
);
1806 tcg_reg_sync(s
, ts
->reg
, allocated_regs
);
1816 /* Save a temporary to memory. 'allocated_regs' is used in case a
1817 temporary register needs to be allocated to store a constant.
   With USE_LIVENESS_ANALYSIS, a temp that is not an indirect base is
   expected to be in memory already (or to be a fixed register), which is
   checked with a debug assertion. temp_sync() performs the actual sync. */
1818 static inline void temp_save(TCGContext
*s
, TCGTemp
*ts
,
1819 TCGRegSet allocated_regs
)
1821 #ifdef USE_LIVENESS_ANALYSIS
1822 /* ??? Liveness does not yet incorporate indirect bases. */
1823 if (!ts
->indirect_base
) {
1824 /* The liveness analysis already ensures that globals are back
1825 in memory. Keep an assert for safety. */
1826 tcg_debug_assert(ts
->val_type
== TEMP_VAL_MEM
|| ts
->fixed_reg
);
1830 temp_sync(s
, ts
, allocated_regs
);
1834 /* Save globals to their canonical location and assume they can be
1835 modified by the following code. 'allocated_regs' is used in case a
1836 temporary register needs to be allocated to store a constant.
   Calls temp_save() on each of the first nb_globals temps. */
1837 static void save_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
1841 for (i
= 0; i
< s
->nb_globals
; i
++) {
1842 temp_save(s
, &s
->temps
[i
], allocated_regs
);
1846 /* Sync globals to their canonical location and assume they can be
1847 read by the following code. 'allocated_regs' is used in case a
1848 temporary register needs to be allocated to store a constant.
   With USE_LIVENESS_ANALYSIS, a global that is not an indirect base is
   checked with a debug assertion (not in a register, or mem_coherent).
   temp_sync() performs the actual sync. */
1849 static void sync_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
1853 for (i
= 0; i
< s
->nb_globals
; i
++) {
1854 TCGTemp
*ts
= &s
->temps
[i
];
1855 #ifdef USE_LIVENESS_ANALYSIS
1856 /* ??? Liveness does not yet incorporate indirect bases. */
1857 if (!ts
->indirect_base
) {
1858 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
1860 || ts
->mem_coherent
);
1864 temp_sync(s
, ts
, allocated_regs
);
1868 /* At the end of a basic block, we assume all temporaries are dead and
1869 all globals are stored at their canonical location.
   Local temps (temp_local) are saved with temp_save(). For other temps,
   with USE_LIVENESS_ANALYSIS and unless the temp is an indirect base, an
   assertion checks that the temp is already TEMP_VAL_DEAD. Globals are
   then written back with save_globals(). */
1870 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
1874 for (i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
1875 TCGTemp
*ts
= &s
->temps
[i
];
1876 if (ts
->temp_local
) {
1877 temp_save(s
, ts
, allocated_regs
);
1879 #ifdef USE_LIVENESS_ANALYSIS
1880 /* ??? Liveness does not yet incorporate indirect bases. */
1881 if (!ts
->indirect_base
) {
1882 /* The liveness analysis already ensures that temps are dead.
1883 Keep an assert for safety. */
1884 assert(ts
->val_type
== TEMP_VAL_DEAD
);
1892 save_globals(s
, allocated_regs
);
/* Test bit n of the 'dead_args' / 'sync_args' masks. Both macros expand to
   a reference to a variable of that exact name, so one must be in scope
   wherever they are used. */
1895 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
1896 #define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
/* Register allocation for a movi op; args[0] is the output temp.
   For a fixed-register output the constant is emitted immediately with
   tcg_out_movi(). Otherwise no code is generated here: any register held
   by the output is released and the temp is marked TEMP_VAL_CONST.
   Bit 0 of sync_args / dead_args then decides whether the output is
   synced (temp_sync) and whether it is treated as dead. */
1898 static void tcg_reg_alloc_movi(TCGContext
*s
, const TCGArg
*args
,
1899 uint16_t dead_args
, uint8_t sync_args
)
1902 tcg_target_ulong val
;
1904 ots
= &s
->temps
[args
[0]];
1907 if (ots
->fixed_reg
) {
1908 /* for fixed registers, we do not do any constant
1910 tcg_out_movi(s
, ots
->type
, ots
->reg
, val
);
1912 /* The movi is not explicitly generated here */
1913 if (ots
->val_type
== TEMP_VAL_REG
) {
1914 s
->reg_to_temp
[ots
->reg
] = NULL
;
1916 ots
->val_type
= TEMP_VAL_CONST
;
1919 if (NEED_SYNC_ARG(0)) {
1920 temp_sync(s
, ots
, s
->reserved_regs
);
1922 if (IS_DEAD_ARG(0)) {
/* Register allocation for a mov op; args[0] is the output temp (ots) and
   args[1] the input temp (ts). Four outcomes are visible below:
     - output dead but needing a sync: the source register is stored
       straight into the output's memory slot with tcg_out_st();
     - source is a constant: the output is marked TEMP_VAL_CONST;
     - input dies here and neither temp is a fixed register: the mov is
       suppressed;
     - otherwise a register is allocated for the output if it has none and
       tcg_out_mov() is emitted.
   In the register cases the output ends up TEMP_VAL_REG, not coherent with
   memory, and is synced when NEED_SYNC_ARG(0) is set. */
1927 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOpDef
*def
,
1928 const TCGArg
*args
, uint16_t dead_args
,
1931 TCGRegSet allocated_regs
;
1933 TCGType otype
, itype
;
1935 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
1936 ots
= &s
->temps
[args
[0]];
1937 ts
= &s
->temps
[args
[1]];
1939 /* Note that otype != itype for no-op truncation. */
1943 /* If the source value is not in a register, and we're going to be
1944 forced to have it in a register in order to perform the copy,
1945 then copy the SOURCE value into its own register first. That way
1946 we don't have to reload SOURCE the next time it is used. */
1947 if (((NEED_SYNC_ARG(0) || ots
->fixed_reg
) && ts
->val_type
!= TEMP_VAL_REG
)
1948 || ts
->val_type
== TEMP_VAL_MEM
) {
1949 temp_load(s
, ts
, tcg_target_available_regs
[itype
], allocated_regs
);
1952 if (IS_DEAD_ARG(0) && !ots
->fixed_reg
) {
1953 /* mov to a non-saved dead register makes no sense (even with
1954 liveness analysis disabled). */
1955 assert(NEED_SYNC_ARG(0));
1956 /* The code above should have moved the temp to a register. */
1957 assert(ts
->val_type
== TEMP_VAL_REG
);
1958 if (!ots
->mem_allocated
) {
1959 temp_allocate_frame(s
, args
[0]);
1961 if (ots
->indirect_reg
) {
1962 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
1963 temp_load(s
, ots
->mem_base
,
1964 tcg_target_available_regs
[TCG_TYPE_PTR
],
1967 tcg_out_st(s
, otype
, ts
->reg
, ots
->mem_base
->reg
, ots
->mem_offset
);
1968 if (IS_DEAD_ARG(1)) {
1972 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
1973 /* propagate constant */
1974 if (ots
->val_type
== TEMP_VAL_REG
) {
1975 s
->reg_to_temp
[ots
->reg
] = NULL
;
1977 ots
->val_type
= TEMP_VAL_CONST
;
1979 if (IS_DEAD_ARG(1)) {
1983 /* The code in the first if block should have moved the
1984 temp to a register. */
1985 assert(ts
->val_type
== TEMP_VAL_REG
);
1986 if (IS_DEAD_ARG(1) && !ts
->fixed_reg
&& !ots
->fixed_reg
) {
1987 /* the mov can be suppressed */
1988 if (ots
->val_type
== TEMP_VAL_REG
) {
1989 s
->reg_to_temp
[ots
->reg
] = NULL
;
1994 if (ots
->val_type
!= TEMP_VAL_REG
) {
1995 /* When allocating a new register, make sure to not spill the
1997 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
1998 ots
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
1999 allocated_regs
, ots
->indirect_base
);
2001 tcg_out_mov(s
, otype
, ots
->reg
, ts
->reg
);
2003 ots
->val_type
= TEMP_VAL_REG
;
2004 ots
->mem_coherent
= 0;
2005 s
->reg_to_temp
[ots
->reg
] = ots
;
2006 if (NEED_SYNC_ARG(0)) {
2007 tcg_reg_sync(s
, ots
->reg
, allocated_regs
);
/* Generic register allocation for one op described by 'def'.
   Steps, in order:
     1. copy the constant arguments into new_args[];
     2. satisfy the input constraints, visiting inputs in def->sorted_args
        order: a constant accepted by tcg_target_const_match() is passed
        as an immediate, anything else is loaded into a register;
     3. release the inputs flagged dead in dead_args;
     4. handle TCG_OPF_BB_END, TCG_OPF_CALL_CLOBBER and
        TCG_OPF_SIDE_EFFECTS;
     5. choose output registers (an aliased output takes the register of
        the input it aliases);
     6. emit the instruction with tcg_out_op();
     7. move results into fixed registers where needed, then sync or
        release outputs according to sync_args / dead_args. */
2012 static void tcg_reg_alloc_op(TCGContext
*s
,
2013 const TCGOpDef
*def
, TCGOpcode opc
,
2014 const TCGArg
*args
, uint16_t dead_args
,
2017 TCGRegSet allocated_regs
;
2018 int i
, k
, nb_iargs
, nb_oargs
;
2021 const TCGArgConstraint
*arg_ct
;
2023 TCGArg new_args
[TCG_MAX_OP_ARGS
];
2024 int const_args
[TCG_MAX_OP_ARGS
];
2026 nb_oargs
= def
->nb_oargs
;
2027 nb_iargs
= def
->nb_iargs
;
2029 /* copy constants */
2030 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
2031 args
+ nb_oargs
+ nb_iargs
,
2032 sizeof(TCGArg
) * def
->nb_cargs
);
2034 /* satisfy input constraints */
2035 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
2036 for(k
= 0; k
< nb_iargs
; k
++) {
2037 i
= def
->sorted_args
[nb_oargs
+ k
];
2039 arg_ct
= &def
->args_ct
[i
];
2040 ts
= &s
->temps
[arg
];
2042 if (ts
->val_type
== TEMP_VAL_CONST
2043 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
)) {
2044 /* constant is OK for instruction */
2046 new_args
[i
] = ts
->val
;
2050 temp_load(s
, ts
, arg_ct
->u
.regs
, allocated_regs
);
2052 if (arg_ct
->ct
& TCG_CT_IALIAS
) {
2053 if (ts
->fixed_reg
) {
2054 /* if fixed register, we must allocate a new register
2055 if the alias is not the same register */
2056 if (arg
!= args
[arg_ct
->alias_index
])
2057 goto allocate_in_reg
;
2059 /* if the input is aliased to an output and if it is
2060 not dead after the instruction, we must allocate
2061 a new register and move it */
2062 if (!IS_DEAD_ARG(i
)) {
2063 goto allocate_in_reg
;
2065 /* check if the current register has already been allocated
2066 for another input aliased to an output */
2068 for (k2
= 0 ; k2
< k
; k2
++) {
2069 i2
= def
->sorted_args
[nb_oargs
+ k2
];
2070 if ((def
->args_ct
[i2
].ct
& TCG_CT_IALIAS
) &&
2071 (new_args
[i2
] == ts
->reg
)) {
2072 goto allocate_in_reg
;
2078 if (tcg_regset_test_reg(arg_ct
->u
.regs
, reg
)) {
2079 /* nothing to do : the constraint is satisfied */
2082 /* allocate a new register matching the constraint
2083 and move the temporary register into it */
2084 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
,
2086 tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
);
2090 tcg_regset_set_reg(allocated_regs
, reg
);
2094 /* mark dead temporaries and free the associated registers */
2095 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2096 if (IS_DEAD_ARG(i
)) {
2097 temp_dead(s
, &s
->temps
[args
[i
]]);
2101 if (def
->flags
& TCG_OPF_BB_END
) {
2102 tcg_reg_alloc_bb_end(s
, allocated_regs
);
2104 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
2105 /* XXX: permit generic clobber register list ? */
2106 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
2107 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
2108 tcg_reg_free(s
, i
, allocated_regs
);
2112 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
2113 /* sync globals if the op has side effects and might trigger
2115 sync_globals(s
, allocated_regs
);
2118 /* satisfy the output constraints */
2119 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
2120 for(k
= 0; k
< nb_oargs
; k
++) {
2121 i
= def
->sorted_args
[k
];
2123 arg_ct
= &def
->args_ct
[i
];
2124 ts
= &s
->temps
[arg
];
2125 if (arg_ct
->ct
& TCG_CT_ALIAS
) {
2126 reg
= new_args
[arg_ct
->alias_index
];
2128 /* if fixed register, we try to use it */
2130 if (ts
->fixed_reg
&&
2131 tcg_regset_test_reg(arg_ct
->u
.regs
, reg
)) {
2134 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
,
2137 tcg_regset_set_reg(allocated_regs
, reg
);
2138 /* if a fixed register is used, then a move will be done afterwards */
2139 if (!ts
->fixed_reg
) {
2140 if (ts
->val_type
== TEMP_VAL_REG
) {
2141 s
->reg_to_temp
[ts
->reg
] = NULL
;
2143 ts
->val_type
= TEMP_VAL_REG
;
2145 /* temp value is modified, so the value kept in memory is
2146 potentially not the same */
2147 ts
->mem_coherent
= 0;
2148 s
->reg_to_temp
[reg
] = ts
;
2155 /* emit instruction */
2156 tcg_out_op(s
, opc
, new_args
, const_args
);
2158 /* move the outputs in the correct register if needed */
2159 for(i
= 0; i
< nb_oargs
; i
++) {
2160 ts
= &s
->temps
[args
[i
]];
2162 if (ts
->fixed_reg
&& ts
->reg
!= reg
) {
2163 tcg_out_mov(s
, ts
->type
, ts
->reg
, reg
);
2165 if (NEED_SYNC_ARG(i
)) {
2166 tcg_reg_sync(s
, reg
, allocated_regs
);
2168 if (IS_DEAD_ARG(i
)) {
/* STACK_DIR(x): x negated when TCG_TARGET_STACK_GROWSUP is defined,
   x unchanged otherwise. */
2174 #ifdef TCG_TARGET_STACK_GROWSUP
2175 #define STACK_DIR(x) (-(x))
2177 #define STACK_DIR(x) (x)
/* Register allocation for a helper call.
   args[] holds nb_oargs outputs, then nb_iargs inputs, then the function
   address, then the call flags.
   Steps, in order:
     1. inputs beyond the ARRAY_SIZE(tcg_target_call_iarg_regs) register
        arguments are loaded and stored to the call stack, one
        tcg_target_long slot each, starting at TCG_TARGET_CALL_STACK_OFFSET;
     2. the remaining inputs are placed in tcg_target_call_iarg_regs[],
        freeing each argument register first;
     3. inputs flagged dead are released;
     4. call-clobbered registers are freed;
     5. globals are synced or saved depending on the
        TCG_CALL_NO_READ_GLOBALS / TCG_CALL_NO_WRITE_GLOBALS flags;
     6. the call is emitted with tcg_out_call();
     7. outputs are bound to tcg_target_call_oarg_regs[] (with a move for
        fixed-register outputs), then synced or released according to
        sync_args / dead_args. */
2180 static void tcg_reg_alloc_call(TCGContext
*s
, int nb_oargs
, int nb_iargs
,
2181 const TCGArg
* const args
, uint16_t dead_args
,
2184 int flags
, nb_regs
, i
;
2188 intptr_t stack_offset
;
2189 size_t call_stack_size
;
2190 tcg_insn_unit
*func_addr
;
2192 TCGRegSet allocated_regs
;
2194 func_addr
= (tcg_insn_unit
*)(intptr_t)args
[nb_oargs
+ nb_iargs
];
2195 flags
= args
[nb_oargs
+ nb_iargs
+ 1];
2197 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2198 if (nb_regs
> nb_iargs
) {
2202 /* assign stack slots first */
2203 call_stack_size
= (nb_iargs
- nb_regs
) * sizeof(tcg_target_long
);
2204 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
2205 ~(TCG_TARGET_STACK_ALIGN
- 1);
2206 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
2207 if (allocate_args
) {
2208 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2209 preallocate call stack */
2213 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
2214 for(i
= nb_regs
; i
< nb_iargs
; i
++) {
2215 arg
= args
[nb_oargs
+ i
];
2216 #ifdef TCG_TARGET_STACK_GROWSUP
2217 stack_offset
-= sizeof(tcg_target_long
);
2219 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2220 ts
= &s
->temps
[arg
];
2221 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
2223 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
2225 #ifndef TCG_TARGET_STACK_GROWSUP
2226 stack_offset
+= sizeof(tcg_target_long
);
2230 /* assign input registers */
2231 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
2232 for(i
= 0; i
< nb_regs
; i
++) {
2233 arg
= args
[nb_oargs
+ i
];
2234 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2235 ts
= &s
->temps
[arg
];
2236 reg
= tcg_target_call_iarg_regs
[i
];
2237 tcg_reg_free(s
, reg
, allocated_regs
);
2239 if (ts
->val_type
== TEMP_VAL_REG
) {
2240 if (ts
->reg
!= reg
) {
2241 tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
);
2246 tcg_regset_clear(arg_set
);
2247 tcg_regset_set_reg(arg_set
, reg
);
2248 temp_load(s
, ts
, arg_set
, allocated_regs
);
2251 tcg_regset_set_reg(allocated_regs
, reg
);
2255 /* mark dead temporaries and free the associated registers */
2256 for(i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2257 if (IS_DEAD_ARG(i
)) {
2258 temp_dead(s
, &s
->temps
[args
[i
]]);
2262 /* clobber call registers */
2263 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
2264 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
2265 tcg_reg_free(s
, i
, allocated_regs
);
2269 /* Save globals if they might be written by the helper, sync them if
2270 they might be read. */
2271 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
2273 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
2274 sync_globals(s
, allocated_regs
);
2276 save_globals(s
, allocated_regs
);
2279 tcg_out_call(s
, func_addr
);
2281 /* assign output registers and emit moves if needed */
2282 for(i
= 0; i
< nb_oargs
; i
++) {
2284 ts
= &s
->temps
[arg
];
2285 reg
= tcg_target_call_oarg_regs
[i
];
2286 assert(s
->reg_to_temp
[reg
] == NULL
);
2288 if (ts
->fixed_reg
) {
2289 if (ts
->reg
!= reg
) {
2290 tcg_out_mov(s
, ts
->type
, ts
->reg
, reg
);
2293 if (ts
->val_type
== TEMP_VAL_REG
) {
2294 s
->reg_to_temp
[ts
->reg
] = NULL
;
2296 ts
->val_type
= TEMP_VAL_REG
;
2298 ts
->mem_coherent
= 0;
2299 s
->reg_to_temp
[reg
] = ts
;
2300 if (NEED_SYNC_ARG(i
)) {
2301 tcg_reg_sync(s
, reg
, allocated_regs
);
2303 if (IS_DEAD_ARG(i
)) {
2310 #ifdef CONFIG_PROFILER
/* Per-opcode counters; tcg_gen_code() increments the entry of each op it
   processes. */
2312 static int64_t tcg_table_op_count
[NB_OPS
];
/* Print one "<opcode name> <count>" line per opcode through cpu_fprintf. */
2314 void tcg_dump_op_count(FILE *f
, fprintf_function cpu_fprintf
)
2318 for (i
= 0; i
< NB_OPS
; i
++) {
2319 cpu_fprintf(f
, "%s %" PRId64
"\n", tcg_op_defs
[i
].name
,
2320 tcg_table_op_count
[i
]);
/* Variant for builds without CONFIG_PROFILER: only prints a notice. */
2324 void tcg_dump_op_count(FILE *f
, fprintf_function cpu_fprintf
)
2326 cpu_fprintf(f
, "[TCG profiler not compiled]\n");
/* Generate host code for the ops held in 's', writing it at gen_code_buf.
   Runs tcg_liveness_analysis(), resets the register allocator state with
   tcg_reg_alloc_start(), then walks the ops from gen_first_op_idx and
   dispatches each one to the matching allocator (mov, movi, call or the
   generic tcg_reg_alloc_op). insn_start ops record per-instruction data
   and end offsets. After each op the code pointer is compared against
   code_gen_highwater to detect a pending buffer overflow. On completion
   the instruction cache is flushed over the generated range and
   tcg_current_code_size(s) is returned.
   NOTE(review): the values returned on the overflow and
   tcg_out_tb_finalize() failure paths are not visible here - confirm. */
2331 int tcg_gen_code(TCGContext
*s
, tcg_insn_unit
*gen_code_buf
)
2333 int i
, oi
, oi_next
, num_insns
;
2335 #ifdef CONFIG_PROFILER
2339 n
= s
->gen_last_op_idx
+ 1;
2341 if (n
> s
->op_count_max
) {
2342 s
->op_count_max
= n
;
2347 if (n
> s
->temp_count_max
) {
2348 s
->temp_count_max
= n
;
2354 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
))) {
2361 #ifdef CONFIG_PROFILER
2362 s
->opt_time
-= profile_getclock();
2365 #ifdef USE_TCG_OPTIMIZATIONS
2369 #ifdef CONFIG_PROFILER
2370 s
->opt_time
+= profile_getclock();
2371 s
->la_time
-= profile_getclock();
2374 tcg_liveness_analysis(s
);
2376 #ifdef CONFIG_PROFILER
2377 s
->la_time
+= profile_getclock();
2381 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
))) {
2382 qemu_log("OP after optimization and liveness analysis:\n");
2388 tcg_reg_alloc_start(s
);
2390 s
->code_buf
= gen_code_buf
;
2391 s
->code_ptr
= gen_code_buf
;
2396 for (oi
= s
->gen_first_op_idx
; oi
>= 0; oi
= oi_next
) {
2397 TCGOp
* const op
= &s
->gen_op_buf
[oi
];
2398 TCGArg
* const args
= &s
->gen_opparam_buf
[op
->args
];
2399 TCGOpcode opc
= op
->opc
;
2400 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2401 uint16_t dead_args
= s
->op_dead_args
[oi
];
2402 uint8_t sync_args
= s
->op_sync_args
[oi
];
2405 #ifdef CONFIG_PROFILER
2406 tcg_table_op_count
[opc
]++;
2410 case INDEX_op_mov_i32
:
2411 case INDEX_op_mov_i64
:
2412 tcg_reg_alloc_mov(s
, def
, args
, dead_args
, sync_args
);
2414 case INDEX_op_movi_i32
:
2415 case INDEX_op_movi_i64
:
2416 tcg_reg_alloc_movi(s
, args
, dead_args
, sync_args
);
2418 case INDEX_op_insn_start
:
2419 if (num_insns
>= 0) {
2420 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
2423 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
2425 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2426 a
= ((target_ulong
)args
[i
* 2 + 1] << 32) | args
[i
* 2];
2430 s
->gen_insn_data
[num_insns
][i
] = a
;
2433 case INDEX_op_discard
:
2434 temp_dead(s
, &s
->temps
[args
[0]]);
2436 case INDEX_op_set_label
:
2437 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
2438 tcg_out_label(s
, arg_label(args
[0]), s
->code_ptr
);
2441 tcg_reg_alloc_call(s
, op
->callo
, op
->calli
, args
,
2442 dead_args
, sync_args
);
2445 /* Sanity check that we've not introduced any unhandled opcodes. */
2446 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2449 /* Note: in order to speed up the code, it would be much
2450 faster to have specialized register allocator functions for
2451 some common argument patterns */
2452 tcg_reg_alloc_op(s
, def
, opc
, args
, dead_args
, sync_args
);
2458 /* Test for (pending) buffer overflow. The assumption is that any
2459 one operation beginning below the high water mark cannot overrun
2460 the buffer completely. Thus we can test for overflow after
2461 generating code without having to check during generation. */
2462 if (unlikely((void *)s
->code_ptr
> s
->code_gen_highwater
)) {
2466 tcg_debug_assert(num_insns
>= 0);
2467 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
2469 /* Generate TB finalization at the end of block */
2470 if (!tcg_out_tb_finalize(s
)) {
2474 /* flush instruction cache */
2475 flush_icache_range((uintptr_t)s
->code_buf
, (uintptr_t)s
->code_ptr
);
2477 return tcg_current_code_size(s
);
2480 #ifdef CONFIG_PROFILER
/* Print the profiler statistics accumulated in tcg_ctx through
   cpu_fprintf: total JIT cycles (interm_time + code_time), TB counts,
   per-TB averages, cycles per op and per byte, and the share of time
   spent in each phase. Per-TB averages divide by tb_div_count, which is
   tb_count or 1 when no TB has been translated. */
2481 void tcg_dump_info(FILE *f
, fprintf_function cpu_fprintf
)
2483 TCGContext
*s
= &tcg_ctx
;
2484 int64_t tb_count
= s
->tb_count
;
2485 int64_t tb_div_count
= tb_count
? tb_count
: 1;
2486 int64_t tot
= s
->interm_time
+ s
->code_time
;
2488 cpu_fprintf(f
, "JIT cycles %" PRId64
" (%0.3f s at 2.4 GHz)\n",
2490 cpu_fprintf(f
, "translated TBs %" PRId64
" (aborted=%" PRId64
" %0.1f%%)\n",
2491 tb_count
, s
->tb_count1
- tb_count
,
2492 (double)(s
->tb_count1
- s
->tb_count
)
2493 / (s
->tb_count1
? s
->tb_count1
: 1) * 100.0);
2494 cpu_fprintf(f
, "avg ops/TB %0.1f max=%d\n",
2495 (double)s
->op_count
/ tb_div_count
, s
->op_count_max
);
2496 cpu_fprintf(f
, "deleted ops/TB %0.2f\n",
2497 (double)s
->del_op_count
/ tb_div_count
);
2498 cpu_fprintf(f
, "avg temps/TB %0.2f max=%d\n",
2499 (double)s
->temp_count
/ tb_div_count
, s
->temp_count_max
);
2500 cpu_fprintf(f
, "avg host code/TB %0.1f\n",
2501 (double)s
->code_out_len
/ tb_div_count
);
2502 cpu_fprintf(f
, "avg search data/TB %0.1f\n",
2503 (double)s
->search_out_len
/ tb_div_count
);
2505 cpu_fprintf(f
, "cycles/op %0.1f\n",
2506 s
->op_count
? (double)tot
/ s
->op_count
: 0);
2507 cpu_fprintf(f
, "cycles/in byte %0.1f\n",
2508 s
->code_in_len
? (double)tot
/ s
->code_in_len
: 0);
2509 cpu_fprintf(f
, "cycles/out byte %0.1f\n",
2510 s
->code_out_len
? (double)tot
/ s
->code_out_len
: 0);
2511 cpu_fprintf(f
, "cycles/search byte %0.1f\n",
2512 s
->search_out_len
? (double)tot
/ s
->search_out_len
: 0);
2516 cpu_fprintf(f
, " gen_interm time %0.1f%%\n",
2517 (double)s
->interm_time
/ tot
* 100.0);
2518 cpu_fprintf(f
, " gen_code time %0.1f%%\n",
2519 (double)s
->code_time
/ tot
* 100.0);
2520 cpu_fprintf(f
, "optim./code time %0.1f%%\n",
2521 (double)s
->opt_time
/ (s
->code_time
? s
->code_time
: 1)
2523 cpu_fprintf(f
, "liveness/code time %0.1f%%\n",
2524 (double)s
->la_time
/ (s
->code_time
? s
->code_time
: 1) * 100.0);
2525 cpu_fprintf(f
, "cpu_restore count %" PRId64
"\n",
2527 cpu_fprintf(f
, " avg cycles %0.1f\n",
2528 s
->restore_count
? (double)s
->restore_time
/ s
->restore_count
: 0);
/* Variant for builds without CONFIG_PROFILER: only prints a notice. */
2531 void tcg_dump_info(FILE *f
, fprintf_function cpu_fprintf
)
2533 cpu_fprintf(f
, "[TCG profiler not compiled]\n");
2537 #ifdef ELF_HOST_MACHINE
2538 /* In order to use this feature, the backend needs to do three things:
2540 (1) Define ELF_HOST_MACHINE to indicate both what value to
2541 put into the ELF image and to indicate support for the feature.
2543 (2) Define tcg_register_jit. This should create a buffer containing
2544 the contents of a .debug_frame section that describes the post-
2545 prologue unwind info for the tcg machine.
2547 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
2550 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* One registered symbol file: a doubly linked list node (next_entry /
   prev_entry) carrying the address and size of the in-memory symbol
   file. */
2557 struct jit_code_entry
{
2558 struct jit_code_entry
*next_entry
;
2559 struct jit_code_entry
*prev_entry
;
2560 const void *symfile_addr
;
2561 uint64_t symfile_size
;
/* Descriptor read by the debugger: an action flag plus pointers to the
   entry concerned by that action and to the first entry of the list. */
2564 struct jit_descriptor
{
2566 uint32_t action_flag
;
2567 struct jit_code_entry
*relevant_entry
;
2568 struct jit_code_entry
*first_entry
;
/* Declared noinline so the function is not inlined into its callers.
   NOTE(review): presumably the debugger places a breakpoint on it -
   confirm against the GDB JIT interface documentation. */
2571 void __jit_debug_register_code(void) __attribute__((noinline
));
2572 void __jit_debug_register_code(void)
2577 /* Must statically initialize the version, because GDB may check
2578 the version before we can set it. */
2579 struct jit_descriptor __jit_debug_descriptor
= { 1, 0, 0, 0 };
2581 /* End GDB interface. */
/* Look up 'str' in the string table 'strtab'. The scan starts at offset 1
   and compares candidates with strcmp().
   NOTE(review): presumably returns the offset of the match within strtab,
   since callers store the result in sh_name fields; the return statement
   is not visible here - confirm. */
2583 static int find_string(const char *strtab
, const char *str
)
2585 const char *p
= strtab
+ 1;
2588 if (strcmp(p
, str
) == 0) {
2595 static void tcg_register_jit_int(void *buf_ptr
, size_t buf_size
,
2596 const void *debug_frame
,
2597 size_t debug_frame_size
)
2599 struct __attribute__((packed
)) DebugInfo
{
2606 uintptr_t cu_low_pc
;
2607 uintptr_t cu_high_pc
;
2610 uintptr_t fn_low_pc
;
2611 uintptr_t fn_high_pc
;
2620 struct DebugInfo di
;
2625 struct ElfImage
*img
;
2627 static const struct ElfImage img_template
= {
2629 .e_ident
[EI_MAG0
] = ELFMAG0
,
2630 .e_ident
[EI_MAG1
] = ELFMAG1
,
2631 .e_ident
[EI_MAG2
] = ELFMAG2
,
2632 .e_ident
[EI_MAG3
] = ELFMAG3
,
2633 .e_ident
[EI_CLASS
] = ELF_CLASS
,
2634 .e_ident
[EI_DATA
] = ELF_DATA
,
2635 .e_ident
[EI_VERSION
] = EV_CURRENT
,
2637 .e_machine
= ELF_HOST_MACHINE
,
2638 .e_version
= EV_CURRENT
,
2639 .e_phoff
= offsetof(struct ElfImage
, phdr
),
2640 .e_shoff
= offsetof(struct ElfImage
, shdr
),
2641 .e_ehsize
= sizeof(ElfW(Shdr
)),
2642 .e_phentsize
= sizeof(ElfW(Phdr
)),
2644 .e_shentsize
= sizeof(ElfW(Shdr
)),
2645 .e_shnum
= ARRAY_SIZE(img
->shdr
),
2646 .e_shstrndx
= ARRAY_SIZE(img
->shdr
) - 1,
2647 #ifdef ELF_HOST_FLAGS
2648 .e_flags
= ELF_HOST_FLAGS
,
2651 .e_ident
[EI_OSABI
] = ELF_OSABI
,
2659 [0] = { .sh_type
= SHT_NULL
},
2660 /* Trick: The contents of code_gen_buffer are not present in
2661 this fake ELF file; that got allocated elsewhere. Therefore
2662 we mark .text as SHT_NOBITS (similar to .bss) so that readers
2663 will not look for contents. We can record any address. */
2665 .sh_type
= SHT_NOBITS
,
2666 .sh_flags
= SHF_EXECINSTR
| SHF_ALLOC
,
2668 [2] = { /* .debug_info */
2669 .sh_type
= SHT_PROGBITS
,
2670 .sh_offset
= offsetof(struct ElfImage
, di
),
2671 .sh_size
= sizeof(struct DebugInfo
),
2673 [3] = { /* .debug_abbrev */
2674 .sh_type
= SHT_PROGBITS
,
2675 .sh_offset
= offsetof(struct ElfImage
, da
),
2676 .sh_size
= sizeof(img
->da
),
2678 [4] = { /* .debug_frame */
2679 .sh_type
= SHT_PROGBITS
,
2680 .sh_offset
= sizeof(struct ElfImage
),
2682 [5] = { /* .symtab */
2683 .sh_type
= SHT_SYMTAB
,
2684 .sh_offset
= offsetof(struct ElfImage
, sym
),
2685 .sh_size
= sizeof(img
->sym
),
2687 .sh_link
= ARRAY_SIZE(img
->shdr
) - 1,
2688 .sh_entsize
= sizeof(ElfW(Sym
)),
2690 [6] = { /* .strtab */
2691 .sh_type
= SHT_STRTAB
,
2692 .sh_offset
= offsetof(struct ElfImage
, str
),
2693 .sh_size
= sizeof(img
->str
),
2697 [1] = { /* code_gen_buffer */
2698 .st_info
= ELF_ST_INFO(STB_GLOBAL
, STT_FUNC
),
2703 .len
= sizeof(struct DebugInfo
) - 4,
2705 .ptr_size
= sizeof(void *),
2707 .cu_lang
= 0x8001, /* DW_LANG_Mips_Assembler */
2709 .fn_name
= "code_gen_buffer"
2712 1, /* abbrev number (the cu) */
2713 0x11, 1, /* DW_TAG_compile_unit, has children */
2714 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
2715 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2716 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2717 0, 0, /* end of abbrev */
2718 2, /* abbrev number (the fn) */
2719 0x2e, 0, /* DW_TAG_subprogram, no children */
2720 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
2721 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2722 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2723 0, 0, /* end of abbrev */
2724 0 /* no more abbrev */
2726 .str
= "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2727 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2730 /* We only need a single jit entry; statically allocate it. */
2731 static struct jit_code_entry one_entry
;
2733 uintptr_t buf
= (uintptr_t)buf_ptr
;
2734 size_t img_size
= sizeof(struct ElfImage
) + debug_frame_size
;
2735 DebugFrameHeader
*dfh
;
2737 img
= g_malloc(img_size
);
2738 *img
= img_template
;
2740 img
->phdr
.p_vaddr
= buf
;
2741 img
->phdr
.p_paddr
= buf
;
2742 img
->phdr
.p_memsz
= buf_size
;
2744 img
->shdr
[1].sh_name
= find_string(img
->str
, ".text");
2745 img
->shdr
[1].sh_addr
= buf
;
2746 img
->shdr
[1].sh_size
= buf_size
;
2748 img
->shdr
[2].sh_name
= find_string(img
->str
, ".debug_info");
2749 img
->shdr
[3].sh_name
= find_string(img
->str
, ".debug_abbrev");
2751 img
->shdr
[4].sh_name
= find_string(img
->str
, ".debug_frame");
2752 img
->shdr
[4].sh_size
= debug_frame_size
;
2754 img
->shdr
[5].sh_name
= find_string(img
->str
, ".symtab");
2755 img
->shdr
[6].sh_name
= find_string(img
->str
, ".strtab");
2757 img
->sym
[1].st_name
= find_string(img
->str
, "code_gen_buffer");
2758 img
->sym
[1].st_value
= buf
;
2759 img
->sym
[1].st_size
= buf_size
;
2761 img
->di
.cu_low_pc
= buf
;
2762 img
->di
.cu_high_pc
= buf
+ buf_size
;
2763 img
->di
.fn_low_pc
= buf
;
2764 img
->di
.fn_high_pc
= buf
+ buf_size
;
2766 dfh
= (DebugFrameHeader
*)(img
+ 1);
2767 memcpy(dfh
, debug_frame
, debug_frame_size
);
2768 dfh
->fde
.func_start
= buf
;
2769 dfh
->fde
.func_len
= buf_size
;
2772 /* Enable this block to be able to debug the ELF image file creation.
2773 One can use readelf, objdump, or other inspection utilities. */
2775 FILE *f
= fopen("/tmp/qemu.jit", "w+b");
2777 if (fwrite(img
, img_size
, 1, f
) != img_size
) {
2778 /* Avoid stupid unused return value warning for fwrite. */
2785 one_entry
.symfile_addr
= img
;
2786 one_entry
.symfile_size
= img_size
;
2788 __jit_debug_descriptor
.action_flag
= JIT_REGISTER_FN
;
2789 __jit_debug_descriptor
.relevant_entry
= &one_entry
;
2790 __jit_debug_descriptor
.first_entry
= &one_entry
;
2791 __jit_debug_register_code();
2794 /* No support for the feature. Provide the entry point expected by exec.c,
2795 and implement the internal function we declared earlier. */
/* Fallback definition of the internal JIT-registration helper, provided for
   builds where the feature is not supported (see the comment just above and
   the closing "#endif" for ELF_HOST_MACHINE).
   Parameters, as named in the signature: buf/size describe a buffer and its
   length; debug_frame/debug_frame_size describe a debug-frame blob and its
   length.
   NOTE(review): the function body is not visible in this excerpt; presumably
   it is an empty stub that ignores all arguments -- confirm in the full file. */
2797 static void tcg_register_jit_int(void *buf
, size_t size
,
2798 const void *debug_frame
,
2799 size_t debug_frame_size
)
/* Non-static entry point kept available when the feature is unsupported; the
   comment introducing this section says it is the symbol exec.c expects.
   buf/buf_size describe the buffer the caller would like to register.
   NOTE(review): the body is not visible in this excerpt; presumably it is an
   empty stub -- confirm in the full file. */
2803 void tcg_register_jit(void *buf
, size_t buf_size
)
2806 #endif /* ELF_HOST_MACHINE */