/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"
/* Define to dump the ELF file used to communicate with GDB. */
#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "sysemu/sysemu.h"
/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrame;
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size;     /* size of one region */
    size_t stride;   /* .size + guard size */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
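/*
 * Worked example (hypothetical numbers): a 64 MB code_gen_buffer split into
 * 16 regions with 4 KB pages gives region.stride = 4 MB and
 * region.size = 4 MB - 4 KB, the last page of each stride being a guard page.
 * A translating thread owns one region at a time and bumps code_gen_ptr
 * inside it; when it hits the high-water mark it grabs the next free region
 * under region.lock instead of forcing a global flush.
 */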
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
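/*
 * The emit/patch helpers above abstract over TCG_TARGET_INSN_UNIT_SIZE:
 * code_ptr is counted in instruction units, not bytes.  For example, on a
 * host whose unit size is 4, tcg_out64() copies 8 bytes and advances
 * code_ptr by 8 / 4 = 2 units, while a call whose width matches the unit
 * size reduces to a single "*s->code_ptr++ = v" store.
 */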
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
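/*
 * Typical use of the relocation machinery for a forward branch, as a
 * sketch only (the relocation type R_EXAMPLE and the exact emission call
 * are target-specific and hypothetical):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);  // remember the site
 *     ...                                              // emit more code
 *     tcg_out_label(s, l);                             // bind the label
 *
 * tcg_resolve_relocs() then revisits every recorded site and lets
 * patch_reloc() rewrite it; a false return (e.g. a displacement out of
 * range) propagates up so the caller can restart code generation.
 */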
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

#include "tcg-target.c.inc"
/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * One of the two operands is a lookup key, with its .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
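/*
 * Concretely: keys that were inserted always have .size != 0 and are
 * ordered by .ptr, whereas a lookup key is built with .size == 0 (see
 * tcg_tb_lookup below), so the comparison degenerates to "does this
 * address fall within [ptr, ptr + size)", which ptr_cmp_tb_tc() answers.
 */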
static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
    void *p = tcg_splitwx_to_rw(cp);
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}
/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
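/*
 * The typical caller maps a host code address back to its TranslationBlock,
 * e.g. when restoring guest state after a fault taken inside translated
 * code; an address outside any translated block simply yields NULL.
 */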
static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}
size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}
/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}
#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
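/*
 * Worked example (hypothetical sizes): with a 256 MB code_gen_buffer and
 * max_cpus = 8, the first iteration (regions_per_thread = 8) already gives
 * 256 MB / 64 = 4 MB >= 2 MB, so 64 regions are used.  With a 32 MB buffer
 * the loop only succeeds at regions_per_thread = 2, i.e. 16 regions of 2 MB.
 */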
/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
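/*
 * When a split-wx scheme is in use, the same code buffer is mapped twice:
 * once writable (rw) and once executable (rx), tcg_splitwx_diff bytes apart,
 * so rx == rw + tcg_splitwx_diff.  Code generation always writes through the
 * rw view, while execution and host-PC lookups use the rx view; the helpers
 * above convert between them (and sanity-check the result under
 * CONFIG_DEBUG_TCG).  With split-wx disabled the diff is 0 and both views
 * coincide.
 */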
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
796 * All TCG threads except the parent (i.e. the one that called tcg_context_init
797 * and registered the target's TCG globals) must register with this function
798 * before initiating translation.
800 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
801 * of tcg_region_init() for the reasoning behind this.
803 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
804 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
805 * is not used anymore for translation once this function is called.
807 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
808 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
810 #ifdef CONFIG_USER_ONLY
811 void tcg_register_thread(void)
813 tcg_ctx
= &tcg_init_ctx
;
816 void tcg_register_thread(void)
818 MachineState
*ms
= MACHINE(qdev_get_machine());
819 TCGContext
*s
= g_malloc(sizeof(*s
));
825 /* Relink mem_base. */
826 for (i
= 0, n
= tcg_init_ctx
.nb_globals
; i
< n
; ++i
) {
827 if (tcg_init_ctx
.temps
[i
].mem_base
) {
828 ptrdiff_t b
= tcg_init_ctx
.temps
[i
].mem_base
- tcg_init_ctx
.temps
;
829 tcg_debug_assert(b
>= 0 && b
< n
);
830 s
->temps
[i
].mem_base
= &s
->temps
[b
];
834 /* Claim an entry in tcg_ctxs */
835 n
= qatomic_fetch_inc(&n_tcg_ctxs
);
836 g_assert(n
< ms
->smp
.max_cpus
);
837 qatomic_set(&tcg_ctxs
[n
], s
);
840 alloc_tcg_plugin_context(s
);
844 qemu_mutex_lock(®ion
.lock
);
845 err
= tcg_region_initial_alloc__locked(tcg_ctx
);
847 qemu_mutex_unlock(®ion
.lock
);
849 #endif /* !CONFIG_USER_ONLY */
852 * Returns the size (in bytes) of all translated code (i.e. from all regions)
853 * currently in the cache.
854 * See also: tcg_code_capacity()
855 * Do not confuse with tcg_current_code_size(); that one applies to a single
858 size_t tcg_code_size(void)
860 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
864 qemu_mutex_lock(®ion
.lock
);
865 total
= region
.agg_size_full
;
866 for (i
= 0; i
< n_ctxs
; i
++) {
867 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
870 size
= qatomic_read(&s
->code_gen_ptr
) - s
->code_gen_buffer
;
871 g_assert(size
<= s
->code_gen_buffer_size
);
874 qemu_mutex_unlock(®ion
.lock
);
879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
881 * See also: tcg_code_size()
883 size_t tcg_code_capacity(void)
885 size_t guard_size
, capacity
;
887 /* no need for synchronization; these variables are set at init time */
888 guard_size
= region
.stride
- region
.size
;
889 capacity
= region
.end
+ guard_size
- region
.start
;
890 capacity
-= region
.n
* (guard_size
+ TCG_HIGHWATER
);
894 size_t tcg_tb_phys_invalidate_count(void)
896 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
900 for (i
= 0; i
< n_ctxs
; i
++) {
901 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
903 total
+= qatomic_read(&s
->tb_phys_invalidate_count
);
/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;

                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;

    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
961 typedef struct TCGHelperInfo
{
968 #include "exec/helper-proto.h"
970 static const TCGHelperInfo all_helpers
[] = {
971 #include "exec/helper-tcg.h"
973 static GHashTable
*helper_table
;
975 static int indirect_reg_alloc_order
[ARRAY_SIZE(tcg_target_reg_alloc_order
)];
976 static void process_op_defs(TCGContext
*s
);
977 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
978 TCGReg reg
, const char *name
);
980 void tcg_context_init(TCGContext
*s
)
982 int op
, total_args
, n
, i
;
984 TCGArgConstraint
*args_ct
;
987 memset(s
, 0, sizeof(*s
));
990 /* Count total number of arguments and allocate the corresponding
993 for(op
= 0; op
< NB_OPS
; op
++) {
994 def
= &tcg_op_defs
[op
];
995 n
= def
->nb_iargs
+ def
->nb_oargs
;
999 args_ct
= g_new0(TCGArgConstraint
, total_args
);
1001 for(op
= 0; op
< NB_OPS
; op
++) {
1002 def
= &tcg_op_defs
[op
];
1003 def
->args_ct
= args_ct
;
1004 n
= def
->nb_iargs
+ def
->nb_oargs
;
1008 /* Register helpers. */
1009 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1010 helper_table
= g_hash_table_new(NULL
, NULL
);
1012 for (i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
1013 g_hash_table_insert(helper_table
, (gpointer
)all_helpers
[i
].func
,
1014 (gpointer
)&all_helpers
[i
]);
1020 /* Reverse the order of the saved registers, assuming they're all at
1021 the start of tcg_target_reg_alloc_order. */
1022 for (n
= 0; n
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++n
) {
1023 int r
= tcg_target_reg_alloc_order
[n
];
1024 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, r
)) {
1028 for (i
= 0; i
< n
; ++i
) {
1029 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[n
- 1 - i
];
1031 for (; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++i
) {
1032 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[i
];
1035 alloc_tcg_plugin_context(s
);
1039 * In user-mode we simply share the init context among threads, since we
1040 * use a single region. See the documentation tcg_region_init() for the
1041 * reasoning behind this.
1042 * In softmmu we will have at most max_cpus TCG threads.
1044 #ifdef CONFIG_USER_ONLY
1045 tcg_ctxs
= &tcg_ctx
;
1048 MachineState
*ms
= MACHINE(qdev_get_machine());
1049 unsigned int max_cpus
= ms
->smp
.max_cpus
;
1050 tcg_ctxs
= g_new(TCGContext
*, max_cpus
);
1053 tcg_debug_assert(!tcg_regset_test_reg(s
->reserved_regs
, TCG_AREG0
));
1054 ts
= tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, TCG_AREG0
, "env");
1055 cpu_env
= temp_tcgv_ptr(ts
);
1059 * Allocate TBs right before their corresponding translated code, making
1060 * sure that TBs and code are on different cache lines.
1062 TranslationBlock
*tcg_tb_alloc(TCGContext
*s
)
1064 uintptr_t align
= qemu_icache_linesize
;
1065 TranslationBlock
*tb
;
1069 tb
= (void *)ROUND_UP((uintptr_t)s
->code_gen_ptr
, align
);
1070 next
= (void *)ROUND_UP((uintptr_t)(tb
+ 1), align
);
1072 if (unlikely(next
> s
->code_gen_highwater
)) {
1073 if (tcg_region_alloc(s
)) {
1078 qatomic_set(&s
->code_gen_ptr
, next
);
1079 s
->data_gen_ptr
= NULL
;
1083 void tcg_prologue_init(TCGContext
*s
)
1085 size_t prologue_size
, total_size
;
1088 /* Put the prologue at the beginning of code_gen_buffer. */
1089 buf0
= s
->code_gen_buffer
;
1090 total_size
= s
->code_gen_buffer_size
;
1093 s
->data_gen_ptr
= NULL
;
1096 * The region trees are not yet configured, but tcg_splitwx_to_rx
1097 * needs the bounds for an assert.
1099 region
.start
= buf0
;
1100 region
.end
= buf0
+ total_size
;
1102 #ifndef CONFIG_TCG_INTERPRETER
1103 tcg_qemu_tb_exec
= (tcg_prologue_fn
*)tcg_splitwx_to_rx(buf0
);
1106 /* Compute a high-water mark, at which we voluntarily flush the buffer
1107 and start over. The size here is arbitrary, significantly larger
1108 than we expect the code generation for any one opcode to require. */
1109 s
->code_gen_highwater
= s
->code_gen_buffer
+ (total_size
- TCG_HIGHWATER
);
1111 #ifdef TCG_TARGET_NEED_POOL_LABELS
1112 s
->pool_labels
= NULL
;
1115 qemu_thread_jit_write();
1116 /* Generate the prologue. */
1117 tcg_target_qemu_prologue(s
);
1119 #ifdef TCG_TARGET_NEED_POOL_LABELS
1120 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1122 int result
= tcg_out_pool_finalize(s
);
1123 tcg_debug_assert(result
== 0);
1128 #ifndef CONFIG_TCG_INTERPRETER
1129 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0
), (uintptr_t)buf0
,
1130 tcg_ptr_byte_diff(buf1
, buf0
));
1133 /* Deduct the prologue from the buffer. */
1134 prologue_size
= tcg_current_code_size(s
);
1135 s
->code_gen_ptr
= buf1
;
1136 s
->code_gen_buffer
= buf1
;
1138 total_size
-= prologue_size
;
1139 s
->code_gen_buffer_size
= total_size
;
1141 tcg_register_jit(tcg_splitwx_to_rx(s
->code_gen_buffer
), total_size
);
1144 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
1145 FILE *logfile
= qemu_log_lock();
1146 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size
);
1147 if (s
->data_gen_ptr
) {
1148 size_t code_size
= s
->data_gen_ptr
- buf0
;
1149 size_t data_size
= prologue_size
- code_size
;
1152 log_disas(buf0
, code_size
);
1154 for (i
= 0; i
< data_size
; i
+= sizeof(tcg_target_ulong
)) {
1155 if (sizeof(tcg_target_ulong
) == 8) {
1156 qemu_log("0x%08" PRIxPTR
": .quad 0x%016" PRIx64
"\n",
1157 (uintptr_t)s
->data_gen_ptr
+ i
,
1158 *(uint64_t *)(s
->data_gen_ptr
+ i
));
1160 qemu_log("0x%08" PRIxPTR
": .long 0x%08x\n",
1161 (uintptr_t)s
->data_gen_ptr
+ i
,
1162 *(uint32_t *)(s
->data_gen_ptr
+ i
));
1166 log_disas(buf0
, prologue_size
);
1170 qemu_log_unlock(logfile
);
    /* Assert that goto_ptr is implemented completely. */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}
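/*
 * At this point the start of code_gen_buffer holds the only hand-written
 * host code TCG needs: tcg_qemu_tb_exec is the trampoline the CPU loop
 * calls to enter a translated block, and tcg_code_gen_epilogue is the
 * common exit path that goto_ptr/exit_tb return through.  The remainder of
 * the buffer (from buf1 onward) is what the region code partitions for
 * translated blocks.
 */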
1180 void tcg_func_start(TCGContext
*s
)
1183 s
->nb_temps
= s
->nb_globals
;
1185 /* No temps have been previously allocated for size or locality. */
1186 memset(s
->free_temps
, 0, sizeof(s
->free_temps
));
1188 /* No constant temps have been previously allocated. */
1189 for (int i
= 0; i
< TCG_TYPE_COUNT
; ++i
) {
1190 if (s
->const_table
[i
]) {
1191 g_hash_table_remove_all(s
->const_table
[i
]);
1197 s
->current_frame_offset
= s
->frame_start
;
1199 #ifdef CONFIG_DEBUG_TCG
1200 s
->goto_tb_issue_mask
= 0;
1203 QTAILQ_INIT(&s
->ops
);
1204 QTAILQ_INIT(&s
->free_ops
);
1205 QSIMPLEQ_INIT(&s
->labels
);
1208 static TCGTemp
*tcg_temp_alloc(TCGContext
*s
)
1210 int n
= s
->nb_temps
++;
1212 if (n
>= TCG_MAX_TEMPS
) {
1213 /* Signal overflow, starting over with fewer guest insns. */
1214 siglongjmp(s
->jmp_trans
, -2);
1216 return memset(&s
->temps
[n
], 0, sizeof(TCGTemp
));
1219 static TCGTemp
*tcg_global_alloc(TCGContext
*s
)
1223 tcg_debug_assert(s
->nb_globals
== s
->nb_temps
);
1224 tcg_debug_assert(s
->nb_globals
< TCG_MAX_TEMPS
);
1226 ts
= tcg_temp_alloc(s
);
1227 ts
->kind
= TEMP_GLOBAL
;
1232 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
1233 TCGReg reg
, const char *name
)
1237 if (TCG_TARGET_REG_BITS
== 32 && type
!= TCG_TYPE_I32
) {
1241 ts
= tcg_global_alloc(s
);
1242 ts
->base_type
= type
;
1244 ts
->kind
= TEMP_FIXED
;
1247 tcg_regset_set_reg(s
->reserved_regs
, reg
);
1252 void tcg_set_frame(TCGContext
*s
, TCGReg reg
, intptr_t start
, intptr_t size
)
1254 s
->frame_start
= start
;
1255 s
->frame_end
= start
+ size
;
1257 = tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, reg
, "_frame");
1260 TCGTemp
*tcg_global_mem_new_internal(TCGType type
, TCGv_ptr base
,
1261 intptr_t offset
, const char *name
)
1263 TCGContext
*s
= tcg_ctx
;
1264 TCGTemp
*base_ts
= tcgv_ptr_temp(base
);
1265 TCGTemp
*ts
= tcg_global_alloc(s
);
1266 int indirect_reg
= 0, bigendian
= 0;
1267 #ifdef HOST_WORDS_BIGENDIAN
1271 switch (base_ts
->kind
) {
1275 /* We do not support double-indirect registers. */
1276 tcg_debug_assert(!base_ts
->indirect_reg
);
1277 base_ts
->indirect_base
= 1;
1278 s
->nb_indirects
+= (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
1283 g_assert_not_reached();
1286 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1287 TCGTemp
*ts2
= tcg_global_alloc(s
);
1290 ts
->base_type
= TCG_TYPE_I64
;
1291 ts
->type
= TCG_TYPE_I32
;
1292 ts
->indirect_reg
= indirect_reg
;
1293 ts
->mem_allocated
= 1;
1294 ts
->mem_base
= base_ts
;
1295 ts
->mem_offset
= offset
+ bigendian
* 4;
1296 pstrcpy(buf
, sizeof(buf
), name
);
1297 pstrcat(buf
, sizeof(buf
), "_0");
1298 ts
->name
= strdup(buf
);
1300 tcg_debug_assert(ts2
== ts
+ 1);
1301 ts2
->base_type
= TCG_TYPE_I64
;
1302 ts2
->type
= TCG_TYPE_I32
;
1303 ts2
->indirect_reg
= indirect_reg
;
1304 ts2
->mem_allocated
= 1;
1305 ts2
->mem_base
= base_ts
;
1306 ts2
->mem_offset
= offset
+ (1 - bigendian
) * 4;
1307 pstrcpy(buf
, sizeof(buf
), name
);
1308 pstrcat(buf
, sizeof(buf
), "_1");
1309 ts2
->name
= strdup(buf
);
1311 ts
->base_type
= type
;
1313 ts
->indirect_reg
= indirect_reg
;
1314 ts
->mem_allocated
= 1;
1315 ts
->mem_base
= base_ts
;
1316 ts
->mem_offset
= offset
;
1322 TCGTemp
*tcg_temp_new_internal(TCGType type
, bool temp_local
)
1324 TCGContext
*s
= tcg_ctx
;
1325 TCGTempKind kind
= temp_local
? TEMP_LOCAL
: TEMP_NORMAL
;
1329 k
= type
+ (temp_local
? TCG_TYPE_COUNT
: 0);
1330 idx
= find_first_bit(s
->free_temps
[k
].l
, TCG_MAX_TEMPS
);
1331 if (idx
< TCG_MAX_TEMPS
) {
1332 /* There is already an available temp with the right type. */
1333 clear_bit(idx
, s
->free_temps
[k
].l
);
1335 ts
= &s
->temps
[idx
];
1336 ts
->temp_allocated
= 1;
1337 tcg_debug_assert(ts
->base_type
== type
);
1338 tcg_debug_assert(ts
->kind
== kind
);
1340 ts
= tcg_temp_alloc(s
);
1341 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1342 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1344 ts
->base_type
= type
;
1345 ts
->type
= TCG_TYPE_I32
;
1346 ts
->temp_allocated
= 1;
1349 tcg_debug_assert(ts2
== ts
+ 1);
1350 ts2
->base_type
= TCG_TYPE_I64
;
1351 ts2
->type
= TCG_TYPE_I32
;
1352 ts2
->temp_allocated
= 1;
1355 ts
->base_type
= type
;
1357 ts
->temp_allocated
= 1;
1362 #if defined(CONFIG_DEBUG_TCG)
1368 TCGv_vec
tcg_temp_new_vec(TCGType type
)
1372 #ifdef CONFIG_DEBUG_TCG
1375 assert(TCG_TARGET_HAS_v64
);
1378 assert(TCG_TARGET_HAS_v128
);
1381 assert(TCG_TARGET_HAS_v256
);
1384 g_assert_not_reached();
1388 t
= tcg_temp_new_internal(type
, 0);
1389 return temp_tcgv_vec(t
);
1392 /* Create a new temp of the same type as an existing temp. */
1393 TCGv_vec
tcg_temp_new_vec_matching(TCGv_vec match
)
1395 TCGTemp
*t
= tcgv_vec_temp(match
);
1397 tcg_debug_assert(t
->temp_allocated
!= 0);
1399 t
= tcg_temp_new_internal(t
->base_type
, 0);
1400 return temp_tcgv_vec(t
);
1403 void tcg_temp_free_internal(TCGTemp
*ts
)
1405 TCGContext
*s
= tcg_ctx
;
1408 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1409 if (ts
->kind
== TEMP_CONST
) {
1413 #if defined(CONFIG_DEBUG_TCG)
1415 if (s
->temps_in_use
< 0) {
1416 fprintf(stderr
, "More temporaries freed than allocated!\n");
1420 tcg_debug_assert(ts
->kind
< TEMP_GLOBAL
);
1421 tcg_debug_assert(ts
->temp_allocated
!= 0);
1422 ts
->temp_allocated
= 0;
1425 k
= ts
->base_type
+ (ts
->kind
== TEMP_NORMAL
? 0 : TCG_TYPE_COUNT
);
1426 set_bit(idx
, s
->free_temps
[k
].l
);
1429 TCGTemp
*tcg_constant_internal(TCGType type
, int64_t val
)
1431 TCGContext
*s
= tcg_ctx
;
1432 GHashTable
*h
= s
->const_table
[type
];
1436 h
= g_hash_table_new(g_int64_hash
, g_int64_equal
);
1437 s
->const_table
[type
] = h
;
1440 ts
= g_hash_table_lookup(h
, &val
);
1442 ts
= tcg_temp_alloc(s
);
1444 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1445 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1447 ts
->base_type
= TCG_TYPE_I64
;
1448 ts
->type
= TCG_TYPE_I32
;
1449 ts
->kind
= TEMP_CONST
;
1450 ts
->temp_allocated
= 1;
1452 * Retain the full value of the 64-bit constant in the low
1453 * part, so that the hash table works. Actual uses will
1454 * truncate the value to the low part.
1458 tcg_debug_assert(ts2
== ts
+ 1);
1459 ts2
->base_type
= TCG_TYPE_I64
;
1460 ts2
->type
= TCG_TYPE_I32
;
1461 ts2
->kind
= TEMP_CONST
;
1462 ts2
->temp_allocated
= 1;
1463 ts2
->val
= val
>> 32;
1465 ts
->base_type
= type
;
1467 ts
->kind
= TEMP_CONST
;
1468 ts
->temp_allocated
= 1;
1471 g_hash_table_insert(h
, &ts
->val
, ts
);
1477 TCGv_vec
tcg_constant_vec(TCGType type
, unsigned vece
, int64_t val
)
1479 val
= dup_const(vece
, val
);
1480 return temp_tcgv_vec(tcg_constant_internal(type
, val
));
1483 TCGv_vec
tcg_constant_vec_matching(TCGv_vec match
, unsigned vece
, int64_t val
)
1485 TCGTemp
*t
= tcgv_vec_temp(match
);
1487 tcg_debug_assert(t
->temp_allocated
!= 0);
1488 return tcg_constant_vec(t
->base_type
, vece
, val
);
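/*
 * Note the split between the two families: tcg_constant_*() returns an
 * interned, read-only TEMP_CONST temp (freeing it is silently ignored, see
 * tcg_temp_free_internal), while the older tcg_const_*() helpers below
 * allocate a fresh mutable temp and emit a movi into it, so their result
 * must eventually be freed by the caller.
 */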
1491 TCGv_i32
tcg_const_i32(int32_t val
)
1494 t0
= tcg_temp_new_i32();
1495 tcg_gen_movi_i32(t0
, val
);
1499 TCGv_i64
tcg_const_i64(int64_t val
)
1502 t0
= tcg_temp_new_i64();
1503 tcg_gen_movi_i64(t0
, val
);
1507 TCGv_i32
tcg_const_local_i32(int32_t val
)
1510 t0
= tcg_temp_local_new_i32();
1511 tcg_gen_movi_i32(t0
, val
);
1515 TCGv_i64
tcg_const_local_i64(int64_t val
)
1518 t0
= tcg_temp_local_new_i64();
1519 tcg_gen_movi_i64(t0
, val
);
1523 #if defined(CONFIG_DEBUG_TCG)
1524 void tcg_clear_temp_count(void)
1526 TCGContext
*s
= tcg_ctx
;
1527 s
->temps_in_use
= 0;
1530 int tcg_check_temp_count(void)
1532 TCGContext
*s
= tcg_ctx
;
1533 if (s
->temps_in_use
) {
1534 /* Clear the count so that we don't give another
1535 * warning immediately next time around.
1537 s
->temps_in_use
= 0;
1544 /* Return true if OP may appear in the opcode stream.
1545 Test the runtime variable that controls each opcode. */
1546 bool tcg_op_supported(TCGOpcode op
)
1549 = TCG_TARGET_HAS_v64
| TCG_TARGET_HAS_v128
| TCG_TARGET_HAS_v256
;
1552 case INDEX_op_discard
:
1553 case INDEX_op_set_label
:
1557 case INDEX_op_insn_start
:
1558 case INDEX_op_exit_tb
:
1559 case INDEX_op_goto_tb
:
1560 case INDEX_op_qemu_ld_i32
:
1561 case INDEX_op_qemu_st_i32
:
1562 case INDEX_op_qemu_ld_i64
:
1563 case INDEX_op_qemu_st_i64
:
1566 case INDEX_op_qemu_st8_i32
:
1567 return TCG_TARGET_HAS_qemu_st8_i32
;
1569 case INDEX_op_goto_ptr
:
1570 return TCG_TARGET_HAS_goto_ptr
;
1572 case INDEX_op_mov_i32
:
1573 case INDEX_op_setcond_i32
:
1574 case INDEX_op_brcond_i32
:
1575 case INDEX_op_ld8u_i32
:
1576 case INDEX_op_ld8s_i32
:
1577 case INDEX_op_ld16u_i32
:
1578 case INDEX_op_ld16s_i32
:
1579 case INDEX_op_ld_i32
:
1580 case INDEX_op_st8_i32
:
1581 case INDEX_op_st16_i32
:
1582 case INDEX_op_st_i32
:
1583 case INDEX_op_add_i32
:
1584 case INDEX_op_sub_i32
:
1585 case INDEX_op_mul_i32
:
1586 case INDEX_op_and_i32
:
1587 case INDEX_op_or_i32
:
1588 case INDEX_op_xor_i32
:
1589 case INDEX_op_shl_i32
:
1590 case INDEX_op_shr_i32
:
1591 case INDEX_op_sar_i32
:
1594 case INDEX_op_movcond_i32
:
1595 return TCG_TARGET_HAS_movcond_i32
;
1596 case INDEX_op_div_i32
:
1597 case INDEX_op_divu_i32
:
1598 return TCG_TARGET_HAS_div_i32
;
1599 case INDEX_op_rem_i32
:
1600 case INDEX_op_remu_i32
:
1601 return TCG_TARGET_HAS_rem_i32
;
1602 case INDEX_op_div2_i32
:
1603 case INDEX_op_divu2_i32
:
1604 return TCG_TARGET_HAS_div2_i32
;
1605 case INDEX_op_rotl_i32
:
1606 case INDEX_op_rotr_i32
:
1607 return TCG_TARGET_HAS_rot_i32
;
1608 case INDEX_op_deposit_i32
:
1609 return TCG_TARGET_HAS_deposit_i32
;
1610 case INDEX_op_extract_i32
:
1611 return TCG_TARGET_HAS_extract_i32
;
1612 case INDEX_op_sextract_i32
:
1613 return TCG_TARGET_HAS_sextract_i32
;
1614 case INDEX_op_extract2_i32
:
1615 return TCG_TARGET_HAS_extract2_i32
;
1616 case INDEX_op_add2_i32
:
1617 return TCG_TARGET_HAS_add2_i32
;
1618 case INDEX_op_sub2_i32
:
1619 return TCG_TARGET_HAS_sub2_i32
;
1620 case INDEX_op_mulu2_i32
:
1621 return TCG_TARGET_HAS_mulu2_i32
;
1622 case INDEX_op_muls2_i32
:
1623 return TCG_TARGET_HAS_muls2_i32
;
1624 case INDEX_op_muluh_i32
:
1625 return TCG_TARGET_HAS_muluh_i32
;
1626 case INDEX_op_mulsh_i32
:
1627 return TCG_TARGET_HAS_mulsh_i32
;
1628 case INDEX_op_ext8s_i32
:
1629 return TCG_TARGET_HAS_ext8s_i32
;
1630 case INDEX_op_ext16s_i32
:
1631 return TCG_TARGET_HAS_ext16s_i32
;
1632 case INDEX_op_ext8u_i32
:
1633 return TCG_TARGET_HAS_ext8u_i32
;
1634 case INDEX_op_ext16u_i32
:
1635 return TCG_TARGET_HAS_ext16u_i32
;
1636 case INDEX_op_bswap16_i32
:
1637 return TCG_TARGET_HAS_bswap16_i32
;
1638 case INDEX_op_bswap32_i32
:
1639 return TCG_TARGET_HAS_bswap32_i32
;
1640 case INDEX_op_not_i32
:
1641 return TCG_TARGET_HAS_not_i32
;
1642 case INDEX_op_neg_i32
:
1643 return TCG_TARGET_HAS_neg_i32
;
1644 case INDEX_op_andc_i32
:
1645 return TCG_TARGET_HAS_andc_i32
;
1646 case INDEX_op_orc_i32
:
1647 return TCG_TARGET_HAS_orc_i32
;
1648 case INDEX_op_eqv_i32
:
1649 return TCG_TARGET_HAS_eqv_i32
;
1650 case INDEX_op_nand_i32
:
1651 return TCG_TARGET_HAS_nand_i32
;
1652 case INDEX_op_nor_i32
:
1653 return TCG_TARGET_HAS_nor_i32
;
1654 case INDEX_op_clz_i32
:
1655 return TCG_TARGET_HAS_clz_i32
;
1656 case INDEX_op_ctz_i32
:
1657 return TCG_TARGET_HAS_ctz_i32
;
1658 case INDEX_op_ctpop_i32
:
1659 return TCG_TARGET_HAS_ctpop_i32
;
1661 case INDEX_op_brcond2_i32
:
1662 case INDEX_op_setcond2_i32
:
1663 return TCG_TARGET_REG_BITS
== 32;
1665 case INDEX_op_mov_i64
:
1666 case INDEX_op_setcond_i64
:
1667 case INDEX_op_brcond_i64
:
1668 case INDEX_op_ld8u_i64
:
1669 case INDEX_op_ld8s_i64
:
1670 case INDEX_op_ld16u_i64
:
1671 case INDEX_op_ld16s_i64
:
1672 case INDEX_op_ld32u_i64
:
1673 case INDEX_op_ld32s_i64
:
1674 case INDEX_op_ld_i64
:
1675 case INDEX_op_st8_i64
:
1676 case INDEX_op_st16_i64
:
1677 case INDEX_op_st32_i64
:
1678 case INDEX_op_st_i64
:
1679 case INDEX_op_add_i64
:
1680 case INDEX_op_sub_i64
:
1681 case INDEX_op_mul_i64
:
1682 case INDEX_op_and_i64
:
1683 case INDEX_op_or_i64
:
1684 case INDEX_op_xor_i64
:
1685 case INDEX_op_shl_i64
:
1686 case INDEX_op_shr_i64
:
1687 case INDEX_op_sar_i64
:
1688 case INDEX_op_ext_i32_i64
:
1689 case INDEX_op_extu_i32_i64
:
1690 return TCG_TARGET_REG_BITS
== 64;
1692 case INDEX_op_movcond_i64
:
1693 return TCG_TARGET_HAS_movcond_i64
;
1694 case INDEX_op_div_i64
:
1695 case INDEX_op_divu_i64
:
1696 return TCG_TARGET_HAS_div_i64
;
1697 case INDEX_op_rem_i64
:
1698 case INDEX_op_remu_i64
:
1699 return TCG_TARGET_HAS_rem_i64
;
1700 case INDEX_op_div2_i64
:
1701 case INDEX_op_divu2_i64
:
1702 return TCG_TARGET_HAS_div2_i64
;
1703 case INDEX_op_rotl_i64
:
1704 case INDEX_op_rotr_i64
:
1705 return TCG_TARGET_HAS_rot_i64
;
1706 case INDEX_op_deposit_i64
:
1707 return TCG_TARGET_HAS_deposit_i64
;
1708 case INDEX_op_extract_i64
:
1709 return TCG_TARGET_HAS_extract_i64
;
1710 case INDEX_op_sextract_i64
:
1711 return TCG_TARGET_HAS_sextract_i64
;
1712 case INDEX_op_extract2_i64
:
1713 return TCG_TARGET_HAS_extract2_i64
;
1714 case INDEX_op_extrl_i64_i32
:
1715 return TCG_TARGET_HAS_extrl_i64_i32
;
1716 case INDEX_op_extrh_i64_i32
:
1717 return TCG_TARGET_HAS_extrh_i64_i32
;
1718 case INDEX_op_ext8s_i64
:
1719 return TCG_TARGET_HAS_ext8s_i64
;
1720 case INDEX_op_ext16s_i64
:
1721 return TCG_TARGET_HAS_ext16s_i64
;
1722 case INDEX_op_ext32s_i64
:
1723 return TCG_TARGET_HAS_ext32s_i64
;
1724 case INDEX_op_ext8u_i64
:
1725 return TCG_TARGET_HAS_ext8u_i64
;
1726 case INDEX_op_ext16u_i64
:
1727 return TCG_TARGET_HAS_ext16u_i64
;
1728 case INDEX_op_ext32u_i64
:
1729 return TCG_TARGET_HAS_ext32u_i64
;
1730 case INDEX_op_bswap16_i64
:
1731 return TCG_TARGET_HAS_bswap16_i64
;
1732 case INDEX_op_bswap32_i64
:
1733 return TCG_TARGET_HAS_bswap32_i64
;
1734 case INDEX_op_bswap64_i64
:
1735 return TCG_TARGET_HAS_bswap64_i64
;
1736 case INDEX_op_not_i64
:
1737 return TCG_TARGET_HAS_not_i64
;
1738 case INDEX_op_neg_i64
:
1739 return TCG_TARGET_HAS_neg_i64
;
1740 case INDEX_op_andc_i64
:
1741 return TCG_TARGET_HAS_andc_i64
;
1742 case INDEX_op_orc_i64
:
1743 return TCG_TARGET_HAS_orc_i64
;
1744 case INDEX_op_eqv_i64
:
1745 return TCG_TARGET_HAS_eqv_i64
;
1746 case INDEX_op_nand_i64
:
1747 return TCG_TARGET_HAS_nand_i64
;
1748 case INDEX_op_nor_i64
:
1749 return TCG_TARGET_HAS_nor_i64
;
1750 case INDEX_op_clz_i64
:
1751 return TCG_TARGET_HAS_clz_i64
;
1752 case INDEX_op_ctz_i64
:
1753 return TCG_TARGET_HAS_ctz_i64
;
1754 case INDEX_op_ctpop_i64
:
1755 return TCG_TARGET_HAS_ctpop_i64
;
1756 case INDEX_op_add2_i64
:
1757 return TCG_TARGET_HAS_add2_i64
;
1758 case INDEX_op_sub2_i64
:
1759 return TCG_TARGET_HAS_sub2_i64
;
1760 case INDEX_op_mulu2_i64
:
1761 return TCG_TARGET_HAS_mulu2_i64
;
1762 case INDEX_op_muls2_i64
:
1763 return TCG_TARGET_HAS_muls2_i64
;
1764 case INDEX_op_muluh_i64
:
1765 return TCG_TARGET_HAS_muluh_i64
;
1766 case INDEX_op_mulsh_i64
:
1767 return TCG_TARGET_HAS_mulsh_i64
;
1769 case INDEX_op_mov_vec
:
1770 case INDEX_op_dup_vec
:
1771 case INDEX_op_dupm_vec
:
1772 case INDEX_op_ld_vec
:
1773 case INDEX_op_st_vec
:
1774 case INDEX_op_add_vec
:
1775 case INDEX_op_sub_vec
:
1776 case INDEX_op_and_vec
:
1777 case INDEX_op_or_vec
:
1778 case INDEX_op_xor_vec
:
1779 case INDEX_op_cmp_vec
:
1781 case INDEX_op_dup2_vec
:
1782 return have_vec
&& TCG_TARGET_REG_BITS
== 32;
1783 case INDEX_op_not_vec
:
1784 return have_vec
&& TCG_TARGET_HAS_not_vec
;
1785 case INDEX_op_neg_vec
:
1786 return have_vec
&& TCG_TARGET_HAS_neg_vec
;
1787 case INDEX_op_abs_vec
:
1788 return have_vec
&& TCG_TARGET_HAS_abs_vec
;
1789 case INDEX_op_andc_vec
:
1790 return have_vec
&& TCG_TARGET_HAS_andc_vec
;
1791 case INDEX_op_orc_vec
:
1792 return have_vec
&& TCG_TARGET_HAS_orc_vec
;
1793 case INDEX_op_mul_vec
:
1794 return have_vec
&& TCG_TARGET_HAS_mul_vec
;
1795 case INDEX_op_shli_vec
:
1796 case INDEX_op_shri_vec
:
1797 case INDEX_op_sari_vec
:
1798 return have_vec
&& TCG_TARGET_HAS_shi_vec
;
1799 case INDEX_op_shls_vec
:
1800 case INDEX_op_shrs_vec
:
1801 case INDEX_op_sars_vec
:
1802 return have_vec
&& TCG_TARGET_HAS_shs_vec
;
1803 case INDEX_op_shlv_vec
:
1804 case INDEX_op_shrv_vec
:
1805 case INDEX_op_sarv_vec
:
1806 return have_vec
&& TCG_TARGET_HAS_shv_vec
;
1807 case INDEX_op_rotli_vec
:
1808 return have_vec
&& TCG_TARGET_HAS_roti_vec
;
1809 case INDEX_op_rotls_vec
:
1810 return have_vec
&& TCG_TARGET_HAS_rots_vec
;
1811 case INDEX_op_rotlv_vec
:
1812 case INDEX_op_rotrv_vec
:
1813 return have_vec
&& TCG_TARGET_HAS_rotv_vec
;
1814 case INDEX_op_ssadd_vec
:
1815 case INDEX_op_usadd_vec
:
1816 case INDEX_op_sssub_vec
:
1817 case INDEX_op_ussub_vec
:
1818 return have_vec
&& TCG_TARGET_HAS_sat_vec
;
1819 case INDEX_op_smin_vec
:
1820 case INDEX_op_umin_vec
:
1821 case INDEX_op_smax_vec
:
1822 case INDEX_op_umax_vec
:
1823 return have_vec
&& TCG_TARGET_HAS_minmax_vec
;
1824 case INDEX_op_bitsel_vec
:
1825 return have_vec
&& TCG_TARGET_HAS_bitsel_vec
;
1826 case INDEX_op_cmpsel_vec
:
1827 return have_vec
&& TCG_TARGET_HAS_cmpsel_vec
;
1830 tcg_debug_assert(op
> INDEX_op_last_generic
&& op
< NB_OPS
);
1835 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1836 and endian swap. Maybe it would be better to do the alignment
1837 and endian swap in tcg_reg_alloc_call(). */
1838 void tcg_gen_callN(void *func
, TCGTemp
*ret
, int nargs
, TCGTemp
**args
)
1840 int i
, real_args
, nb_rets
, pi
;
1841 unsigned sizemask
, flags
;
1842 TCGHelperInfo
*info
;
1845 info
= g_hash_table_lookup(helper_table
, (gpointer
)func
);
1846 flags
= info
->flags
;
1847 sizemask
= info
->sizemask
;
1849 #ifdef CONFIG_PLUGIN
1850 /* detect non-plugin helpers */
1851 if (tcg_ctx
->plugin_insn
&& unlikely(strncmp(info
->name
, "plugin_", 7))) {
1852 tcg_ctx
->plugin_insn
->calls_helpers
= true;
1856 #if defined(__sparc__) && !defined(__arch64__) \
1857 && !defined(CONFIG_TCG_INTERPRETER)
1858 /* We have 64-bit values in one register, but need to pass as two
1859 separate parameters. Split them. */
1860 int orig_sizemask
= sizemask
;
1861 int orig_nargs
= nargs
;
1862 TCGv_i64 retl
, reth
;
1863 TCGTemp
*split_args
[MAX_OPC_PARAM
];
1867 if (sizemask
!= 0) {
1868 for (i
= real_args
= 0; i
< nargs
; ++i
) {
1869 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1871 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1872 TCGv_i32 h
= tcg_temp_new_i32();
1873 TCGv_i32 l
= tcg_temp_new_i32();
1874 tcg_gen_extr_i64_i32(l
, h
, orig
);
1875 split_args
[real_args
++] = tcgv_i32_temp(h
);
1876 split_args
[real_args
++] = tcgv_i32_temp(l
);
1878 split_args
[real_args
++] = args
[i
];
1885 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1886 for (i
= 0; i
< nargs
; ++i
) {
1887 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1888 int is_signed
= sizemask
& (2 << (i
+1)*2);
1890 TCGv_i64 temp
= tcg_temp_new_i64();
1891 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1893 tcg_gen_ext32s_i64(temp
, orig
);
1895 tcg_gen_ext32u_i64(temp
, orig
);
1897 args
[i
] = tcgv_i64_temp(temp
);
1900 #endif /* TCG_TARGET_EXTEND_ARGS */
1902 op
= tcg_emit_op(INDEX_op_call
);
1906 #if defined(__sparc__) && !defined(__arch64__) \
1907 && !defined(CONFIG_TCG_INTERPRETER)
1908 if (orig_sizemask
& 1) {
1909 /* The 32-bit ABI is going to return the 64-bit value in
1910 the %o0/%o1 register pair. Prepare for this by using
1911 two return temporaries, and reassemble below. */
1912 retl
= tcg_temp_new_i64();
1913 reth
= tcg_temp_new_i64();
1914 op
->args
[pi
++] = tcgv_i64_arg(reth
);
1915 op
->args
[pi
++] = tcgv_i64_arg(retl
);
1918 op
->args
[pi
++] = temp_arg(ret
);
1922 if (TCG_TARGET_REG_BITS
< 64 && (sizemask
& 1)) {
1923 #ifdef HOST_WORDS_BIGENDIAN
1924 op
->args
[pi
++] = temp_arg(ret
+ 1);
1925 op
->args
[pi
++] = temp_arg(ret
);
1927 op
->args
[pi
++] = temp_arg(ret
);
1928 op
->args
[pi
++] = temp_arg(ret
+ 1);
1932 op
->args
[pi
++] = temp_arg(ret
);
1939 TCGOP_CALLO(op
) = nb_rets
;
1942 for (i
= 0; i
< nargs
; i
++) {
1943 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1944 if (TCG_TARGET_REG_BITS
< 64 && is_64bit
) {
1945 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1946 /* some targets want aligned 64 bit args */
1947 if (real_args
& 1) {
1948 op
->args
[pi
++] = TCG_CALL_DUMMY_ARG
;
1952 /* If stack grows up, then we will be placing successive
1953 arguments at lower addresses, which means we need to
1954 reverse the order compared to how we would normally
1955 treat either big or little-endian. For those arguments
1956 that will wind up in registers, this still works for
1957 HPPA (the only current STACK_GROWSUP target) since the
1958 argument registers are *also* allocated in decreasing
1959 order. If another such target is added, this logic may
1960 have to get more complicated to differentiate between
1961 stack arguments and register arguments. */
1962 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1963 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
1964 op
->args
[pi
++] = temp_arg(args
[i
]);
1966 op
->args
[pi
++] = temp_arg(args
[i
]);
1967 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
1973 op
->args
[pi
++] = temp_arg(args
[i
]);
1976 op
->args
[pi
++] = (uintptr_t)func
;
1977 op
->args
[pi
++] = flags
;
1978 TCGOP_CALLI(op
) = real_args
;
1980 /* Make sure the fields didn't overflow. */
1981 tcg_debug_assert(TCGOP_CALLI(op
) == real_args
);
1982 tcg_debug_assert(pi
<= ARRAY_SIZE(op
->args
));
1984 #if defined(__sparc__) && !defined(__arch64__) \
1985 && !defined(CONFIG_TCG_INTERPRETER)
1986 /* Free all of the parts we allocated above. */
1987 for (i
= real_args
= 0; i
< orig_nargs
; ++i
) {
1988 int is_64bit
= orig_sizemask
& (1 << (i
+1)*2);
1990 tcg_temp_free_internal(args
[real_args
++]);
1991 tcg_temp_free_internal(args
[real_args
++]);
1996 if (orig_sizemask
& 1) {
1997 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1998 Note that describing these as TCGv_i64 eliminates an unnecessary
1999 zero-extension that tcg_gen_concat_i32_i64 would create. */
2000 tcg_gen_concat32_i64(temp_tcgv_i64(ret
), retl
, reth
);
2001 tcg_temp_free_i64(retl
);
2002 tcg_temp_free_i64(reth
);
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
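/*
 * Argument layout recap for the loop above, on a 32-bit host: a 64-bit
 * helper argument is emitted as two temp halves, optionally preceded by
 * TCG_CALL_DUMMY_ARG when TCG_TARGET_CALL_ALIGN_ARGS requires an even slot,
 * with the half order following host endianness (plus the STACK_GROWSUP
 * twist described above).  TCGOP_CALLO and TCGOP_CALLI record how many
 * output and input slots were actually emitted.
 */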
2014 static void tcg_reg_alloc_start(TCGContext
*s
)
2018 for (i
= 0, n
= s
->nb_temps
; i
< n
; i
++) {
2019 TCGTemp
*ts
= &s
->temps
[i
];
2020 TCGTempVal val
= TEMP_VAL_MEM
;
2024 val
= TEMP_VAL_CONST
;
2032 val
= TEMP_VAL_DEAD
;
2035 ts
->mem_allocated
= 0;
2038 g_assert_not_reached();
2043 memset(s
->reg_to_temp
, 0, sizeof(s
->reg_to_temp
));
2046 static char *tcg_get_arg_str_ptr(TCGContext
*s
, char *buf
, int buf_size
,
2049 int idx
= temp_idx(ts
);
2054 pstrcpy(buf
, buf_size
, ts
->name
);
2057 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
2060 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
2065 snprintf(buf
, buf_size
, "$0x%x", (int32_t)ts
->val
);
2067 #if TCG_TARGET_REG_BITS > 32
2069 snprintf(buf
, buf_size
, "$0x%" PRIx64
, ts
->val
);
2075 snprintf(buf
, buf_size
, "v%d$0x%" PRIx64
,
2076 64 << (ts
->type
- TCG_TYPE_V64
), ts
->val
);
2079 g_assert_not_reached();
2086 static char *tcg_get_arg_str(TCGContext
*s
, char *buf
,
2087 int buf_size
, TCGArg arg
)
2089 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, arg_temp(arg
));
2092 /* Find helper name. */
2093 static inline const char *tcg_find_helper(TCGContext
*s
, uintptr_t val
)
2095 const char *ret
= NULL
;
2097 TCGHelperInfo
*info
= g_hash_table_lookup(helper_table
, (gpointer
)val
);
2105 static const char * const cond_name
[] =
2107 [TCG_COND_NEVER
] = "never",
2108 [TCG_COND_ALWAYS
] = "always",
2109 [TCG_COND_EQ
] = "eq",
2110 [TCG_COND_NE
] = "ne",
2111 [TCG_COND_LT
] = "lt",
2112 [TCG_COND_GE
] = "ge",
2113 [TCG_COND_LE
] = "le",
2114 [TCG_COND_GT
] = "gt",
2115 [TCG_COND_LTU
] = "ltu",
2116 [TCG_COND_GEU
] = "geu",
2117 [TCG_COND_LEU
] = "leu",
2118 [TCG_COND_GTU
] = "gtu"
2121 static const char * const ldst_name
[] =
2137 static const char * const alignment_name
[(MO_AMASK
>> MO_ASHIFT
) + 1] = {
2138 #ifdef TARGET_ALIGNED_ONLY
2139 [MO_UNALN
>> MO_ASHIFT
] = "un+",
2140 [MO_ALIGN
>> MO_ASHIFT
] = "",
2142 [MO_UNALN
>> MO_ASHIFT
] = "",
2143 [MO_ALIGN
>> MO_ASHIFT
] = "al+",
2145 [MO_ALIGN_2
>> MO_ASHIFT
] = "al2+",
2146 [MO_ALIGN_4
>> MO_ASHIFT
] = "al4+",
2147 [MO_ALIGN_8
>> MO_ASHIFT
] = "al8+",
2148 [MO_ALIGN_16
>> MO_ASHIFT
] = "al16+",
2149 [MO_ALIGN_32
>> MO_ASHIFT
] = "al32+",
2150 [MO_ALIGN_64
>> MO_ASHIFT
] = "al64+",
2153 static inline bool tcg_regset_single(TCGRegSet d
)
2155 return (d
& (d
- 1)) == 0;
2158 static inline TCGReg
tcg_regset_first(TCGRegSet d
)
2160 if (TCG_TARGET_NB_REGS
<= 32) {
2167 static void tcg_dump_ops(TCGContext
*s
, bool have_prefs
)
2172 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
2173 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
2174 const TCGOpDef
*def
;
2179 def
= &tcg_op_defs
[c
];
2181 if (c
== INDEX_op_insn_start
) {
2183 col
+= qemu_log("\n ----");
2185 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
2187 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2188 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
2192 col
+= qemu_log(" " TARGET_FMT_lx
, a
);
2194 } else if (c
== INDEX_op_call
) {
2195 /* variable number of arguments */
2196 nb_oargs
= TCGOP_CALLO(op
);
2197 nb_iargs
= TCGOP_CALLI(op
);
2198 nb_cargs
= def
->nb_cargs
;
2200 /* function name, flags, out args */
2201 col
+= qemu_log(" %s %s,$0x%" TCG_PRIlx
",$%d", def
->name
,
2202 tcg_find_helper(s
, op
->args
[nb_oargs
+ nb_iargs
]),
2203 op
->args
[nb_oargs
+ nb_iargs
+ 1], nb_oargs
);
2204 for (i
= 0; i
< nb_oargs
; i
++) {
2205 col
+= qemu_log(",%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2208 for (i
= 0; i
< nb_iargs
; i
++) {
2209 TCGArg arg
= op
->args
[nb_oargs
+ i
];
2210 const char *t
= "<dummy>";
2211 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2212 t
= tcg_get_arg_str(s
, buf
, sizeof(buf
), arg
);
2214 col
+= qemu_log(",%s", t
);
2217 col
+= qemu_log(" %s ", def
->name
);
2219 nb_oargs
= def
->nb_oargs
;
2220 nb_iargs
= def
->nb_iargs
;
2221 nb_cargs
= def
->nb_cargs
;
2223 if (def
->flags
& TCG_OPF_VECTOR
) {
2224 col
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op
),
2225 8 << TCGOP_VECE(op
));
2229 for (i
= 0; i
< nb_oargs
; i
++) {
2231 col
+= qemu_log(",");
2233 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2236 for (i
= 0; i
< nb_iargs
; i
++) {
2238 col
+= qemu_log(",");
2240 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2244 case INDEX_op_brcond_i32
:
2245 case INDEX_op_setcond_i32
:
2246 case INDEX_op_movcond_i32
:
2247 case INDEX_op_brcond2_i32
:
2248 case INDEX_op_setcond2_i32
:
2249 case INDEX_op_brcond_i64
:
2250 case INDEX_op_setcond_i64
:
2251 case INDEX_op_movcond_i64
:
2252 case INDEX_op_cmp_vec
:
2253 case INDEX_op_cmpsel_vec
:
2254 if (op
->args
[k
] < ARRAY_SIZE(cond_name
)
2255 && cond_name
[op
->args
[k
]]) {
2256 col
+= qemu_log(",%s", cond_name
[op
->args
[k
++]]);
2258 col
+= qemu_log(",$0x%" TCG_PRIlx
, op
->args
[k
++]);
2262 case INDEX_op_qemu_ld_i32
:
2263 case INDEX_op_qemu_st_i32
:
2264 case INDEX_op_qemu_st8_i32
:
2265 case INDEX_op_qemu_ld_i64
:
2266 case INDEX_op_qemu_st_i64
:
2268 TCGMemOpIdx oi
= op
->args
[k
++];
2269 MemOp op
= get_memop(oi
);
2270 unsigned ix
= get_mmuidx(oi
);
2272 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
2273 col
+= qemu_log(",$0x%x,%u", op
, ix
);
2275 const char *s_al
, *s_op
;
2276 s_al
= alignment_name
[(op
& MO_AMASK
) >> MO_ASHIFT
];
2277 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
2278 col
+= qemu_log(",%s%s,%u", s_al
, s_op
, ix
);
2288 case INDEX_op_set_label
:
2290 case INDEX_op_brcond_i32
:
2291 case INDEX_op_brcond_i64
:
2292 case INDEX_op_brcond2_i32
:
2293 col
+= qemu_log("%s$L%d", k
? "," : "",
2294 arg_label(op
->args
[k
])->id
);
2300 for (; i
< nb_cargs
; i
++, k
++) {
2301 col
+= qemu_log("%s$0x%" TCG_PRIlx
, k
? "," : "", op
->args
[k
]);
2305 if (have_prefs
|| op
->life
) {
2307 QemuLogFile
*logfile
;
2310 logfile
= qatomic_rcu_read(&qemu_logfile
);
2312 for (; col
< 40; ++col
) {
2313 putc(' ', logfile
->fd
);
2320 unsigned life
= op
->life
;
2322 if (life
& (SYNC_ARG
* 3)) {
2324 for (i
= 0; i
< 2; ++i
) {
2325 if (life
& (SYNC_ARG
<< i
)) {
2333 for (i
= 0; life
; ++i
, life
>>= 1) {
2342 for (i
= 0; i
< nb_oargs
; ++i
) {
2343 TCGRegSet set
= op
->output_pref
[i
];
2352 } else if (set
== MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS
)) {
2354 #ifdef CONFIG_DEBUG_TCG
2355 } else if (tcg_regset_single(set
)) {
2356 TCGReg reg
= tcg_regset_first(set
);
2357 qemu_log("%s", tcg_target_reg_names
[reg
]);
2359 } else if (TCG_TARGET_NB_REGS
<= 32) {
2360 qemu_log("%#x", (uint32_t)set
);
2362 qemu_log("%#" PRIx64
, (uint64_t)set
);
2371 /* we give more priority to constraints with less registers */
2372 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
2374 const TCGArgConstraint
*arg_ct
= &def
->args_ct
[k
];
2377 if (arg_ct
->oalias
) {
2378 /* an alias is equivalent to a single register */
2381 n
= ctpop64(arg_ct
->regs
);
2383 return TCG_TARGET_NB_REGS
- n
+ 1;
2386 /* sort from highest priority to lowest */
2387 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
2390 TCGArgConstraint
*a
= def
->args_ct
;
2392 for (i
= 0; i
< n
; i
++) {
2393 a
[start
+ i
].sort_index
= start
+ i
;
2398 for (i
= 0; i
< n
- 1; i
++) {
2399 for (j
= i
+ 1; j
< n
; j
++) {
2400 int p1
= get_constraint_priority(def
, a
[start
+ i
].sort_index
);
2401 int p2
= get_constraint_priority(def
, a
[start
+ j
].sort_index
);
2403 int tmp
= a
[start
+ i
].sort_index
;
2404 a
[start
+ i
].sort_index
= a
[start
+ j
].sort_index
;
2405 a
[start
+ j
].sort_index
= tmp
;
2411 static void process_op_defs(TCGContext
*s
)
2415 for (op
= 0; op
< NB_OPS
; op
++) {
2416 TCGOpDef
*def
= &tcg_op_defs
[op
];
2417 const TCGTargetOpDef
*tdefs
;
2421 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2425 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
2430 tdefs
= tcg_target_op_def(op
);
2431 /* Missing TCGTargetOpDef entry. */
2432 tcg_debug_assert(tdefs
!= NULL
);
2434 type
= (def
->flags
& TCG_OPF_64BIT
? TCG_TYPE_I64
: TCG_TYPE_I32
);
2435 for (i
= 0; i
< nb_args
; i
++) {
2436 const char *ct_str
= tdefs
->args_ct_str
[i
];
2437 /* Incomplete TCGTargetOpDef entry. */
2438 tcg_debug_assert(ct_str
!= NULL
);
2440 while (*ct_str
!= '\0') {
2444 int oarg
= *ct_str
- '0';
2445 tcg_debug_assert(ct_str
== tdefs
->args_ct_str
[i
]);
2446 tcg_debug_assert(oarg
< def
->nb_oargs
);
2447 tcg_debug_assert(def
->args_ct
[oarg
].regs
!= 0);
2448 def
->args_ct
[i
] = def
->args_ct
[oarg
];
2449 /* The output sets oalias. */
2450 def
->args_ct
[oarg
].oalias
= true;
2451 def
->args_ct
[oarg
].alias_index
= i
;
2452 /* The input sets ialias. */
2453 def
->args_ct
[i
].ialias
= true;
2454 def
->args_ct
[i
].alias_index
= oarg
;
2459 def
->args_ct
[i
].newreg
= true;
2463 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
2467 ct_str
= target_parse_constraint(&def
->args_ct
[i
],
2469 /* Typo in TCGTargetOpDef constraint. */
2470 tcg_debug_assert(ct_str
!= NULL
);
2475 /* TCGTargetOpDef entry with too much information? */
2476 tcg_debug_assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
2478 /* sort the constraints (XXX: this is just an heuristic) */
2479 sort_constraints(def
, 0, def
->nb_oargs
);
2480 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
2484 void tcg_op_remove(TCGContext
*s
, TCGOp
*op
)
2490 label
= arg_label(op
->args
[0]);
2493 case INDEX_op_brcond_i32
:
2494 case INDEX_op_brcond_i64
:
2495 label
= arg_label(op
->args
[3]);
2498 case INDEX_op_brcond2_i32
:
2499 label
= arg_label(op
->args
[5]);
2506 QTAILQ_REMOVE(&s
->ops
, op
, link
);
2507 QTAILQ_INSERT_TAIL(&s
->free_ops
, op
, link
);
2510 #ifdef CONFIG_PROFILER
2511 qatomic_set(&s
->prof
.del_op_count
, s
->prof
.del_op_count
+ 1);
2515 static TCGOp
*tcg_op_alloc(TCGOpcode opc
)
2517 TCGContext
*s
= tcg_ctx
;
2520 if (likely(QTAILQ_EMPTY(&s
->free_ops
))) {
2521 op
= tcg_malloc(sizeof(TCGOp
));
2523 op
= QTAILQ_FIRST(&s
->free_ops
);
2524 QTAILQ_REMOVE(&s
->free_ops
, op
, link
);
2526 memset(op
, 0, offsetof(TCGOp
, link
));
2533 TCGOp
*tcg_emit_op(TCGOpcode opc
)
2535 TCGOp
*op
= tcg_op_alloc(opc
);
2536 QTAILQ_INSERT_TAIL(&tcg_ctx
->ops
, op
, link
);
2540 TCGOp
*tcg_op_insert_before(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2542 TCGOp
*new_op
= tcg_op_alloc(opc
);
2543 QTAILQ_INSERT_BEFORE(old_op
, new_op
, link
);
2547 TCGOp
*tcg_op_insert_after(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2549 TCGOp
*new_op
= tcg_op_alloc(opc
);
2550 QTAILQ_INSERT_AFTER(&s
->ops
, old_op
, new_op
, link
);
2554 /* Reachable analysis : remove unreachable code. */
2555 static void reachable_code_pass(TCGContext
*s
)
2557 TCGOp
*op
, *op_next
;
2560 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2566 case INDEX_op_set_label
:
2567 label
= arg_label(op
->args
[0]);
2568 if (label
->refs
== 0) {
2570 * While there is an occasional backward branch, virtually
2571 * all branches generated by the translators are forward.
2572 * Which means that generally we will have already removed
2573 * all references to the label that will be, and there is
2574 * little to be gained by iterating.
2578 /* Once we see a label, insns become live again. */
2583 * Optimization can fold conditional branches to unconditional.
2584 * If we find a label with one reference which is preceded by
2585 * an unconditional branch to it, remove both. This needed to
2586 * wait until the dead code in between them was removed.
2588 if (label
->refs
== 1) {
2589 TCGOp
*op_prev
= QTAILQ_PREV(op
, link
);
2590 if (op_prev
->opc
== INDEX_op_br
&&
2591 label
== arg_label(op_prev
->args
[0])) {
2592 tcg_op_remove(s
, op_prev
);
2600 case INDEX_op_exit_tb
:
2601 case INDEX_op_goto_ptr
:
2602 /* Unconditional branches; everything following is dead. */
2607 /* Notice noreturn helper calls, raising exceptions. */
2608 call_flags
= op
->args
[TCGOP_CALLO(op
) + TCGOP_CALLI(op
) + 1];
2609 if (call_flags
& TCG_CALL_NO_RETURN
) {
2614 case INDEX_op_insn_start
:
2615 /* Never remove -- we need to keep these for unwind. */
2624 tcg_op_remove(s
, op
);
2632 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2633 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2635 /* For liveness_pass_1, the register preferences for a given temp. */
2636 static inline TCGRegSet
*la_temp_pref(TCGTemp
*ts
)
2638 return ts
->state_ptr
;
2641 /* For liveness_pass_1, reset the preferences for a given temp to the
2642 * maximal regset for its type.
2644 static inline void la_reset_pref(TCGTemp
*ts
)
2647 = (ts
->state
== TS_DEAD
? 0 : tcg_target_available_regs
[ts
->type
]);
2650 /* liveness analysis: end of function: all temps are dead, and globals
2651 should be in memory. */
2652 static void la_func_end(TCGContext
*s
, int ng
, int nt
)
2656 for (i
= 0; i
< ng
; ++i
) {
2657 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2658 la_reset_pref(&s
->temps
[i
]);
2660 for (i
= ng
; i
< nt
; ++i
) {
2661 s
->temps
[i
].state
= TS_DEAD
;
2662 la_reset_pref(&s
->temps
[i
]);
2666 /* liveness analysis: end of basic block: all temps are dead, globals
2667 and local temps should be in memory. */
2668 static void la_bb_end(TCGContext
*s
, int ng
, int nt
)
2672 for (i
= 0; i
< nt
; ++i
) {
2673 TCGTemp
*ts
= &s
->temps
[i
];
2680 state
= TS_DEAD
| TS_MEM
;
2687 g_assert_not_reached();
2694 /* liveness analysis: sync globals back to memory. */
2695 static void la_global_sync(TCGContext
*s
, int ng
)
2699 for (i
= 0; i
< ng
; ++i
) {
2700 int state
= s
->temps
[i
].state
;
2701 s
->temps
[i
].state
= state
| TS_MEM
;
2702 if (state
== TS_DEAD
) {
2703 /* If the global was previously dead, reset prefs. */
2704 la_reset_pref(&s
->temps
[i
]);
2710 * liveness analysis: conditional branch: all temps are dead,
2711 * globals and local temps should be synced.
2713 static void la_bb_sync(TCGContext
*s
, int ng
, int nt
)
2715 la_global_sync(s
, ng
);
2717 for (int i
= ng
; i
< nt
; ++i
) {
2718 TCGTemp
*ts
= &s
->temps
[i
];
2724 ts
->state
= state
| TS_MEM
;
2725 if (state
!= TS_DEAD
) {
2730 s
->temps
[i
].state
= TS_DEAD
;
2735 g_assert_not_reached();
2737 la_reset_pref(&s
->temps
[i
]);
2741 /* liveness analysis: sync globals back to memory and kill. */
2742 static void la_global_kill(TCGContext
*s
, int ng
)
2746 for (i
= 0; i
< ng
; i
++) {
2747 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2748 la_reset_pref(&s
->temps
[i
]);
2752 /* liveness analysis: note live globals crossing calls. */
2753 static void la_cross_call(TCGContext
*s
, int nt
)
2755 TCGRegSet mask
= ~tcg_target_call_clobber_regs
;
2758 for (i
= 0; i
< nt
; i
++) {
2759 TCGTemp
*ts
= &s
->temps
[i
];
2760 if (!(ts
->state
& TS_DEAD
)) {
2761 TCGRegSet
*pset
= la_temp_pref(ts
);
2762 TCGRegSet set
= *pset
;
2765 /* If the combination is not possible, restart. */
2767 set
= tcg_target_available_regs
[ts
->type
] & mask
;
2774 /* Liveness analysis : update the opc_arg_life array to tell if a
2775 given input arguments is dead. Instructions updating dead
2776 temporaries are removed. */
2777 static void liveness_pass_1(TCGContext
*s
)
2779 int nb_globals
= s
->nb_globals
;
2780 int nb_temps
= s
->nb_temps
;
2781 TCGOp
*op
, *op_prev
;
2785 prefs
= tcg_malloc(sizeof(TCGRegSet
) * nb_temps
);
2786 for (i
= 0; i
< nb_temps
; ++i
) {
2787 s
->temps
[i
].state_ptr
= prefs
+ i
;
2790 /* ??? Should be redundant with the exit_tb that ends the TB. */
2791 la_func_end(s
, nb_globals
, nb_temps
);
2793 QTAILQ_FOREACH_REVERSE_SAFE(op
, &s
->ops
, link
, op_prev
) {
2794 int nb_iargs
, nb_oargs
;
2795 TCGOpcode opc_new
, opc_new2
;
2797 TCGLifeData arg_life
= 0;
2799 TCGOpcode opc
= op
->opc
;
2800 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2808 nb_oargs
= TCGOP_CALLO(op
);
2809 nb_iargs
= TCGOP_CALLI(op
);
2810 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2812 /* pure functions can be removed if their result is unused */
2813 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
2814 for (i
= 0; i
< nb_oargs
; i
++) {
2815 ts
= arg_temp(op
->args
[i
]);
2816 if (ts
->state
!= TS_DEAD
) {
2817 goto do_not_remove_call
;
2824 /* Output args are dead. */
2825 for (i
= 0; i
< nb_oargs
; i
++) {
2826 ts
= arg_temp(op
->args
[i
]);
2827 if (ts
->state
& TS_DEAD
) {
2828 arg_life
|= DEAD_ARG
<< i
;
2830 if (ts
->state
& TS_MEM
) {
2831 arg_life
|= SYNC_ARG
<< i
;
2833 ts
->state
= TS_DEAD
;
2836 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2837 op
->output_pref
[i
] = 0;
2840 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
2841 TCG_CALL_NO_READ_GLOBALS
))) {
2842 la_global_kill(s
, nb_globals
);
2843 } else if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
2844 la_global_sync(s
, nb_globals
);
2847 /* Record arguments that die in this helper. */
2848 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2849 ts
= arg_temp(op
->args
[i
]);
2850 if (ts
&& ts
->state
& TS_DEAD
) {
2851 arg_life
|= DEAD_ARG
<< i
;
2855 /* For all live registers, remove call-clobbered prefs. */
2856 la_cross_call(s
, nb_temps
);
2858 nb_call_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2860 /* Input arguments are live for preceding opcodes. */
2861 for (i
= 0; i
< nb_iargs
; i
++) {
2862 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2863 if (ts
&& ts
->state
& TS_DEAD
) {
2864 /* For those arguments that die, and will be allocated
2865 * in registers, clear the register set for that arg,
2866 * to be filled in below. For args that will be on
2867 * the stack, reset to any available reg.
2870 = (i
< nb_call_regs
? 0 :
2871 tcg_target_available_regs
[ts
->type
]);
2872 ts
->state
&= ~TS_DEAD
;
2876 /* For each input argument, add its input register to prefs.
2877 If a temp is used once, this produces a single set bit. */
2878 for (i
= 0; i
< MIN(nb_call_regs
, nb_iargs
); i
++) {
2879 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2881 tcg_regset_set_reg(*la_temp_pref(ts
),
2882 tcg_target_call_iarg_regs
[i
]);
2887 case INDEX_op_insn_start
:
2889 case INDEX_op_discard
:
2890 /* mark the temporary as dead */
2891 ts
= arg_temp(op
->args
[0]);
2892 ts
->state
= TS_DEAD
;
2896 case INDEX_op_add2_i32
:
2897 opc_new
= INDEX_op_add_i32
;
2899 case INDEX_op_sub2_i32
:
2900 opc_new
= INDEX_op_sub_i32
;
2902 case INDEX_op_add2_i64
:
2903 opc_new
= INDEX_op_add_i64
;
2905 case INDEX_op_sub2_i64
:
2906 opc_new
= INDEX_op_sub_i64
;
2910 /* Test if the high part of the operation is dead, but not
2911 the low part. The result can be optimized to a simple
2912 add or sub. This happens often for x86_64 guest when the
2913 cpu mode is set to 32 bit. */
2914 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
2915 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
2918 /* Replace the opcode and adjust the args in place,
2919 leaving 3 unused args at the end. */
2920 op
->opc
= opc
= opc_new
;
2921 op
->args
[1] = op
->args
[2];
2922 op
->args
[2] = op
->args
[4];
2923 /* Fall through and mark the single-word operation live. */
2929 case INDEX_op_mulu2_i32
:
2930 opc_new
= INDEX_op_mul_i32
;
2931 opc_new2
= INDEX_op_muluh_i32
;
2932 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
2934 case INDEX_op_muls2_i32
:
2935 opc_new
= INDEX_op_mul_i32
;
2936 opc_new2
= INDEX_op_mulsh_i32
;
2937 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
2939 case INDEX_op_mulu2_i64
:
2940 opc_new
= INDEX_op_mul_i64
;
2941 opc_new2
= INDEX_op_muluh_i64
;
2942 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
2944 case INDEX_op_muls2_i64
:
2945 opc_new
= INDEX_op_mul_i64
;
2946 opc_new2
= INDEX_op_mulsh_i64
;
2947 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
2952 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
2953 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
2954 /* Both parts of the operation are dead. */
2957 /* The high part of the operation is dead; generate the low. */
2958 op
->opc
= opc
= opc_new
;
2959 op
->args
[1] = op
->args
[2];
2960 op
->args
[2] = op
->args
[3];
2961 } else if (arg_temp(op
->args
[0])->state
== TS_DEAD
&& have_opc_new2
) {
2962 /* The low part of the operation is dead; generate the high. */
2963 op
->opc
= opc
= opc_new2
;
2964 op
->args
[0] = op
->args
[1];
2965 op
->args
[1] = op
->args
[2];
2966 op
->args
[2] = op
->args
[3];
2970 /* Mark the single-word operation live. */
2975 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2976 nb_iargs
= def
->nb_iargs
;
2977 nb_oargs
= def
->nb_oargs
;
2979 /* Test if the operation can be removed because all
2980 its outputs are dead. We assume that nb_oargs == 0
2981 implies side effects */
2982 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
2983 for (i
= 0; i
< nb_oargs
; i
++) {
2984 if (arg_temp(op
->args
[i
])->state
!= TS_DEAD
) {
2993 tcg_op_remove(s
, op
);
2997 for (i
= 0; i
< nb_oargs
; i
++) {
2998 ts
= arg_temp(op
->args
[i
]);
3000 /* Remember the preference of the uses that followed. */
3001 op
->output_pref
[i
] = *la_temp_pref(ts
);
3003 /* Output args are dead. */
3004 if (ts
->state
& TS_DEAD
) {
3005 arg_life
|= DEAD_ARG
<< i
;
3007 if (ts
->state
& TS_MEM
) {
3008 arg_life
|= SYNC_ARG
<< i
;
3010 ts
->state
= TS_DEAD
;
3014 /* If end of basic block, update. */
3015 if (def
->flags
& TCG_OPF_BB_EXIT
) {
3016 la_func_end(s
, nb_globals
, nb_temps
);
3017 } else if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3018 la_bb_sync(s
, nb_globals
, nb_temps
);
3019 } else if (def
->flags
& TCG_OPF_BB_END
) {
3020 la_bb_end(s
, nb_globals
, nb_temps
);
3021 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3022 la_global_sync(s
, nb_globals
);
3023 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
3024 la_cross_call(s
, nb_temps
);
3028 /* Record arguments that die in this opcode. */
3029 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3030 ts
= arg_temp(op
->args
[i
]);
3031 if (ts
->state
& TS_DEAD
) {
3032 arg_life
|= DEAD_ARG
<< i
;
3036 /* Input arguments are live for preceding opcodes. */
3037 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3038 ts
= arg_temp(op
->args
[i
]);
3039 if (ts
->state
& TS_DEAD
) {
3040 /* For operands that were dead, initially allow
3041 all regs for the type. */
3042 *la_temp_pref(ts
) = tcg_target_available_regs
[ts
->type
];
3043 ts
->state
&= ~TS_DEAD
;
3047 /* Incorporate constraints for this operand. */
3049 case INDEX_op_mov_i32
:
3050 case INDEX_op_mov_i64
:
3051 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3052 have proper constraints. That said, special case
3053 moves to propagate preferences backward. */
3054 if (IS_DEAD_ARG(1)) {
3055 *la_temp_pref(arg_temp(op
->args
[0]))
3056 = *la_temp_pref(arg_temp(op
->args
[1]));
3061 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3062 const TCGArgConstraint
*ct
= &def
->args_ct
[i
];
3063 TCGRegSet set
, *pset
;
3065 ts
= arg_temp(op
->args
[i
]);
3066 pset
= la_temp_pref(ts
);
3071 set
&= op
->output_pref
[ct
->alias_index
];
3073 /* If the combination is not possible, restart. */
3083 op
->life
= arg_life
;
3087 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3088 static bool liveness_pass_2(TCGContext
*s
)
3090 int nb_globals
= s
->nb_globals
;
3092 bool changes
= false;
3093 TCGOp
*op
, *op_next
;
3095 /* Create a temporary for each indirect global. */
3096 for (i
= 0; i
< nb_globals
; ++i
) {
3097 TCGTemp
*its
= &s
->temps
[i
];
3098 if (its
->indirect_reg
) {
3099 TCGTemp
*dts
= tcg_temp_alloc(s
);
3100 dts
->type
= its
->type
;
3101 dts
->base_type
= its
->base_type
;
3102 its
->state_ptr
= dts
;
3104 its
->state_ptr
= NULL
;
3106 /* All globals begin dead. */
3107 its
->state
= TS_DEAD
;
3109 for (nb_temps
= s
->nb_temps
; i
< nb_temps
; ++i
) {
3110 TCGTemp
*its
= &s
->temps
[i
];
3111 its
->state_ptr
= NULL
;
3112 its
->state
= TS_DEAD
;
3115 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
3116 TCGOpcode opc
= op
->opc
;
3117 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
3118 TCGLifeData arg_life
= op
->life
;
3119 int nb_iargs
, nb_oargs
, call_flags
;
3120 TCGTemp
*arg_ts
, *dir_ts
;
3122 if (opc
== INDEX_op_call
) {
3123 nb_oargs
= TCGOP_CALLO(op
);
3124 nb_iargs
= TCGOP_CALLI(op
);
3125 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
3127 nb_iargs
= def
->nb_iargs
;
3128 nb_oargs
= def
->nb_oargs
;
3130 /* Set flags similar to how calls require. */
3131 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3132 /* Like reading globals: sync_globals */
3133 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3134 } else if (def
->flags
& TCG_OPF_BB_END
) {
3135 /* Like writing globals: save_globals */
3137 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3138 /* Like reading globals: sync_globals */
3139 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3141 /* No effect on globals. */
3142 call_flags
= (TCG_CALL_NO_READ_GLOBALS
|
3143 TCG_CALL_NO_WRITE_GLOBALS
);
3147 /* Make sure that input arguments are available. */
3148 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3149 arg_ts
= arg_temp(op
->args
[i
]);
3151 dir_ts
= arg_ts
->state_ptr
;
3152 if (dir_ts
&& arg_ts
->state
== TS_DEAD
) {
3153 TCGOpcode lopc
= (arg_ts
->type
== TCG_TYPE_I32
3156 TCGOp
*lop
= tcg_op_insert_before(s
, op
, lopc
);
3158 lop
->args
[0] = temp_arg(dir_ts
);
3159 lop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3160 lop
->args
[2] = arg_ts
->mem_offset
;
3162 /* Loaded, but synced with memory. */
3163 arg_ts
->state
= TS_MEM
;
3168 /* Perform input replacement, and mark inputs that became dead.
3169 No action is required except keeping temp_state up to date
3170 so that we reload when needed. */
3171 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3172 arg_ts
= arg_temp(op
->args
[i
]);
3174 dir_ts
= arg_ts
->state_ptr
;
3176 op
->args
[i
] = temp_arg(dir_ts
);
3178 if (IS_DEAD_ARG(i
)) {
3179 arg_ts
->state
= TS_DEAD
;
3185 /* Liveness analysis should ensure that the following are
3186 all correct, for call sites and basic block end points. */
3187 if (call_flags
& TCG_CALL_NO_READ_GLOBALS
) {
3189 } else if (call_flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
3190 for (i
= 0; i
< nb_globals
; ++i
) {
3191 /* Liveness should see that globals are synced back,
3192 that is, either TS_DEAD or TS_MEM. */
3193 arg_ts
= &s
->temps
[i
];
3194 tcg_debug_assert(arg_ts
->state_ptr
== 0
3195 || arg_ts
->state
!= 0);
3198 for (i
= 0; i
< nb_globals
; ++i
) {
3199 /* Liveness should see that globals are saved back,
3200 that is, TS_DEAD, waiting to be reloaded. */
3201 arg_ts
= &s
->temps
[i
];
3202 tcg_debug_assert(arg_ts
->state_ptr
== 0
3203 || arg_ts
->state
== TS_DEAD
);
3207 /* Outputs become available. */
3208 if (opc
== INDEX_op_mov_i32
|| opc
== INDEX_op_mov_i64
) {
3209 arg_ts
= arg_temp(op
->args
[0]);
3210 dir_ts
= arg_ts
->state_ptr
;
3212 op
->args
[0] = temp_arg(dir_ts
);
3215 /* The output is now live and modified. */
3218 if (NEED_SYNC_ARG(0)) {
3219 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3222 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3223 TCGTemp
*out_ts
= dir_ts
;
3225 if (IS_DEAD_ARG(0)) {
3226 out_ts
= arg_temp(op
->args
[1]);
3227 arg_ts
->state
= TS_DEAD
;
3228 tcg_op_remove(s
, op
);
3230 arg_ts
->state
= TS_MEM
;
3233 sop
->args
[0] = temp_arg(out_ts
);
3234 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3235 sop
->args
[2] = arg_ts
->mem_offset
;
3237 tcg_debug_assert(!IS_DEAD_ARG(0));
3241 for (i
= 0; i
< nb_oargs
; i
++) {
3242 arg_ts
= arg_temp(op
->args
[i
]);
3243 dir_ts
= arg_ts
->state_ptr
;
3247 op
->args
[i
] = temp_arg(dir_ts
);
3250 /* The output is now live and modified. */
3253 /* Sync outputs upon their last write. */
3254 if (NEED_SYNC_ARG(i
)) {
3255 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3258 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3260 sop
->args
[0] = temp_arg(dir_ts
);
3261 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3262 sop
->args
[2] = arg_ts
->mem_offset
;
3264 arg_ts
->state
= TS_MEM
;
3266 /* Drop outputs that are dead. */
3267 if (IS_DEAD_ARG(i
)) {
3268 arg_ts
->state
= TS_DEAD
;
3277 #ifdef CONFIG_DEBUG_TCG
3278 static void dump_regs(TCGContext
*s
)
3284 for(i
= 0; i
< s
->nb_temps
; i
++) {
3286 printf(" %10s: ", tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3287 switch(ts
->val_type
) {
3289 printf("%s", tcg_target_reg_names
[ts
->reg
]);
3292 printf("%d(%s)", (int)ts
->mem_offset
,
3293 tcg_target_reg_names
[ts
->mem_base
->reg
]);
3295 case TEMP_VAL_CONST
:
3296 printf("$0x%" PRIx64
, ts
->val
);
3308 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3309 if (s
->reg_to_temp
[i
] != NULL
) {
3311 tcg_target_reg_names
[i
],
3312 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
3317 static void check_regs(TCGContext
*s
)
3324 for (reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
3325 ts
= s
->reg_to_temp
[reg
];
3327 if (ts
->val_type
!= TEMP_VAL_REG
|| ts
->reg
!= reg
) {
3328 printf("Inconsistency for register %s:\n",
3329 tcg_target_reg_names
[reg
]);
3334 for (k
= 0; k
< s
->nb_temps
; k
++) {
3336 if (ts
->val_type
== TEMP_VAL_REG
3337 && ts
->kind
!= TEMP_FIXED
3338 && s
->reg_to_temp
[ts
->reg
] != ts
) {
3339 printf("Inconsistency for temp %s:\n",
3340 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3342 printf("reg state:\n");
3350 static void temp_allocate_frame(TCGContext
*s
, TCGTemp
*ts
)
3352 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3353 /* Sparc64 stack is accessed with offset of 2047 */
3354 s
->current_frame_offset
= (s
->current_frame_offset
+
3355 (tcg_target_long
)sizeof(tcg_target_long
) - 1) &
3356 ~(sizeof(tcg_target_long
) - 1);
3358 if (s
->current_frame_offset
+ (tcg_target_long
)sizeof(tcg_target_long
) >
3362 ts
->mem_offset
= s
->current_frame_offset
;
3363 ts
->mem_base
= s
->frame_temp
;
3364 ts
->mem_allocated
= 1;
3365 s
->current_frame_offset
+= sizeof(tcg_target_long
);
3368 static void temp_load(TCGContext
*, TCGTemp
*, TCGRegSet
, TCGRegSet
, TCGRegSet
);
3370 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3371 mark it free; otherwise mark it dead. */
3372 static void temp_free_or_dead(TCGContext
*s
, TCGTemp
*ts
, int free_or_dead
)
3374 TCGTempVal new_type
;
3381 new_type
= TEMP_VAL_MEM
;
3384 new_type
= free_or_dead
< 0 ? TEMP_VAL_MEM
: TEMP_VAL_DEAD
;
3387 new_type
= TEMP_VAL_CONST
;
3390 g_assert_not_reached();
3392 if (ts
->val_type
== TEMP_VAL_REG
) {
3393 s
->reg_to_temp
[ts
->reg
] = NULL
;
3395 ts
->val_type
= new_type
;
3398 /* Mark a temporary as dead. */
3399 static inline void temp_dead(TCGContext
*s
, TCGTemp
*ts
)
3401 temp_free_or_dead(s
, ts
, 1);
3404 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3405 registers needs to be allocated to store a constant. If 'free_or_dead'
3406 is non-zero, subsequently release the temporary; if it is positive, the
3407 temp is dead; if it is negative, the temp is free. */
3408 static void temp_sync(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
,
3409 TCGRegSet preferred_regs
, int free_or_dead
)
3411 if (!temp_readonly(ts
) && !ts
->mem_coherent
) {
3412 if (!ts
->mem_allocated
) {
3413 temp_allocate_frame(s
, ts
);
3415 switch (ts
->val_type
) {
3416 case TEMP_VAL_CONST
:
3417 /* If we're going to free the temp immediately, then we won't
3418 require it later in a register, so attempt to store the
3419 constant to memory directly. */
3421 && tcg_out_sti(s
, ts
->type
, ts
->val
,
3422 ts
->mem_base
->reg
, ts
->mem_offset
)) {
3425 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3426 allocated_regs
, preferred_regs
);
3430 tcg_out_st(s
, ts
->type
, ts
->reg
,
3431 ts
->mem_base
->reg
, ts
->mem_offset
);
3441 ts
->mem_coherent
= 1;
3444 temp_free_or_dead(s
, ts
, free_or_dead
);
3448 /* free register 'reg' by spilling the corresponding temporary if necessary */
3449 static void tcg_reg_free(TCGContext
*s
, TCGReg reg
, TCGRegSet allocated_regs
)
3451 TCGTemp
*ts
= s
->reg_to_temp
[reg
];
3453 temp_sync(s
, ts
, allocated_regs
, 0, -1);
3459 * @required_regs: Set of registers in which we must allocate.
3460 * @allocated_regs: Set of registers which must be avoided.
3461 * @preferred_regs: Set of registers we should prefer.
3462 * @rev: True if we search the registers in "indirect" order.
3464 * The allocated register must be in @required_regs & ~@allocated_regs,
3465 * but if we can put it in @preferred_regs we may save a move later.
3467 static TCGReg
tcg_reg_alloc(TCGContext
*s
, TCGRegSet required_regs
,
3468 TCGRegSet allocated_regs
,
3469 TCGRegSet preferred_regs
, bool rev
)
3471 int i
, j
, f
, n
= ARRAY_SIZE(tcg_target_reg_alloc_order
);
3472 TCGRegSet reg_ct
[2];
3475 reg_ct
[1] = required_regs
& ~allocated_regs
;
3476 tcg_debug_assert(reg_ct
[1] != 0);
3477 reg_ct
[0] = reg_ct
[1] & preferred_regs
;
3479 /* Skip the preferred_regs option if it cannot be satisfied,
3480 or if the preference made no difference. */
3481 f
= reg_ct
[0] == 0 || reg_ct
[0] == reg_ct
[1];
3483 order
= rev
? indirect_reg_alloc_order
: tcg_target_reg_alloc_order
;
3485 /* Try free registers, preferences first. */
3486 for (j
= f
; j
< 2; j
++) {
3487 TCGRegSet set
= reg_ct
[j
];
3489 if (tcg_regset_single(set
)) {
3490 /* One register in the set. */
3491 TCGReg reg
= tcg_regset_first(set
);
3492 if (s
->reg_to_temp
[reg
] == NULL
) {
3496 for (i
= 0; i
< n
; i
++) {
3497 TCGReg reg
= order
[i
];
3498 if (s
->reg_to_temp
[reg
] == NULL
&&
3499 tcg_regset_test_reg(set
, reg
)) {
3506 /* We must spill something. */
3507 for (j
= f
; j
< 2; j
++) {
3508 TCGRegSet set
= reg_ct
[j
];
3510 if (tcg_regset_single(set
)) {
3511 /* One register in the set. */
3512 TCGReg reg
= tcg_regset_first(set
);
3513 tcg_reg_free(s
, reg
, allocated_regs
);
3516 for (i
= 0; i
< n
; i
++) {
3517 TCGReg reg
= order
[i
];
3518 if (tcg_regset_test_reg(set
, reg
)) {
3519 tcg_reg_free(s
, reg
, allocated_regs
);
3529 /* Make sure the temporary is in a register. If needed, allocate the register
3530 from DESIRED while avoiding ALLOCATED. */
3531 static void temp_load(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet desired_regs
,
3532 TCGRegSet allocated_regs
, TCGRegSet preferred_regs
)
3536 switch (ts
->val_type
) {
3539 case TEMP_VAL_CONST
:
3540 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3541 preferred_regs
, ts
->indirect_base
);
3542 if (ts
->type
<= TCG_TYPE_I64
) {
3543 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
3545 uint64_t val
= ts
->val
;
3549 * Find the minimal vector element that matches the constant.
3550 * The targets will, in general, have to do this search anyway,
3551 * do this generically.
3553 if (val
== dup_const(MO_8
, val
)) {
3555 } else if (val
== dup_const(MO_16
, val
)) {
3557 } else if (val
== dup_const(MO_32
, val
)) {
3561 tcg_out_dupi_vec(s
, ts
->type
, vece
, reg
, ts
->val
);
3563 ts
->mem_coherent
= 0;
3566 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3567 preferred_regs
, ts
->indirect_base
);
3568 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
3569 ts
->mem_coherent
= 1;
3576 ts
->val_type
= TEMP_VAL_REG
;
3577 s
->reg_to_temp
[reg
] = ts
;
3580 /* Save a temporary to memory. 'allocated_regs' is used in case a
3581 temporary registers needs to be allocated to store a constant. */
3582 static void temp_save(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
)
3584 /* The liveness analysis already ensures that globals are back
3585 in memory. Keep an tcg_debug_assert for safety. */
3586 tcg_debug_assert(ts
->val_type
== TEMP_VAL_MEM
|| temp_readonly(ts
));
3589 /* save globals to their canonical location and assume they can be
3590 modified be the following code. 'allocated_regs' is used in case a
3591 temporary registers needs to be allocated to store a constant. */
3592 static void save_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3596 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3597 temp_save(s
, &s
->temps
[i
], allocated_regs
);
3601 /* sync globals to their canonical location and assume they can be
3602 read by the following code. 'allocated_regs' is used in case a
3603 temporary registers needs to be allocated to store a constant. */
3604 static void sync_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3608 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3609 TCGTemp
*ts
= &s
->temps
[i
];
3610 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
3611 || ts
->kind
== TEMP_FIXED
3612 || ts
->mem_coherent
);
3616 /* at the end of a basic block, we assume all temporaries are dead and
3617 all globals are stored at their canonical location. */
3618 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
3622 for (i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3623 TCGTemp
*ts
= &s
->temps
[i
];
3627 temp_save(s
, ts
, allocated_regs
);
3630 /* The liveness analysis already ensures that temps are dead.
3631 Keep an tcg_debug_assert for safety. */
3632 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3635 /* Similarly, we should have freed any allocated register. */
3636 tcg_debug_assert(ts
->val_type
== TEMP_VAL_CONST
);
3639 g_assert_not_reached();
3643 save_globals(s
, allocated_regs
);
3647 * At a conditional branch, we assume all temporaries are dead and
3648 * all globals and local temps are synced to their location.
3650 static void tcg_reg_alloc_cbranch(TCGContext
*s
, TCGRegSet allocated_regs
)
3652 sync_globals(s
, allocated_regs
);
3654 for (int i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3655 TCGTemp
*ts
= &s
->temps
[i
];
3657 * The liveness analysis already ensures that temps are dead.
3658 * Keep tcg_debug_asserts for safety.
3662 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
|| ts
->mem_coherent
);
3665 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3670 g_assert_not_reached();
3676 * Specialized code generation for INDEX_op_mov_* with a constant.
3678 static void tcg_reg_alloc_do_movi(TCGContext
*s
, TCGTemp
*ots
,
3679 tcg_target_ulong val
, TCGLifeData arg_life
,
3680 TCGRegSet preferred_regs
)
3682 /* ENV should not be modified. */
3683 tcg_debug_assert(!temp_readonly(ots
));
3685 /* The movi is not explicitly generated here. */
3686 if (ots
->val_type
== TEMP_VAL_REG
) {
3687 s
->reg_to_temp
[ots
->reg
] = NULL
;
3689 ots
->val_type
= TEMP_VAL_CONST
;
3691 ots
->mem_coherent
= 0;
3692 if (NEED_SYNC_ARG(0)) {
3693 temp_sync(s
, ots
, s
->reserved_regs
, preferred_regs
, IS_DEAD_ARG(0));
3694 } else if (IS_DEAD_ARG(0)) {
3700 * Specialized code generation for INDEX_op_mov_*.
3702 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOp
*op
)
3704 const TCGLifeData arg_life
= op
->life
;
3705 TCGRegSet allocated_regs
, preferred_regs
;
3707 TCGType otype
, itype
;
3709 allocated_regs
= s
->reserved_regs
;
3710 preferred_regs
= op
->output_pref
[0];
3711 ots
= arg_temp(op
->args
[0]);
3712 ts
= arg_temp(op
->args
[1]);
3714 /* ENV should not be modified. */
3715 tcg_debug_assert(!temp_readonly(ots
));
3717 /* Note that otype != itype for no-op truncation. */
3721 if (ts
->val_type
== TEMP_VAL_CONST
) {
3722 /* propagate constant or generate sti */
3723 tcg_target_ulong val
= ts
->val
;
3724 if (IS_DEAD_ARG(1)) {
3727 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, preferred_regs
);
3731 /* If the source value is in memory we're going to be forced
3732 to have it in a register in order to perform the copy. Copy
3733 the SOURCE value into its own register first, that way we
3734 don't have to reload SOURCE the next time it is used. */
3735 if (ts
->val_type
== TEMP_VAL_MEM
) {
3736 temp_load(s
, ts
, tcg_target_available_regs
[itype
],
3737 allocated_regs
, preferred_regs
);
3740 tcg_debug_assert(ts
->val_type
== TEMP_VAL_REG
);
3741 if (IS_DEAD_ARG(0)) {
3742 /* mov to a non-saved dead register makes no sense (even with
3743 liveness analysis disabled). */
3744 tcg_debug_assert(NEED_SYNC_ARG(0));
3745 if (!ots
->mem_allocated
) {
3746 temp_allocate_frame(s
, ots
);
3748 tcg_out_st(s
, otype
, ts
->reg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3749 if (IS_DEAD_ARG(1)) {
3754 if (IS_DEAD_ARG(1) && ts
->kind
!= TEMP_FIXED
) {
3755 /* the mov can be suppressed */
3756 if (ots
->val_type
== TEMP_VAL_REG
) {
3757 s
->reg_to_temp
[ots
->reg
] = NULL
;
3762 if (ots
->val_type
!= TEMP_VAL_REG
) {
3763 /* When allocating a new register, make sure to not spill the
3765 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
3766 ots
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
3767 allocated_regs
, preferred_regs
,
3768 ots
->indirect_base
);
3770 if (!tcg_out_mov(s
, otype
, ots
->reg
, ts
->reg
)) {
3772 * Cross register class move not supported.
3773 * Store the source register into the destination slot
3774 * and leave the destination temp as TEMP_VAL_MEM.
3776 assert(!temp_readonly(ots
));
3777 if (!ts
->mem_allocated
) {
3778 temp_allocate_frame(s
, ots
);
3780 tcg_out_st(s
, ts
->type
, ts
->reg
,
3781 ots
->mem_base
->reg
, ots
->mem_offset
);
3782 ots
->mem_coherent
= 1;
3783 temp_free_or_dead(s
, ots
, -1);
3787 ots
->val_type
= TEMP_VAL_REG
;
3788 ots
->mem_coherent
= 0;
3789 s
->reg_to_temp
[ots
->reg
] = ots
;
3790 if (NEED_SYNC_ARG(0)) {
3791 temp_sync(s
, ots
, allocated_regs
, 0, 0);
3797 * Specialized code generation for INDEX_op_dup_vec.
3799 static void tcg_reg_alloc_dup(TCGContext
*s
, const TCGOp
*op
)
3801 const TCGLifeData arg_life
= op
->life
;
3802 TCGRegSet dup_out_regs
, dup_in_regs
;
3804 TCGType itype
, vtype
;
3805 intptr_t endian_fixup
;
3809 ots
= arg_temp(op
->args
[0]);
3810 its
= arg_temp(op
->args
[1]);
3812 /* ENV should not be modified. */
3813 tcg_debug_assert(!temp_readonly(ots
));
3816 vece
= TCGOP_VECE(op
);
3817 vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3819 if (its
->val_type
== TEMP_VAL_CONST
) {
3820 /* Propagate constant via movi -> dupi. */
3821 tcg_target_ulong val
= its
->val
;
3822 if (IS_DEAD_ARG(1)) {
3825 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, op
->output_pref
[0]);
3829 dup_out_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3830 dup_in_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[1].regs
;
3832 /* Allocate the output register now. */
3833 if (ots
->val_type
!= TEMP_VAL_REG
) {
3834 TCGRegSet allocated_regs
= s
->reserved_regs
;
3836 if (!IS_DEAD_ARG(1) && its
->val_type
== TEMP_VAL_REG
) {
3837 /* Make sure to not spill the input register. */
3838 tcg_regset_set_reg(allocated_regs
, its
->reg
);
3840 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
3841 op
->output_pref
[0], ots
->indirect_base
);
3842 ots
->val_type
= TEMP_VAL_REG
;
3843 ots
->mem_coherent
= 0;
3844 s
->reg_to_temp
[ots
->reg
] = ots
;
3847 switch (its
->val_type
) {
3850 * The dup constriaints must be broad, covering all possible VECE.
3851 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3852 * to fail, indicating that extra moves are required for that case.
3854 if (tcg_regset_test_reg(dup_in_regs
, its
->reg
)) {
3855 if (tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, its
->reg
)) {
3858 /* Try again from memory or a vector input register. */
3860 if (!its
->mem_coherent
) {
3862 * The input register is not synced, and so an extra store
3863 * would be required to use memory. Attempt an integer-vector
3864 * register move first. We do not have a TCGRegSet for this.
3866 if (tcg_out_mov(s
, itype
, ots
->reg
, its
->reg
)) {
3869 /* Sync the temp back to its slot and load from there. */
3870 temp_sync(s
, its
, s
->reserved_regs
, 0, 0);
3875 #ifdef HOST_WORDS_BIGENDIAN
3876 endian_fixup
= itype
== TCG_TYPE_I32
? 4 : 8;
3877 endian_fixup
-= 1 << vece
;
3881 if (tcg_out_dupm_vec(s
, vtype
, vece
, ots
->reg
, its
->mem_base
->reg
,
3882 its
->mem_offset
+ endian_fixup
)) {
3885 tcg_out_ld(s
, itype
, ots
->reg
, its
->mem_base
->reg
, its
->mem_offset
);
3889 g_assert_not_reached();
3892 /* We now have a vector input register, so dup must succeed. */
3893 ok
= tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, ots
->reg
);
3894 tcg_debug_assert(ok
);
3897 if (IS_DEAD_ARG(1)) {
3900 if (NEED_SYNC_ARG(0)) {
3901 temp_sync(s
, ots
, s
->reserved_regs
, 0, 0);
3903 if (IS_DEAD_ARG(0)) {
3908 static void tcg_reg_alloc_op(TCGContext
*s
, const TCGOp
*op
)
3910 const TCGLifeData arg_life
= op
->life
;
3911 const TCGOpDef
* const def
= &tcg_op_defs
[op
->opc
];
3912 TCGRegSet i_allocated_regs
;
3913 TCGRegSet o_allocated_regs
;
3914 int i
, k
, nb_iargs
, nb_oargs
;
3917 const TCGArgConstraint
*arg_ct
;
3919 TCGArg new_args
[TCG_MAX_OP_ARGS
];
3920 int const_args
[TCG_MAX_OP_ARGS
];
3922 nb_oargs
= def
->nb_oargs
;
3923 nb_iargs
= def
->nb_iargs
;
3925 /* copy constants */
3926 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
3927 op
->args
+ nb_oargs
+ nb_iargs
,
3928 sizeof(TCGArg
) * def
->nb_cargs
);
3930 i_allocated_regs
= s
->reserved_regs
;
3931 o_allocated_regs
= s
->reserved_regs
;
3933 /* satisfy input constraints */
3934 for (k
= 0; k
< nb_iargs
; k
++) {
3935 TCGRegSet i_preferred_regs
, o_preferred_regs
;
3937 i
= def
->args_ct
[nb_oargs
+ k
].sort_index
;
3939 arg_ct
= &def
->args_ct
[i
];
3942 if (ts
->val_type
== TEMP_VAL_CONST
3943 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
)) {
3944 /* constant is OK for instruction */
3946 new_args
[i
] = ts
->val
;
3950 i_preferred_regs
= o_preferred_regs
= 0;
3951 if (arg_ct
->ialias
) {
3952 o_preferred_regs
= op
->output_pref
[arg_ct
->alias_index
];
3955 * If the input is readonly, then it cannot also be an
3956 * output and aliased to itself. If the input is not
3957 * dead after the instruction, we must allocate a new
3958 * register and move it.
3960 if (temp_readonly(ts
) || !IS_DEAD_ARG(i
)) {
3961 goto allocate_in_reg
;
3965 * Check if the current register has already been allocated
3966 * for another input aliased to an output.
3968 if (ts
->val_type
== TEMP_VAL_REG
) {
3970 for (int k2
= 0; k2
< k
; k2
++) {
3971 int i2
= def
->args_ct
[nb_oargs
+ k2
].sort_index
;
3972 if (def
->args_ct
[i2
].ialias
&& reg
== new_args
[i2
]) {
3973 goto allocate_in_reg
;
3977 i_preferred_regs
= o_preferred_regs
;
3980 temp_load(s
, ts
, arg_ct
->regs
, i_allocated_regs
, i_preferred_regs
);
3983 if (!tcg_regset_test_reg(arg_ct
->regs
, reg
)) {
3986 * Allocate a new register matching the constraint
3987 * and move the temporary register into it.
3989 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3990 i_allocated_regs
, 0);
3991 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, i_allocated_regs
,
3992 o_preferred_regs
, ts
->indirect_base
);
3993 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
3995 * Cross register class move not supported. Sync the
3996 * temp back to its slot and load from there.
3998 temp_sync(s
, ts
, i_allocated_regs
, 0, 0);
3999 tcg_out_ld(s
, ts
->type
, reg
,
4000 ts
->mem_base
->reg
, ts
->mem_offset
);
4005 tcg_regset_set_reg(i_allocated_regs
, reg
);
4008 /* mark dead temporaries and free the associated registers */
4009 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
4010 if (IS_DEAD_ARG(i
)) {
4011 temp_dead(s
, arg_temp(op
->args
[i
]));
4015 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
4016 tcg_reg_alloc_cbranch(s
, i_allocated_regs
);
4017 } else if (def
->flags
& TCG_OPF_BB_END
) {
4018 tcg_reg_alloc_bb_end(s
, i_allocated_regs
);
4020 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
4021 /* XXX: permit generic clobber register list ? */
4022 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4023 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4024 tcg_reg_free(s
, i
, i_allocated_regs
);
4028 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
4029 /* sync globals if the op has side effects and might trigger
4031 sync_globals(s
, i_allocated_regs
);
4034 /* satisfy the output constraints */
4035 for(k
= 0; k
< nb_oargs
; k
++) {
4036 i
= def
->args_ct
[k
].sort_index
;
4038 arg_ct
= &def
->args_ct
[i
];
4041 /* ENV should not be modified. */
4042 tcg_debug_assert(!temp_readonly(ts
));
4044 if (arg_ct
->oalias
&& !const_args
[arg_ct
->alias_index
]) {
4045 reg
= new_args
[arg_ct
->alias_index
];
4046 } else if (arg_ct
->newreg
) {
4047 reg
= tcg_reg_alloc(s
, arg_ct
->regs
,
4048 i_allocated_regs
| o_allocated_regs
,
4049 op
->output_pref
[k
], ts
->indirect_base
);
4051 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, o_allocated_regs
,
4052 op
->output_pref
[k
], ts
->indirect_base
);
4054 tcg_regset_set_reg(o_allocated_regs
, reg
);
4055 if (ts
->val_type
== TEMP_VAL_REG
) {
4056 s
->reg_to_temp
[ts
->reg
] = NULL
;
4058 ts
->val_type
= TEMP_VAL_REG
;
4061 * Temp value is modified, so the value kept in memory is
4062 * potentially not the same.
4064 ts
->mem_coherent
= 0;
4065 s
->reg_to_temp
[reg
] = ts
;
4070 /* emit instruction */
4071 if (def
->flags
& TCG_OPF_VECTOR
) {
4072 tcg_out_vec_op(s
, op
->opc
, TCGOP_VECL(op
), TCGOP_VECE(op
),
4073 new_args
, const_args
);
4075 tcg_out_op(s
, op
->opc
, new_args
, const_args
);
4078 /* move the outputs in the correct register if needed */
4079 for(i
= 0; i
< nb_oargs
; i
++) {
4080 ts
= arg_temp(op
->args
[i
]);
4082 /* ENV should not be modified. */
4083 tcg_debug_assert(!temp_readonly(ts
));
4085 if (NEED_SYNC_ARG(i
)) {
4086 temp_sync(s
, ts
, o_allocated_regs
, 0, IS_DEAD_ARG(i
));
4087 } else if (IS_DEAD_ARG(i
)) {
4093 static bool tcg_reg_alloc_dup2(TCGContext
*s
, const TCGOp
*op
)
4095 const TCGLifeData arg_life
= op
->life
;
4096 TCGTemp
*ots
, *itsl
, *itsh
;
4097 TCGType vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
4099 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4100 tcg_debug_assert(TCG_TARGET_REG_BITS
== 32);
4101 tcg_debug_assert(TCGOP_VECE(op
) == MO_64
);
4103 ots
= arg_temp(op
->args
[0]);
4104 itsl
= arg_temp(op
->args
[1]);
4105 itsh
= arg_temp(op
->args
[2]);
4107 /* ENV should not be modified. */
4108 tcg_debug_assert(!temp_readonly(ots
));
4110 /* Allocate the output register now. */
4111 if (ots
->val_type
!= TEMP_VAL_REG
) {
4112 TCGRegSet allocated_regs
= s
->reserved_regs
;
4113 TCGRegSet dup_out_regs
=
4114 tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
4116 /* Make sure to not spill the input registers. */
4117 if (!IS_DEAD_ARG(1) && itsl
->val_type
== TEMP_VAL_REG
) {
4118 tcg_regset_set_reg(allocated_regs
, itsl
->reg
);
4120 if (!IS_DEAD_ARG(2) && itsh
->val_type
== TEMP_VAL_REG
) {
4121 tcg_regset_set_reg(allocated_regs
, itsh
->reg
);
4124 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
4125 op
->output_pref
[0], ots
->indirect_base
);
4126 ots
->val_type
= TEMP_VAL_REG
;
4127 ots
->mem_coherent
= 0;
4128 s
->reg_to_temp
[ots
->reg
] = ots
;
4131 /* Promote dup2 of immediates to dupi_vec. */
4132 if (itsl
->val_type
== TEMP_VAL_CONST
&& itsh
->val_type
== TEMP_VAL_CONST
) {
4133 uint64_t val
= deposit64(itsl
->val
, 32, 32, itsh
->val
);
4136 if (val
== dup_const(MO_8
, val
)) {
4138 } else if (val
== dup_const(MO_16
, val
)) {
4140 } else if (val
== dup_const(MO_32
, val
)) {
4144 tcg_out_dupi_vec(s
, vtype
, vece
, ots
->reg
, val
);
4148 /* If the two inputs form one 64-bit value, try dupm_vec. */
4149 if (itsl
+ 1 == itsh
&& itsl
->base_type
== TCG_TYPE_I64
) {
4150 if (!itsl
->mem_coherent
) {
4151 temp_sync(s
, itsl
, s
->reserved_regs
, 0, 0);
4153 if (!itsh
->mem_coherent
) {
4154 temp_sync(s
, itsh
, s
->reserved_regs
, 0, 0);
4156 #ifdef HOST_WORDS_BIGENDIAN
4157 TCGTemp
*its
= itsh
;
4159 TCGTemp
*its
= itsl
;
4161 if (tcg_out_dupm_vec(s
, vtype
, MO_64
, ots
->reg
,
4162 its
->mem_base
->reg
, its
->mem_offset
)) {
4167 /* Fall back to generic expansion. */
4171 if (IS_DEAD_ARG(1)) {
4174 if (IS_DEAD_ARG(2)) {
4177 if (NEED_SYNC_ARG(0)) {
4178 temp_sync(s
, ots
, s
->reserved_regs
, 0, IS_DEAD_ARG(0));
4179 } else if (IS_DEAD_ARG(0)) {
4185 #ifdef TCG_TARGET_STACK_GROWSUP
4186 #define STACK_DIR(x) (-(x))
4188 #define STACK_DIR(x) (x)
4191 static void tcg_reg_alloc_call(TCGContext
*s
, TCGOp
*op
)
4193 const int nb_oargs
= TCGOP_CALLO(op
);
4194 const int nb_iargs
= TCGOP_CALLI(op
);
4195 const TCGLifeData arg_life
= op
->life
;
4196 int flags
, nb_regs
, i
;
4200 intptr_t stack_offset
;
4201 size_t call_stack_size
;
4202 tcg_insn_unit
*func_addr
;
4204 TCGRegSet allocated_regs
;
4206 func_addr
= (tcg_insn_unit
*)(intptr_t)op
->args
[nb_oargs
+ nb_iargs
];
4207 flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
4209 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
4210 if (nb_regs
> nb_iargs
) {
4214 /* assign stack slots first */
4215 call_stack_size
= (nb_iargs
- nb_regs
) * sizeof(tcg_target_long
);
4216 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
4217 ~(TCG_TARGET_STACK_ALIGN
- 1);
4218 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
4219 if (allocate_args
) {
4220 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4221 preallocate call stack */
4225 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
4226 for (i
= nb_regs
; i
< nb_iargs
; i
++) {
4227 arg
= op
->args
[nb_oargs
+ i
];
4228 #ifdef TCG_TARGET_STACK_GROWSUP
4229 stack_offset
-= sizeof(tcg_target_long
);
4231 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4233 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4234 s
->reserved_regs
, 0);
4235 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
4237 #ifndef TCG_TARGET_STACK_GROWSUP
4238 stack_offset
+= sizeof(tcg_target_long
);
4242 /* assign input registers */
4243 allocated_regs
= s
->reserved_regs
;
4244 for (i
= 0; i
< nb_regs
; i
++) {
4245 arg
= op
->args
[nb_oargs
+ i
];
4246 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4248 reg
= tcg_target_call_iarg_regs
[i
];
4250 if (ts
->val_type
== TEMP_VAL_REG
) {
4251 if (ts
->reg
!= reg
) {
4252 tcg_reg_free(s
, reg
, allocated_regs
);
4253 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4255 * Cross register class move not supported. Sync the
4256 * temp back to its slot and load from there.
4258 temp_sync(s
, ts
, allocated_regs
, 0, 0);
4259 tcg_out_ld(s
, ts
->type
, reg
,
4260 ts
->mem_base
->reg
, ts
->mem_offset
);
4264 TCGRegSet arg_set
= 0;
4266 tcg_reg_free(s
, reg
, allocated_regs
);
4267 tcg_regset_set_reg(arg_set
, reg
);
4268 temp_load(s
, ts
, arg_set
, allocated_regs
, 0);
4271 tcg_regset_set_reg(allocated_regs
, reg
);
4275 /* mark dead temporaries and free the associated registers */
4276 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
4277 if (IS_DEAD_ARG(i
)) {
4278 temp_dead(s
, arg_temp(op
->args
[i
]));
4282 /* clobber call registers */
4283 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4284 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4285 tcg_reg_free(s
, i
, allocated_regs
);
4289 /* Save globals if they might be written by the helper, sync them if
4290 they might be read. */
4291 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
4293 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
4294 sync_globals(s
, allocated_regs
);
4296 save_globals(s
, allocated_regs
);
4299 tcg_out_call(s
, func_addr
);
4301 /* assign output registers and emit moves if needed */
4302 for(i
= 0; i
< nb_oargs
; i
++) {
4306 /* ENV should not be modified. */
4307 tcg_debug_assert(!temp_readonly(ts
));
4309 reg
= tcg_target_call_oarg_regs
[i
];
4310 tcg_debug_assert(s
->reg_to_temp
[reg
] == NULL
);
4311 if (ts
->val_type
== TEMP_VAL_REG
) {
4312 s
->reg_to_temp
[ts
->reg
] = NULL
;
4314 ts
->val_type
= TEMP_VAL_REG
;
4316 ts
->mem_coherent
= 0;
4317 s
->reg_to_temp
[reg
] = ts
;
4318 if (NEED_SYNC_ARG(i
)) {
4319 temp_sync(s
, ts
, allocated_regs
, 0, IS_DEAD_ARG(i
));
4320 } else if (IS_DEAD_ARG(i
)) {
4326 #ifdef CONFIG_PROFILER
4328 /* avoid copy/paste errors */
4329 #define PROF_ADD(to, from, field) \
4331 (to)->field += qatomic_read(&((from)->field)); \
4334 #define PROF_MAX(to, from, field) \
4336 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4337 if (val__ > (to)->field) { \
4338 (to)->field = val__; \
4342 /* Pass in a zero'ed @prof */
4344 void tcg_profile_snapshot(TCGProfile
*prof
, bool counters
, bool table
)
4346 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4349 for (i
= 0; i
< n_ctxs
; i
++) {
4350 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4351 const TCGProfile
*orig
= &s
->prof
;
4354 PROF_ADD(prof
, orig
, cpu_exec_time
);
4355 PROF_ADD(prof
, orig
, tb_count1
);
4356 PROF_ADD(prof
, orig
, tb_count
);
4357 PROF_ADD(prof
, orig
, op_count
);
4358 PROF_MAX(prof
, orig
, op_count_max
);
4359 PROF_ADD(prof
, orig
, temp_count
);
4360 PROF_MAX(prof
, orig
, temp_count_max
);
4361 PROF_ADD(prof
, orig
, del_op_count
);
4362 PROF_ADD(prof
, orig
, code_in_len
);
4363 PROF_ADD(prof
, orig
, code_out_len
);
4364 PROF_ADD(prof
, orig
, search_out_len
);
4365 PROF_ADD(prof
, orig
, interm_time
);
4366 PROF_ADD(prof
, orig
, code_time
);
4367 PROF_ADD(prof
, orig
, la_time
);
4368 PROF_ADD(prof
, orig
, opt_time
);
4369 PROF_ADD(prof
, orig
, restore_count
);
4370 PROF_ADD(prof
, orig
, restore_time
);
4375 for (i
= 0; i
< NB_OPS
; i
++) {
4376 PROF_ADD(prof
, orig
, table_op_count
[i
]);
4385 static void tcg_profile_snapshot_counters(TCGProfile
*prof
)
4387 tcg_profile_snapshot(prof
, true, false);
4390 static void tcg_profile_snapshot_table(TCGProfile
*prof
)
4392 tcg_profile_snapshot(prof
, false, true);
4395 void tcg_dump_op_count(void)
4397 TCGProfile prof
= {};
4400 tcg_profile_snapshot_table(&prof
);
4401 for (i
= 0; i
< NB_OPS
; i
++) {
4402 qemu_printf("%s %" PRId64
"\n", tcg_op_defs
[i
].name
,
4403 prof
.table_op_count
[i
]);
4407 int64_t tcg_cpu_exec_time(void)
4409 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4413 for (i
= 0; i
< n_ctxs
; i
++) {
4414 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4415 const TCGProfile
*prof
= &s
->prof
;
4417 ret
+= qatomic_read(&prof
->cpu_exec_time
);
4422 void tcg_dump_op_count(void)
4424 qemu_printf("[TCG profiler not compiled]\n");
4427 int64_t tcg_cpu_exec_time(void)
4429 error_report("%s: TCG profiler not compiled", __func__
);
4435 int tcg_gen_code(TCGContext
*s
, TranslationBlock
*tb
)
4437 #ifdef CONFIG_PROFILER
4438 TCGProfile
*prof
= &s
->prof
;
4443 #ifdef CONFIG_PROFILER
4447 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4450 qatomic_set(&prof
->op_count
, prof
->op_count
+ n
);
4451 if (n
> prof
->op_count_max
) {
4452 qatomic_set(&prof
->op_count_max
, n
);
4456 qatomic_set(&prof
->temp_count
, prof
->temp_count
+ n
);
4457 if (n
> prof
->temp_count_max
) {
4458 qatomic_set(&prof
->temp_count_max
, n
);
4464 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
)
4465 && qemu_log_in_addr_range(tb
->pc
))) {
4466 FILE *logfile
= qemu_log_lock();
4468 tcg_dump_ops(s
, false);
4470 qemu_log_unlock(logfile
);
4474 #ifdef CONFIG_DEBUG_TCG
4475 /* Ensure all labels referenced have been emitted. */
4480 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
4481 if (unlikely(!l
->present
) && l
->refs
) {
4482 qemu_log_mask(CPU_LOG_TB_OP
,
4483 "$L%d referenced but not present.\n", l
->id
);
4491 #ifdef CONFIG_PROFILER
4492 qatomic_set(&prof
->opt_time
, prof
->opt_time
- profile_getclock());
4495 #ifdef USE_TCG_OPTIMIZATIONS
4499 #ifdef CONFIG_PROFILER
4500 qatomic_set(&prof
->opt_time
, prof
->opt_time
+ profile_getclock());
4501 qatomic_set(&prof
->la_time
, prof
->la_time
- profile_getclock());
4504 reachable_code_pass(s
);
4507 if (s
->nb_indirects
> 0) {
4509 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND
)
4510 && qemu_log_in_addr_range(tb
->pc
))) {
4511 FILE *logfile
= qemu_log_lock();
4512 qemu_log("OP before indirect lowering:\n");
4513 tcg_dump_ops(s
, false);
4515 qemu_log_unlock(logfile
);
4518 /* Replace indirect temps with direct temps. */
4519 if (liveness_pass_2(s
)) {
4520 /* If changes were made, re-run liveness. */
4525 #ifdef CONFIG_PROFILER
4526 qatomic_set(&prof
->la_time
, prof
->la_time
+ profile_getclock());
4530 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
)
4531 && qemu_log_in_addr_range(tb
->pc
))) {
4532 FILE *logfile
= qemu_log_lock();
4533 qemu_log("OP after optimization and liveness analysis:\n");
4534 tcg_dump_ops(s
, true);
4536 qemu_log_unlock(logfile
);
4540 tcg_reg_alloc_start(s
);
4543 * Reset the buffer pointers when restarting after overflow.
4544 * TODO: Move this into translate-all.c with the rest of the
4545 * buffer management. Having only this done here is confusing.
4547 s
->code_buf
= tcg_splitwx_to_rw(tb
->tc
.ptr
);
4548 s
->code_ptr
= s
->code_buf
;
4550 #ifdef TCG_TARGET_NEED_LDST_LABELS
4551 QSIMPLEQ_INIT(&s
->ldst_labels
);
4553 #ifdef TCG_TARGET_NEED_POOL_LABELS
4554 s
->pool_labels
= NULL
;
4558 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4559 TCGOpcode opc
= op
->opc
;
4561 #ifdef CONFIG_PROFILER
4562 qatomic_set(&prof
->table_op_count
[opc
], prof
->table_op_count
[opc
] + 1);
4566 case INDEX_op_mov_i32
:
4567 case INDEX_op_mov_i64
:
4568 case INDEX_op_mov_vec
:
4569 tcg_reg_alloc_mov(s
, op
);
4571 case INDEX_op_dup_vec
:
4572 tcg_reg_alloc_dup(s
, op
);
4574 case INDEX_op_insn_start
:
4575 if (num_insns
>= 0) {
4576 size_t off
= tcg_current_code_size(s
);
4577 s
->gen_insn_end_off
[num_insns
] = off
;
4578 /* Assert that we do not overflow our stored offset. */
4579 assert(s
->gen_insn_end_off
[num_insns
] == off
);
4582 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
4584 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4585 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
4589 s
->gen_insn_data
[num_insns
][i
] = a
;
4592 case INDEX_op_discard
:
4593 temp_dead(s
, arg_temp(op
->args
[0]));
4595 case INDEX_op_set_label
:
4596 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
4597 tcg_out_label(s
, arg_label(op
->args
[0]));
4600 tcg_reg_alloc_call(s
, op
);
4602 case INDEX_op_dup2_vec
:
4603 if (tcg_reg_alloc_dup2(s
, op
)) {
4608 /* Sanity check that we've not introduced any unhandled opcodes. */
4609 tcg_debug_assert(tcg_op_supported(opc
));
4610 /* Note: in order to speed up the code, it would be much
4611 faster to have specialized register allocator functions for
4612 some common argument patterns */
4613 tcg_reg_alloc_op(s
, op
);
4616 #ifdef CONFIG_DEBUG_TCG
4619 /* Test for (pending) buffer overflow. The assumption is that any
4620 one operation beginning below the high water mark cannot overrun
4621 the buffer completely. Thus we can test for overflow after
4622 generating code without having to check during generation. */
4623 if (unlikely((void *)s
->code_ptr
> s
->code_gen_highwater
)) {
4626 /* Test for TB overflow, as seen by gen_insn_end_off. */
4627 if (unlikely(tcg_current_code_size(s
) > UINT16_MAX
)) {
4631 tcg_debug_assert(num_insns
>= 0);
4632 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
4634 /* Generate TB finalization at the end of block */
4635 #ifdef TCG_TARGET_NEED_LDST_LABELS
4636 i
= tcg_out_ldst_finalize(s
);
4641 #ifdef TCG_TARGET_NEED_POOL_LABELS
4642 i
= tcg_out_pool_finalize(s
);
4647 if (!tcg_resolve_relocs(s
)) {
4651 #ifndef CONFIG_TCG_INTERPRETER
4652 /* flush instruction cache */
4653 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
4654 (uintptr_t)s
->code_buf
,
4655 tcg_ptr_byte_diff(s
->code_ptr
, s
->code_buf
));
4658 return tcg_current_code_size(s
);
#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count
                ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
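
/*
 * Illustrative sketch of the backend half of this contract.  This is an
 * assumption-laden example, not the code of any real backend: the actual
 * backends build a larger DebugFrame (CIE + FDE + CFA opcodes describing
 * their prologue) in tcg-target.c.inc.
 */
#if 0
static const DebugFrameHeader debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4,  /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,                /* sleb128 -8 */
    .cie.return_column = 16,
    /* ...FDE header and CFA opcodes describing the prologue go here... */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif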
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
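
/*
 * For context (summarizing the documented GDB JIT interface): GDB places a
 * breakpoint on __jit_debug_register_code and, each time it fires, reads
 * __jit_debug_descriptor to find the relevant_entry and walks the entry list,
 * loading the in-memory ELF image described by symfile_addr/symfile_size.
 */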
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;
    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };
    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;
    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;
#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif
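
    /*
     * For example (assuming the build defines DEBUG_JIT so the block above is
     * compiled in), the dumped image can be inspected with standard binutils:
     *
     *   readelf -S --debug-dump=info /tmp/qemu.jit
     *   objdump --dwarf=frames /tmp/qemu.jit
     */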
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif