[qemu/ar7.git] / tcg / tcg.c
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/cutils.h"
34 #include "qemu/host-utils.h"
35 #include "qemu/timer.h"
37 /* Note: the long-term plan is to reduce the dependencies on the QEMU
38 CPU definitions. Currently they are used for qemu_ld/st
39 instructions */
40 #define NO_CPU_IO_DEFS
41 #include "cpu.h"
43 #include "exec/cpu-common.h"
44 #include "exec/exec-all.h"
46 #include "tcg-op.h"
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS ELFCLASS32
50 #else
51 # define ELF_CLASS ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA ELFDATA2MSB
55 #else
56 # define ELF_DATA ELFDATA2LSB
57 #endif
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "sysemu/sysemu.h"
63 /* Forward declarations for functions declared in tcg-target.inc.c and
64 used here. */
65 static void tcg_target_init(TCGContext *s);
66 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69 intptr_t value, intptr_t addend);
71 /* The CIE and FDE header definitions will be common to all hosts. */
72 typedef struct {
73 uint32_t len __attribute__((aligned((sizeof(void *)))));
74 uint32_t id;
75 uint8_t version;
76 char augmentation[1];
77 uint8_t code_align;
78 uint8_t data_align;
79 uint8_t return_column;
80 } DebugFrameCIE;
82 typedef struct QEMU_PACKED {
83 uint32_t len __attribute__((aligned((sizeof(void *)))));
84 uint32_t cie_offset;
85 uintptr_t func_start;
86 uintptr_t func_len;
87 } DebugFrameFDEHeader;
89 typedef struct QEMU_PACKED {
90 DebugFrameCIE cie;
91 DebugFrameFDEHeader fde;
92 } DebugFrameHeader;
94 static void tcg_register_jit_int(void *buf, size_t size,
95 const void *debug_frame,
96 size_t debug_frame_size)
97 __attribute__((unused));
99 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
100 static const char *target_parse_constraint(TCGArgConstraint *ct,
101 const char *ct_str, TCGType type);
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103 intptr_t arg2);
104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106 TCGReg ret, tcg_target_long arg);
107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
108 const int *const_args);
109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110 intptr_t arg2);
111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
112 TCGReg base, intptr_t ofs);
113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
114 static int tcg_target_const_match(tcg_target_long val, TCGType type,
115 const TCGArgConstraint *arg_ct);
116 #ifdef TCG_TARGET_NEED_LDST_LABELS
117 static bool tcg_out_ldst_finalize(TCGContext *s);
118 #endif
120 #define TCG_HIGHWATER 1024
122 static TCGContext **tcg_ctxs;
123 static unsigned int n_tcg_ctxs;
124 TCGv_env cpu_env = 0;
127 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
128 * dynamically allocate from as demand dictates. Given appropriate region
129 * sizing, this minimizes flushes even when some TCG threads generate a lot
130 * more code than others.
132 struct tcg_region_state {
133 QemuMutex lock;
135 /* fields set at init time */
136 void *start;
137 void *start_aligned;
138 void *end;
139 size_t n;
140 size_t size; /* size of one region */
141 size_t stride; /* .size + guard size */
143 /* fields protected by the lock */
144 size_t current; /* current region index */
145 size_t agg_size_full; /* aggregate size of full regions */
148 static struct tcg_region_state region;
150 static TCGRegSet tcg_target_available_regs[2];
151 static TCGRegSet tcg_target_call_clobber_regs;
153 #if TCG_TARGET_INSN_UNIT_SIZE == 1
154 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
156 *s->code_ptr++ = v;
159 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
160 uint8_t v)
162 *p = v;
164 #endif
166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
169 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
170 *s->code_ptr++ = v;
171 } else {
172 tcg_insn_unit *p = s->code_ptr;
173 memcpy(p, &v, sizeof(v));
174 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
179 uint16_t v)
181 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
182 *p = v;
183 } else {
184 memcpy(p, &v, sizeof(v));
187 #endif
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
192 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
193 *s->code_ptr++ = v;
194 } else {
195 tcg_insn_unit *p = s->code_ptr;
196 memcpy(p, &v, sizeof(v));
197 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
202 uint32_t v)
204 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
205 *p = v;
206 } else {
207 memcpy(p, &v, sizeof(v));
210 #endif
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
215 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
216 *s->code_ptr++ = v;
217 } else {
218 tcg_insn_unit *p = s->code_ptr;
219 memcpy(p, &v, sizeof(v));
220 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
225 uint64_t v)
227 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
228 *p = v;
229 } else {
230 memcpy(p, &v, sizeof(v));
233 #endif
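/*
 * Editorial example (not in the original source): on a host whose
 * TCG_TARGET_INSN_UNIT_SIZE is 1 (byte-sized insn units, e.g. x86),
 * tcg_out32(s, 0x12345678) takes the memcpy branch above: the four bytes
 * of the value are copied in host byte order and s->code_ptr advances by
 * 4 / TCG_TARGET_INSN_UNIT_SIZE == 4 units.
 */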
235 /* label relocation processing */
237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
238 TCGLabel *l, intptr_t addend)
240 TCGRelocation *r;
242 if (l->has_value) {
243 /* FIXME: This may break relocations on RISC targets that
244 modify instruction fields in place. The caller may not have
245 written the initial value. */
246 patch_reloc(code_ptr, type, l->u.value, addend);
247 } else {
248 /* add a new relocation entry */
249 r = tcg_malloc(sizeof(TCGRelocation));
250 r->type = type;
251 r->ptr = code_ptr;
252 r->addend = addend;
253 r->next = l->u.first_reloc;
254 l->u.first_reloc = r;
258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
260 intptr_t value = (intptr_t)ptr;
261 TCGRelocation *r;
263 tcg_debug_assert(!l->has_value);
265 for (r = l->u.first_reloc; r != NULL; r = r->next) {
266 patch_reloc(r->ptr, r->type, value, r->addend);
269 l->has_value = 1;
270 l->u.value_ptr = ptr;
273 TCGLabel *gen_new_label(void)
275 TCGContext *s = tcg_ctx;
276 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
278 *l = (TCGLabel){
279 .id = s->nb_labels++
282 return l;
285 #include "tcg-target.inc.c"
287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
289 void *start, *end;
291 start = region.start_aligned + curr_region * region.stride;
292 end = start + region.size;
294 if (curr_region == 0) {
295 start = region.start;
297 if (curr_region == region.n - 1) {
298 end = region.end;
301 *pstart = start;
302 *pend = end;
305 static void tcg_region_assign(TCGContext *s, size_t curr_region)
307 void *start, *end;
309 tcg_region_bounds(curr_region, &start, &end);
311 s->code_gen_buffer = start;
312 s->code_gen_ptr = start;
313 s->code_gen_buffer_size = end - start;
314 s->code_gen_highwater = end - TCG_HIGHWATER;
317 static bool tcg_region_alloc__locked(TCGContext *s)
319 if (region.current == region.n) {
320 return true;
322 tcg_region_assign(s, region.current);
323 region.current++;
324 return false;
328 * Request a new region once the one in use has filled up.
329 * Returns true on error.
331 static bool tcg_region_alloc(TCGContext *s)
333 bool err;
334 /* read the region size now; alloc__locked will overwrite it on success */
335 size_t size_full = s->code_gen_buffer_size;
337 qemu_mutex_lock(&region.lock);
338 err = tcg_region_alloc__locked(s);
339 if (!err) {
340 region.agg_size_full += size_full - TCG_HIGHWATER;
342 qemu_mutex_unlock(&region.lock);
343 return err;
347 * Perform a context's first region allocation.
348 * This function does _not_ increment region.agg_size_full.
350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
352 return tcg_region_alloc__locked(s);
355 /* Call from a safe-work context */
356 void tcg_region_reset_all(void)
358 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
359 unsigned int i;
361 qemu_mutex_lock(&region.lock);
362 region.current = 0;
363 region.agg_size_full = 0;
365 for (i = 0; i < n_ctxs; i++) {
366 TCGContext *s = atomic_read(&tcg_ctxs[i]);
367 bool err = tcg_region_initial_alloc__locked(s);
369 g_assert(!err);
371 qemu_mutex_unlock(&region.lock);
374 #ifdef CONFIG_USER_ONLY
375 static size_t tcg_n_regions(void)
377 return 1;
379 #else
381 * It is likely that some vCPUs will translate more code than others, so we
382 * first try to set more regions than max_cpus, with those regions being of
383 * reasonable size. If that's not possible we make do by evenly dividing
384 * the code_gen_buffer among the vCPUs.
386 static size_t tcg_n_regions(void)
388 size_t i;
390 /* Use a single region if all we have is one vCPU thread */
391 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
392 return 1;
395 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
396 for (i = 8; i > 0; i--) {
397 size_t regions_per_thread = i;
398 size_t region_size;
400 region_size = tcg_init_ctx.code_gen_buffer_size;
401 region_size /= max_cpus * regions_per_thread;
403 if (region_size >= 2 * 1024u * 1024) {
404 return max_cpus * regions_per_thread;
407 /* If we can't, then just allocate one region per vCPU thread */
408 return max_cpus;
410 #endif
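/*
 * Worked example for tcg_n_regions() (illustrative numbers, not from the
 * original source): with max_cpus == 8 and a 256 MB code_gen_buffer, the
 * first iteration already satisfies the 2 MB minimum (256 MB / (8 * 8) =
 * 4 MB), so 64 regions are used.  With a 32 MB buffer the loop only
 * succeeds at regions_per_thread == 2 (32 MB / 16 = 2 MB), giving 16
 * regions; anything smaller falls back to one region per vCPU thread.
 */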
413 * Initializes region partitioning.
415 * Called at init time from the parent thread (i.e. the one calling
416 * tcg_context_init), after the target's TCG globals have been set.
418 * Region partitioning works by splitting code_gen_buffer into separate regions,
419 * and then assigning regions to TCG threads so that the threads can translate
420 * code in parallel without synchronization.
422 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
423 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
424 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
425 * must have been parsed before calling this function, since it calls
426 * qemu_tcg_mttcg_enabled().
428 * In user-mode we use a single region. Having multiple regions in user-mode
429 * is not supported, because the number of vCPU threads (recall that each thread
430 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
431 * OS, and usually this number is huge (tens of thousands is not uncommon).
432 * Thus, given this large bound on the number of vCPU threads and the fact
433 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
434 * the availability of at least one region per vCPU thread.
436 * However, this user-mode limitation is unlikely to be a significant problem
437 * in practice. Multi-threaded guests share most if not all of their translated
438 * code, which makes parallel code generation less appealing than in softmmu.
440 void tcg_region_init(void)
442 void *buf = tcg_init_ctx.code_gen_buffer;
443 void *aligned;
444 size_t size = tcg_init_ctx.code_gen_buffer_size;
445 size_t page_size = qemu_real_host_page_size;
446 size_t region_size;
447 size_t n_regions;
448 size_t i;
450 n_regions = tcg_n_regions();
452 /* The first region will be 'aligned - buf' bytes larger than the others */
453 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
454 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
456 * Make region_size a multiple of page_size, using aligned as the start.
457 * As a result of this we might end up with a few extra pages at the end of
458 * the buffer; we will assign those to the last region.
460 region_size = (size - (aligned - buf)) / n_regions;
461 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
463 /* A region must have at least 2 pages; one code, one guard */
464 g_assert(region_size >= 2 * page_size);
466 /* init the region struct */
467 qemu_mutex_init(&region.lock);
468 region.n = n_regions;
469 region.size = region_size - page_size;
470 region.stride = region_size;
471 region.start = buf;
472 region.start_aligned = aligned;
473 /* page-align the end, since its last page will be a guard page */
474 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
475 /* account for that last guard page */
476 region.end -= page_size;
478 /* set guard pages */
479 for (i = 0; i < region.n; i++) {
480 void *start, *end;
481 int rc;
483 tcg_region_bounds(i, &start, &end);
484 rc = qemu_mprotect_none(end, page_size);
485 g_assert(!rc);
488 /* In user-mode we support only one ctx, so do the initial allocation now */
489 #ifdef CONFIG_USER_ONLY
491 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
493 g_assert(!err);
495 #endif
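/*
 * Illustrative summary (editorial note, not part of the original source):
 * each of the region.n regions spans region.stride bytes, i.e. region.size
 * usable bytes followed by one guard page made inaccessible above.
 * Region 0 additionally absorbs the unaligned prefix between region.start
 * and region.start_aligned, and the last region absorbs the extra pages
 * left over after the division, up to region.end.
 */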
499 * All TCG threads except the parent (i.e. the one that called tcg_context_init
500 * and registered the target's TCG globals) must register with this function
501 * before initiating translation.
503 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
504 * of tcg_region_init() for the reasoning behind this.
506 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
507 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
508 * is not used anymore for translation once this function is called.
510 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
511 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
513 #ifdef CONFIG_USER_ONLY
514 void tcg_register_thread(void)
516 tcg_ctx = &tcg_init_ctx;
518 #else
519 void tcg_register_thread(void)
521 TCGContext *s = g_malloc(sizeof(*s));
522 unsigned int i, n;
523 bool err;
525 *s = tcg_init_ctx;
527 /* Relink mem_base. */
528 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
529 if (tcg_init_ctx.temps[i].mem_base) {
530 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
531 tcg_debug_assert(b >= 0 && b < n);
532 s->temps[i].mem_base = &s->temps[b];
536 /* Claim an entry in tcg_ctxs */
537 n = atomic_fetch_inc(&n_tcg_ctxs);
538 g_assert(n < max_cpus);
539 atomic_set(&tcg_ctxs[n], s);
541 tcg_ctx = s;
542 qemu_mutex_lock(&region.lock);
543 err = tcg_region_initial_alloc__locked(tcg_ctx);
544 g_assert(!err);
545 qemu_mutex_unlock(&region.lock);
547 #endif /* !CONFIG_USER_ONLY */
550 * Returns the size (in bytes) of all translated code (i.e. from all regions)
551 * currently in the cache.
552 * See also: tcg_code_capacity()
553 * Do not confuse with tcg_current_code_size(); that one applies to a single
554 * TCG context.
556 size_t tcg_code_size(void)
558 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
559 unsigned int i;
560 size_t total;
562 qemu_mutex_lock(&region.lock);
563 total = region.agg_size_full;
564 for (i = 0; i < n_ctxs; i++) {
565 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
566 size_t size;
568 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
569 g_assert(size <= s->code_gen_buffer_size);
570 total += size;
572 qemu_mutex_unlock(&region.lock);
573 return total;
577 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
578 * regions.
579 * See also: tcg_code_size()
581 size_t tcg_code_capacity(void)
583 size_t guard_size, capacity;
585 /* no need for synchronization; these variables are set at init time */
586 guard_size = region.stride - region.size;
587 capacity = region.end + guard_size - region.start;
588 capacity -= region.n * (guard_size + TCG_HIGHWATER);
589 return capacity;
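/*
 * Hypothetical usage sketch (editorial addition, not part of the original
 * source): the two accessors above could back a simple cache-pressure
 * report.  The helper name below is invented for illustration only.
 */
#if 0
static void tcg_report_cache_usage(void)
{
    size_t used = tcg_code_size();      /* bytes of translated code */
    size_t cap = tcg_code_capacity();   /* total usable bytes */

    qemu_log("TCG cache: %zu/%zu bytes used (%.1f%%)\n",
             used, cap, cap ? 100.0 * used / cap : 0.0);
}
#endif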
592 /* pool based memory allocation */
593 void *tcg_malloc_internal(TCGContext *s, int size)
595 TCGPool *p;
596 int pool_size;
598 if (size > TCG_POOL_CHUNK_SIZE) {
599 /* big malloc: insert a new pool (XXX: could optimize) */
600 p = g_malloc(sizeof(TCGPool) + size);
601 p->size = size;
602 p->next = s->pool_first_large;
603 s->pool_first_large = p;
604 return p->data;
605 } else {
606 p = s->pool_current;
607 if (!p) {
608 p = s->pool_first;
609 if (!p)
610 goto new_pool;
611 } else {
612 if (!p->next) {
613 new_pool:
614 pool_size = TCG_POOL_CHUNK_SIZE;
615 p = g_malloc(sizeof(TCGPool) + pool_size);
616 p->size = pool_size;
617 p->next = NULL;
618 if (s->pool_current)
619 s->pool_current->next = p;
620 else
621 s->pool_first = p;
622 } else {
623 p = p->next;
627 s->pool_current = p;
628 s->pool_cur = p->data + size;
629 s->pool_end = p->data + p->size;
630 return p->data;
633 void tcg_pool_reset(TCGContext *s)
635 TCGPool *p, *t;
636 for (p = s->pool_first_large; p; p = t) {
637 t = p->next;
638 g_free(p);
640 s->pool_first_large = NULL;
641 s->pool_cur = s->pool_end = NULL;
642 s->pool_current = NULL;
645 typedef struct TCGHelperInfo {
646 void *func;
647 const char *name;
648 unsigned flags;
649 unsigned sizemask;
650 } TCGHelperInfo;
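/*
 * Editorial note on the sizemask encoding (as consumed by tcg_gen_callN
 * below): two bits describe each value, with bits 0-1 covering the return
 * value and bits 2*(i+1) and 2*(i+1)+1 covering argument i.  The low bit
 * of each pair means "64-bit", the high bit means "sign-extended".  For
 * example, sizemask == 0x5 describes a helper that returns a 64-bit value
 * and takes a single 64-bit argument.
 */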
652 #include "exec/helper-proto.h"
654 static const TCGHelperInfo all_helpers[] = {
655 #include "exec/helper-tcg.h"
657 static GHashTable *helper_table;
659 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
660 static void process_op_defs(TCGContext *s);
661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
662 TCGReg reg, const char *name);
664 void tcg_context_init(TCGContext *s)
666 int op, total_args, n, i;
667 TCGOpDef *def;
668 TCGArgConstraint *args_ct;
669 int *sorted_args;
670 TCGTemp *ts;
672 memset(s, 0, sizeof(*s));
673 s->nb_globals = 0;
675 /* Count total number of arguments and allocate the corresponding
676 space */
677 total_args = 0;
678 for(op = 0; op < NB_OPS; op++) {
679 def = &tcg_op_defs[op];
680 n = def->nb_iargs + def->nb_oargs;
681 total_args += n;
684 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
685 sorted_args = g_malloc(sizeof(int) * total_args);
687 for(op = 0; op < NB_OPS; op++) {
688 def = &tcg_op_defs[op];
689 def->args_ct = args_ct;
690 def->sorted_args = sorted_args;
691 n = def->nb_iargs + def->nb_oargs;
692 sorted_args += n;
693 args_ct += n;
696 /* Register helpers. */
697 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
698 helper_table = g_hash_table_new(NULL, NULL);
700 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
701 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
702 (gpointer)&all_helpers[i]);
705 tcg_target_init(s);
706 process_op_defs(s);
708 /* Reverse the order of the saved registers, assuming they're all at
709 the start of tcg_target_reg_alloc_order. */
710 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
711 int r = tcg_target_reg_alloc_order[n];
712 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
713 break;
716 for (i = 0; i < n; ++i) {
717 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
719 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
720 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
723 tcg_ctx = s;
725 * In user-mode we simply share the init context among threads, since we
726 * use a single region. See the documentation of tcg_region_init() for the
727 * reasoning behind this.
728 * In softmmu we will have at most max_cpus TCG threads.
730 #ifdef CONFIG_USER_ONLY
731 tcg_ctxs = &tcg_ctx;
732 n_tcg_ctxs = 1;
733 #else
734 tcg_ctxs = g_new(TCGContext *, max_cpus);
735 #endif
737 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
738 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
739 cpu_env = temp_tcgv_ptr(ts);
743 * Allocate TBs right before their corresponding translated code, making
744 * sure that TBs and code are on different cache lines.
746 TranslationBlock *tcg_tb_alloc(TCGContext *s)
748 uintptr_t align = qemu_icache_linesize;
749 TranslationBlock *tb;
750 void *next;
752 retry:
753 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
754 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
756 if (unlikely(next > s->code_gen_highwater)) {
757 if (tcg_region_alloc(s)) {
758 return NULL;
760 goto retry;
762 atomic_set(&s->code_gen_ptr, next);
763 s->data_gen_ptr = NULL;
764 return tb;
767 void tcg_prologue_init(TCGContext *s)
769 size_t prologue_size, total_size;
770 void *buf0, *buf1;
772 /* Put the prologue at the beginning of code_gen_buffer. */
773 buf0 = s->code_gen_buffer;
774 total_size = s->code_gen_buffer_size;
775 s->code_ptr = buf0;
776 s->code_buf = buf0;
777 s->data_gen_ptr = NULL;
778 s->code_gen_prologue = buf0;
780 /* Compute a high-water mark, at which we voluntarily flush the buffer
781 and start over. The size here is arbitrary, significantly larger
782 than we expect the code generation for any one opcode to require. */
783 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
785 #ifdef TCG_TARGET_NEED_POOL_LABELS
786 s->pool_labels = NULL;
787 #endif
789 /* Generate the prologue. */
790 tcg_target_qemu_prologue(s);
792 #ifdef TCG_TARGET_NEED_POOL_LABELS
793 /* Allow the prologue to put e.g. guest_base into a pool entry. */
795 bool ok = tcg_out_pool_finalize(s);
796 tcg_debug_assert(ok);
798 #endif
800 buf1 = s->code_ptr;
801 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
803 /* Deduct the prologue from the buffer. */
804 prologue_size = tcg_current_code_size(s);
805 s->code_gen_ptr = buf1;
806 s->code_gen_buffer = buf1;
807 s->code_buf = buf1;
808 total_size -= prologue_size;
809 s->code_gen_buffer_size = total_size;
811 tcg_register_jit(s->code_gen_buffer, total_size);
813 #ifdef DEBUG_DISAS
814 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
815 qemu_log_lock();
816 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
817 if (s->data_gen_ptr) {
818 size_t code_size = s->data_gen_ptr - buf0;
819 size_t data_size = prologue_size - code_size;
820 size_t i;
822 log_disas(buf0, code_size);
824 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
825 if (sizeof(tcg_target_ulong) == 8) {
826 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
827 (uintptr_t)s->data_gen_ptr + i,
828 *(uint64_t *)(s->data_gen_ptr + i));
829 } else {
830 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
831 (uintptr_t)s->data_gen_ptr + i,
832 *(uint32_t *)(s->data_gen_ptr + i));
835 } else {
836 log_disas(buf0, prologue_size);
838 qemu_log("\n");
839 qemu_log_flush();
840 qemu_log_unlock();
842 #endif
844 /* Assert that goto_ptr is implemented completely. */
845 if (TCG_TARGET_HAS_goto_ptr) {
846 tcg_debug_assert(s->code_gen_epilogue != NULL);
850 void tcg_func_start(TCGContext *s)
852 tcg_pool_reset(s);
853 s->nb_temps = s->nb_globals;
855 /* No temps have been previously allocated for size or locality. */
856 memset(s->free_temps, 0, sizeof(s->free_temps));
858 s->nb_labels = 0;
859 s->current_frame_offset = s->frame_start;
861 #ifdef CONFIG_DEBUG_TCG
862 s->goto_tb_issue_mask = 0;
863 #endif
865 s->gen_op_buf[0].next = 1;
866 s->gen_op_buf[0].prev = 0;
867 s->gen_next_op_idx = 1;
870 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
872 int n = s->nb_temps++;
873 tcg_debug_assert(n < TCG_MAX_TEMPS);
874 return memset(&s->temps[n], 0, sizeof(TCGTemp));
877 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
879 TCGTemp *ts;
881 tcg_debug_assert(s->nb_globals == s->nb_temps);
882 s->nb_globals++;
883 ts = tcg_temp_alloc(s);
884 ts->temp_global = 1;
886 return ts;
889 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
890 TCGReg reg, const char *name)
892 TCGTemp *ts;
894 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
895 tcg_abort();
898 ts = tcg_global_alloc(s);
899 ts->base_type = type;
900 ts->type = type;
901 ts->fixed_reg = 1;
902 ts->reg = reg;
903 ts->name = name;
904 tcg_regset_set_reg(s->reserved_regs, reg);
906 return ts;
909 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
911 s->frame_start = start;
912 s->frame_end = start + size;
913 s->frame_temp
914 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
917 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
918 intptr_t offset, const char *name)
920 TCGContext *s = tcg_ctx;
921 TCGTemp *base_ts = tcgv_ptr_temp(base);
922 TCGTemp *ts = tcg_global_alloc(s);
923 int indirect_reg = 0, bigendian = 0;
924 #ifdef HOST_WORDS_BIGENDIAN
925 bigendian = 1;
926 #endif
928 if (!base_ts->fixed_reg) {
929 /* We do not support double-indirect registers. */
930 tcg_debug_assert(!base_ts->indirect_reg);
931 base_ts->indirect_base = 1;
932 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
933 ? 2 : 1);
934 indirect_reg = 1;
937 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
938 TCGTemp *ts2 = tcg_global_alloc(s);
939 char buf[64];
941 ts->base_type = TCG_TYPE_I64;
942 ts->type = TCG_TYPE_I32;
943 ts->indirect_reg = indirect_reg;
944 ts->mem_allocated = 1;
945 ts->mem_base = base_ts;
946 ts->mem_offset = offset + bigendian * 4;
947 pstrcpy(buf, sizeof(buf), name);
948 pstrcat(buf, sizeof(buf), "_0");
949 ts->name = strdup(buf);
951 tcg_debug_assert(ts2 == ts + 1);
952 ts2->base_type = TCG_TYPE_I64;
953 ts2->type = TCG_TYPE_I32;
954 ts2->indirect_reg = indirect_reg;
955 ts2->mem_allocated = 1;
956 ts2->mem_base = base_ts;
957 ts2->mem_offset = offset + (1 - bigendian) * 4;
958 pstrcpy(buf, sizeof(buf), name);
959 pstrcat(buf, sizeof(buf), "_1");
960 ts2->name = strdup(buf);
961 } else {
962 ts->base_type = type;
963 ts->type = type;
964 ts->indirect_reg = indirect_reg;
965 ts->mem_allocated = 1;
966 ts->mem_base = base_ts;
967 ts->mem_offset = offset;
968 ts->name = name;
970 return ts;
973 static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local)
975 TCGContext *s = tcg_ctx;
976 TCGTemp *ts;
977 int idx, k;
979 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
980 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
981 if (idx < TCG_MAX_TEMPS) {
982 /* There is already an available temp with the right type. */
983 clear_bit(idx, s->free_temps[k].l);
985 ts = &s->temps[idx];
986 ts->temp_allocated = 1;
987 tcg_debug_assert(ts->base_type == type);
988 tcg_debug_assert(ts->temp_local == temp_local);
989 } else {
990 ts = tcg_temp_alloc(s);
991 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
992 TCGTemp *ts2 = tcg_temp_alloc(s);
994 ts->base_type = type;
995 ts->type = TCG_TYPE_I32;
996 ts->temp_allocated = 1;
997 ts->temp_local = temp_local;
999 tcg_debug_assert(ts2 == ts + 1);
1000 ts2->base_type = TCG_TYPE_I64;
1001 ts2->type = TCG_TYPE_I32;
1002 ts2->temp_allocated = 1;
1003 ts2->temp_local = temp_local;
1004 } else {
1005 ts->base_type = type;
1006 ts->type = type;
1007 ts->temp_allocated = 1;
1008 ts->temp_local = temp_local;
1012 #if defined(CONFIG_DEBUG_TCG)
1013 s->temps_in_use++;
1014 #endif
1015 return ts;
1018 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
1020 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
1021 return temp_tcgv_i32(t);
1024 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
1026 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
1027 return temp_tcgv_i64(t);
1030 static void tcg_temp_free_internal(TCGTemp *ts)
1032 TCGContext *s = tcg_ctx;
1033 int k, idx;
1035 #if defined(CONFIG_DEBUG_TCG)
1036 s->temps_in_use--;
1037 if (s->temps_in_use < 0) {
1038 fprintf(stderr, "More temporaries freed than allocated!\n");
1040 #endif
1042 tcg_debug_assert(ts->temp_global == 0);
1043 tcg_debug_assert(ts->temp_allocated != 0);
1044 ts->temp_allocated = 0;
1046 idx = temp_idx(ts);
1047 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1048 set_bit(idx, s->free_temps[k].l);
1051 void tcg_temp_free_i32(TCGv_i32 arg)
1053 tcg_temp_free_internal(tcgv_i32_temp(arg));
1056 void tcg_temp_free_i64(TCGv_i64 arg)
1058 tcg_temp_free_internal(tcgv_i64_temp(arg));
1061 TCGv_i32 tcg_const_i32(int32_t val)
1063 TCGv_i32 t0;
1064 t0 = tcg_temp_new_i32();
1065 tcg_gen_movi_i32(t0, val);
1066 return t0;
1069 TCGv_i64 tcg_const_i64(int64_t val)
1071 TCGv_i64 t0;
1072 t0 = tcg_temp_new_i64();
1073 tcg_gen_movi_i64(t0, val);
1074 return t0;
1077 TCGv_i32 tcg_const_local_i32(int32_t val)
1079 TCGv_i32 t0;
1080 t0 = tcg_temp_local_new_i32();
1081 tcg_gen_movi_i32(t0, val);
1082 return t0;
1085 TCGv_i64 tcg_const_local_i64(int64_t val)
1087 TCGv_i64 t0;
1088 t0 = tcg_temp_local_new_i64();
1089 tcg_gen_movi_i64(t0, val);
1090 return t0;
1093 #if defined(CONFIG_DEBUG_TCG)
1094 void tcg_clear_temp_count(void)
1096 TCGContext *s = tcg_ctx;
1097 s->temps_in_use = 0;
1100 int tcg_check_temp_count(void)
1102 TCGContext *s = tcg_ctx;
1103 if (s->temps_in_use) {
1104 /* Clear the count so that we don't give another
1105 * warning immediately next time around.
1107 s->temps_in_use = 0;
1108 return 1;
1110 return 0;
1112 #endif
1114 /* Return true if OP may appear in the opcode stream.
1115 Test the runtime variable that controls each opcode. */
1116 bool tcg_op_supported(TCGOpcode op)
1118 switch (op) {
1119 case INDEX_op_discard:
1120 case INDEX_op_set_label:
1121 case INDEX_op_call:
1122 case INDEX_op_br:
1123 case INDEX_op_mb:
1124 case INDEX_op_insn_start:
1125 case INDEX_op_exit_tb:
1126 case INDEX_op_goto_tb:
1127 case INDEX_op_qemu_ld_i32:
1128 case INDEX_op_qemu_st_i32:
1129 case INDEX_op_qemu_ld_i64:
1130 case INDEX_op_qemu_st_i64:
1131 return true;
1133 case INDEX_op_goto_ptr:
1134 return TCG_TARGET_HAS_goto_ptr;
1136 case INDEX_op_mov_i32:
1137 case INDEX_op_movi_i32:
1138 case INDEX_op_setcond_i32:
1139 case INDEX_op_brcond_i32:
1140 case INDEX_op_ld8u_i32:
1141 case INDEX_op_ld8s_i32:
1142 case INDEX_op_ld16u_i32:
1143 case INDEX_op_ld16s_i32:
1144 case INDEX_op_ld_i32:
1145 case INDEX_op_st8_i32:
1146 case INDEX_op_st16_i32:
1147 case INDEX_op_st_i32:
1148 case INDEX_op_add_i32:
1149 case INDEX_op_sub_i32:
1150 case INDEX_op_mul_i32:
1151 case INDEX_op_and_i32:
1152 case INDEX_op_or_i32:
1153 case INDEX_op_xor_i32:
1154 case INDEX_op_shl_i32:
1155 case INDEX_op_shr_i32:
1156 case INDEX_op_sar_i32:
1157 return true;
1159 case INDEX_op_movcond_i32:
1160 return TCG_TARGET_HAS_movcond_i32;
1161 case INDEX_op_div_i32:
1162 case INDEX_op_divu_i32:
1163 return TCG_TARGET_HAS_div_i32;
1164 case INDEX_op_rem_i32:
1165 case INDEX_op_remu_i32:
1166 return TCG_TARGET_HAS_rem_i32;
1167 case INDEX_op_div2_i32:
1168 case INDEX_op_divu2_i32:
1169 return TCG_TARGET_HAS_div2_i32;
1170 case INDEX_op_rotl_i32:
1171 case INDEX_op_rotr_i32:
1172 return TCG_TARGET_HAS_rot_i32;
1173 case INDEX_op_deposit_i32:
1174 return TCG_TARGET_HAS_deposit_i32;
1175 case INDEX_op_extract_i32:
1176 return TCG_TARGET_HAS_extract_i32;
1177 case INDEX_op_sextract_i32:
1178 return TCG_TARGET_HAS_sextract_i32;
1179 case INDEX_op_add2_i32:
1180 return TCG_TARGET_HAS_add2_i32;
1181 case INDEX_op_sub2_i32:
1182 return TCG_TARGET_HAS_sub2_i32;
1183 case INDEX_op_mulu2_i32:
1184 return TCG_TARGET_HAS_mulu2_i32;
1185 case INDEX_op_muls2_i32:
1186 return TCG_TARGET_HAS_muls2_i32;
1187 case INDEX_op_muluh_i32:
1188 return TCG_TARGET_HAS_muluh_i32;
1189 case INDEX_op_mulsh_i32:
1190 return TCG_TARGET_HAS_mulsh_i32;
1191 case INDEX_op_ext8s_i32:
1192 return TCG_TARGET_HAS_ext8s_i32;
1193 case INDEX_op_ext16s_i32:
1194 return TCG_TARGET_HAS_ext16s_i32;
1195 case INDEX_op_ext8u_i32:
1196 return TCG_TARGET_HAS_ext8u_i32;
1197 case INDEX_op_ext16u_i32:
1198 return TCG_TARGET_HAS_ext16u_i32;
1199 case INDEX_op_bswap16_i32:
1200 return TCG_TARGET_HAS_bswap16_i32;
1201 case INDEX_op_bswap32_i32:
1202 return TCG_TARGET_HAS_bswap32_i32;
1203 case INDEX_op_not_i32:
1204 return TCG_TARGET_HAS_not_i32;
1205 case INDEX_op_neg_i32:
1206 return TCG_TARGET_HAS_neg_i32;
1207 case INDEX_op_andc_i32:
1208 return TCG_TARGET_HAS_andc_i32;
1209 case INDEX_op_orc_i32:
1210 return TCG_TARGET_HAS_orc_i32;
1211 case INDEX_op_eqv_i32:
1212 return TCG_TARGET_HAS_eqv_i32;
1213 case INDEX_op_nand_i32:
1214 return TCG_TARGET_HAS_nand_i32;
1215 case INDEX_op_nor_i32:
1216 return TCG_TARGET_HAS_nor_i32;
1217 case INDEX_op_clz_i32:
1218 return TCG_TARGET_HAS_clz_i32;
1219 case INDEX_op_ctz_i32:
1220 return TCG_TARGET_HAS_ctz_i32;
1221 case INDEX_op_ctpop_i32:
1222 return TCG_TARGET_HAS_ctpop_i32;
1224 case INDEX_op_brcond2_i32:
1225 case INDEX_op_setcond2_i32:
1226 return TCG_TARGET_REG_BITS == 32;
1228 case INDEX_op_mov_i64:
1229 case INDEX_op_movi_i64:
1230 case INDEX_op_setcond_i64:
1231 case INDEX_op_brcond_i64:
1232 case INDEX_op_ld8u_i64:
1233 case INDEX_op_ld8s_i64:
1234 case INDEX_op_ld16u_i64:
1235 case INDEX_op_ld16s_i64:
1236 case INDEX_op_ld32u_i64:
1237 case INDEX_op_ld32s_i64:
1238 case INDEX_op_ld_i64:
1239 case INDEX_op_st8_i64:
1240 case INDEX_op_st16_i64:
1241 case INDEX_op_st32_i64:
1242 case INDEX_op_st_i64:
1243 case INDEX_op_add_i64:
1244 case INDEX_op_sub_i64:
1245 case INDEX_op_mul_i64:
1246 case INDEX_op_and_i64:
1247 case INDEX_op_or_i64:
1248 case INDEX_op_xor_i64:
1249 case INDEX_op_shl_i64:
1250 case INDEX_op_shr_i64:
1251 case INDEX_op_sar_i64:
1252 case INDEX_op_ext_i32_i64:
1253 case INDEX_op_extu_i32_i64:
1254 return TCG_TARGET_REG_BITS == 64;
1256 case INDEX_op_movcond_i64:
1257 return TCG_TARGET_HAS_movcond_i64;
1258 case INDEX_op_div_i64:
1259 case INDEX_op_divu_i64:
1260 return TCG_TARGET_HAS_div_i64;
1261 case INDEX_op_rem_i64:
1262 case INDEX_op_remu_i64:
1263 return TCG_TARGET_HAS_rem_i64;
1264 case INDEX_op_div2_i64:
1265 case INDEX_op_divu2_i64:
1266 return TCG_TARGET_HAS_div2_i64;
1267 case INDEX_op_rotl_i64:
1268 case INDEX_op_rotr_i64:
1269 return TCG_TARGET_HAS_rot_i64;
1270 case INDEX_op_deposit_i64:
1271 return TCG_TARGET_HAS_deposit_i64;
1272 case INDEX_op_extract_i64:
1273 return TCG_TARGET_HAS_extract_i64;
1274 case INDEX_op_sextract_i64:
1275 return TCG_TARGET_HAS_sextract_i64;
1276 case INDEX_op_extrl_i64_i32:
1277 return TCG_TARGET_HAS_extrl_i64_i32;
1278 case INDEX_op_extrh_i64_i32:
1279 return TCG_TARGET_HAS_extrh_i64_i32;
1280 case INDEX_op_ext8s_i64:
1281 return TCG_TARGET_HAS_ext8s_i64;
1282 case INDEX_op_ext16s_i64:
1283 return TCG_TARGET_HAS_ext16s_i64;
1284 case INDEX_op_ext32s_i64:
1285 return TCG_TARGET_HAS_ext32s_i64;
1286 case INDEX_op_ext8u_i64:
1287 return TCG_TARGET_HAS_ext8u_i64;
1288 case INDEX_op_ext16u_i64:
1289 return TCG_TARGET_HAS_ext16u_i64;
1290 case INDEX_op_ext32u_i64:
1291 return TCG_TARGET_HAS_ext32u_i64;
1292 case INDEX_op_bswap16_i64:
1293 return TCG_TARGET_HAS_bswap16_i64;
1294 case INDEX_op_bswap32_i64:
1295 return TCG_TARGET_HAS_bswap32_i64;
1296 case INDEX_op_bswap64_i64:
1297 return TCG_TARGET_HAS_bswap64_i64;
1298 case INDEX_op_not_i64:
1299 return TCG_TARGET_HAS_not_i64;
1300 case INDEX_op_neg_i64:
1301 return TCG_TARGET_HAS_neg_i64;
1302 case INDEX_op_andc_i64:
1303 return TCG_TARGET_HAS_andc_i64;
1304 case INDEX_op_orc_i64:
1305 return TCG_TARGET_HAS_orc_i64;
1306 case INDEX_op_eqv_i64:
1307 return TCG_TARGET_HAS_eqv_i64;
1308 case INDEX_op_nand_i64:
1309 return TCG_TARGET_HAS_nand_i64;
1310 case INDEX_op_nor_i64:
1311 return TCG_TARGET_HAS_nor_i64;
1312 case INDEX_op_clz_i64:
1313 return TCG_TARGET_HAS_clz_i64;
1314 case INDEX_op_ctz_i64:
1315 return TCG_TARGET_HAS_ctz_i64;
1316 case INDEX_op_ctpop_i64:
1317 return TCG_TARGET_HAS_ctpop_i64;
1318 case INDEX_op_add2_i64:
1319 return TCG_TARGET_HAS_add2_i64;
1320 case INDEX_op_sub2_i64:
1321 return TCG_TARGET_HAS_sub2_i64;
1322 case INDEX_op_mulu2_i64:
1323 return TCG_TARGET_HAS_mulu2_i64;
1324 case INDEX_op_muls2_i64:
1325 return TCG_TARGET_HAS_muls2_i64;
1326 case INDEX_op_muluh_i64:
1327 return TCG_TARGET_HAS_muluh_i64;
1328 case INDEX_op_mulsh_i64:
1329 return TCG_TARGET_HAS_mulsh_i64;
1331 case NB_OPS:
1332 break;
1334 g_assert_not_reached();
1337 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1338 and endian swap. Maybe it would be better to do the alignment
1339 and endian swap in tcg_reg_alloc_call(). */
1340 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1342 TCGContext *s = tcg_ctx;
1343 int i, real_args, nb_rets, pi;
1344 unsigned sizemask, flags;
1345 TCGHelperInfo *info;
1346 TCGOp *op;
1348 info = g_hash_table_lookup(helper_table, (gpointer)func);
1349 flags = info->flags;
1350 sizemask = info->sizemask;
1352 #if defined(__sparc__) && !defined(__arch64__) \
1353 && !defined(CONFIG_TCG_INTERPRETER)
1354 /* We have 64-bit values in one register, but need to pass as two
1355 separate parameters. Split them. */
1356 int orig_sizemask = sizemask;
1357 int orig_nargs = nargs;
1358 TCGv_i64 retl, reth;
1359 TCGTemp *split_args[MAX_OPC_PARAM];
1361 TCGV_UNUSED_I64(retl);
1362 TCGV_UNUSED_I64(reth);
1363 if (sizemask != 0) {
1364 for (i = real_args = 0; i < nargs; ++i) {
1365 int is_64bit = sizemask & (1 << (i+1)*2);
1366 if (is_64bit) {
1367 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1368 TCGv_i32 h = tcg_temp_new_i32();
1369 TCGv_i32 l = tcg_temp_new_i32();
1370 tcg_gen_extr_i64_i32(l, h, orig);
1371 split_args[real_args++] = tcgv_i32_temp(h);
1372 split_args[real_args++] = tcgv_i32_temp(l);
1373 } else {
1374 split_args[real_args++] = args[i];
1377 nargs = real_args;
1378 args = split_args;
1379 sizemask = 0;
1381 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1382 for (i = 0; i < nargs; ++i) {
1383 int is_64bit = sizemask & (1 << (i+1)*2);
1384 int is_signed = sizemask & (2 << (i+1)*2);
1385 if (!is_64bit) {
1386 TCGv_i64 temp = tcg_temp_new_i64();
1387 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1388 if (is_signed) {
1389 tcg_gen_ext32s_i64(temp, orig);
1390 } else {
1391 tcg_gen_ext32u_i64(temp, orig);
1393 args[i] = tcgv_i64_temp(temp);
1396 #endif /* TCG_TARGET_EXTEND_ARGS */
1398 i = s->gen_next_op_idx;
1399 tcg_debug_assert(i < OPC_BUF_SIZE);
1400 s->gen_op_buf[0].prev = i;
1401 s->gen_next_op_idx = i + 1;
1402 op = &s->gen_op_buf[i];
1404 /* Set links for sequential allocation during translation. */
1405 memset(op, 0, offsetof(TCGOp, args));
1406 op->opc = INDEX_op_call;
1407 op->prev = i - 1;
1408 op->next = i + 1;
1410 pi = 0;
1411 if (ret != NULL) {
1412 #if defined(__sparc__) && !defined(__arch64__) \
1413 && !defined(CONFIG_TCG_INTERPRETER)
1414 if (orig_sizemask & 1) {
1415 /* The 32-bit ABI is going to return the 64-bit value in
1416 the %o0/%o1 register pair. Prepare for this by using
1417 two return temporaries, and reassemble below. */
1418 retl = tcg_temp_new_i64();
1419 reth = tcg_temp_new_i64();
1420 op->args[pi++] = tcgv_i64_arg(reth);
1421 op->args[pi++] = tcgv_i64_arg(retl);
1422 nb_rets = 2;
1423 } else {
1424 op->args[pi++] = temp_arg(ret);
1425 nb_rets = 1;
1427 #else
1428 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1429 #ifdef HOST_WORDS_BIGENDIAN
1430 op->args[pi++] = temp_arg(ret + 1);
1431 op->args[pi++] = temp_arg(ret);
1432 #else
1433 op->args[pi++] = temp_arg(ret);
1434 op->args[pi++] = temp_arg(ret + 1);
1435 #endif
1436 nb_rets = 2;
1437 } else {
1438 op->args[pi++] = temp_arg(ret);
1439 nb_rets = 1;
1441 #endif
1442 } else {
1443 nb_rets = 0;
1445 op->callo = nb_rets;
1447 real_args = 0;
1448 for (i = 0; i < nargs; i++) {
1449 int is_64bit = sizemask & (1 << (i+1)*2);
1450 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1451 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1452 /* some targets want aligned 64 bit args */
1453 if (real_args & 1) {
1454 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1455 real_args++;
1457 #endif
1458 /* If stack grows up, then we will be placing successive
1459 arguments at lower addresses, which means we need to
1460 reverse the order compared to how we would normally
1461 treat either big or little-endian. For those arguments
1462 that will wind up in registers, this still works for
1463 HPPA (the only current STACK_GROWSUP target) since the
1464 argument registers are *also* allocated in decreasing
1465 order. If another such target is added, this logic may
1466 have to get more complicated to differentiate between
1467 stack arguments and register arguments. */
1468 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1469 op->args[pi++] = temp_arg(args[i] + 1);
1470 op->args[pi++] = temp_arg(args[i]);
1471 #else
1472 op->args[pi++] = temp_arg(args[i]);
1473 op->args[pi++] = temp_arg(args[i] + 1);
1474 #endif
1475 real_args += 2;
1476 continue;
1479 op->args[pi++] = temp_arg(args[i]);
1480 real_args++;
1482 op->args[pi++] = (uintptr_t)func;
1483 op->args[pi++] = flags;
1484 op->calli = real_args;
1486 /* Make sure the fields didn't overflow. */
1487 tcg_debug_assert(op->calli == real_args);
1488 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1490 #if defined(__sparc__) && !defined(__arch64__) \
1491 && !defined(CONFIG_TCG_INTERPRETER)
1492 /* Free all of the parts we allocated above. */
1493 for (i = real_args = 0; i < orig_nargs; ++i) {
1494 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1495 if (is_64bit) {
1496 tcg_temp_free_internal(args[real_args++]);
1497 tcg_temp_free_internal(args[real_args++]);
1498 } else {
1499 real_args++;
1502 if (orig_sizemask & 1) {
1503 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1504 Note that describing these as TCGv_i64 eliminates an unnecessary
1505 zero-extension that tcg_gen_concat_i32_i64 would create. */
1506 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1507 tcg_temp_free_i64(retl);
1508 tcg_temp_free_i64(reth);
1510 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1511 for (i = 0; i < nargs; ++i) {
1512 int is_64bit = sizemask & (1 << (i+1)*2);
1513 if (!is_64bit) {
1514 tcg_temp_free_internal(args[i]);
1517 #endif /* TCG_TARGET_EXTEND_ARGS */
1520 static void tcg_reg_alloc_start(TCGContext *s)
1522 int i, n;
1523 TCGTemp *ts;
1525 for (i = 0, n = s->nb_globals; i < n; i++) {
1526 ts = &s->temps[i];
1527 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1529 for (n = s->nb_temps; i < n; i++) {
1530 ts = &s->temps[i];
1531 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1532 ts->mem_allocated = 0;
1533 ts->fixed_reg = 0;
1536 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1539 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1540 TCGTemp *ts)
1542 int idx = temp_idx(ts);
1544 if (ts->temp_global) {
1545 pstrcpy(buf, buf_size, ts->name);
1546 } else if (ts->temp_local) {
1547 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1548 } else {
1549 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1551 return buf;
1554 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1555 int buf_size, TCGArg arg)
1557 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1560 /* Find helper name. */
1561 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1563 const char *ret = NULL;
1564 if (helper_table) {
1565 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1566 if (info) {
1567 ret = info->name;
1570 return ret;
1573 static const char * const cond_name[] =
1575 [TCG_COND_NEVER] = "never",
1576 [TCG_COND_ALWAYS] = "always",
1577 [TCG_COND_EQ] = "eq",
1578 [TCG_COND_NE] = "ne",
1579 [TCG_COND_LT] = "lt",
1580 [TCG_COND_GE] = "ge",
1581 [TCG_COND_LE] = "le",
1582 [TCG_COND_GT] = "gt",
1583 [TCG_COND_LTU] = "ltu",
1584 [TCG_COND_GEU] = "geu",
1585 [TCG_COND_LEU] = "leu",
1586 [TCG_COND_GTU] = "gtu"
1589 static const char * const ldst_name[] =
1591 [MO_UB] = "ub",
1592 [MO_SB] = "sb",
1593 [MO_LEUW] = "leuw",
1594 [MO_LESW] = "lesw",
1595 [MO_LEUL] = "leul",
1596 [MO_LESL] = "lesl",
1597 [MO_LEQ] = "leq",
1598 [MO_BEUW] = "beuw",
1599 [MO_BESW] = "besw",
1600 [MO_BEUL] = "beul",
1601 [MO_BESL] = "besl",
1602 [MO_BEQ] = "beq",
1605 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1606 #ifdef ALIGNED_ONLY
1607 [MO_UNALN >> MO_ASHIFT] = "un+",
1608 [MO_ALIGN >> MO_ASHIFT] = "",
1609 #else
1610 [MO_UNALN >> MO_ASHIFT] = "",
1611 [MO_ALIGN >> MO_ASHIFT] = "al+",
1612 #endif
1613 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1614 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1615 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1616 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1617 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1618 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1621 void tcg_dump_ops(TCGContext *s)
1623 char buf[128];
1624 TCGOp *op;
1625 int oi;
1627 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
1628 int i, k, nb_oargs, nb_iargs, nb_cargs;
1629 const TCGOpDef *def;
1630 TCGOpcode c;
1631 int col = 0;
1633 op = &s->gen_op_buf[oi];
1634 c = op->opc;
1635 def = &tcg_op_defs[c];
1637 if (c == INDEX_op_insn_start) {
1638 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
1640 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1641 target_ulong a;
1642 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1643 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1644 #else
1645 a = op->args[i];
1646 #endif
1647 col += qemu_log(" " TARGET_FMT_lx, a);
1649 } else if (c == INDEX_op_call) {
1650 /* variable number of arguments */
1651 nb_oargs = op->callo;
1652 nb_iargs = op->calli;
1653 nb_cargs = def->nb_cargs;
1655 /* function name, flags, out args */
1656 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1657 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1658 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1659 for (i = 0; i < nb_oargs; i++) {
1660 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1661 op->args[i]));
1663 for (i = 0; i < nb_iargs; i++) {
1664 TCGArg arg = op->args[nb_oargs + i];
1665 const char *t = "<dummy>";
1666 if (arg != TCG_CALL_DUMMY_ARG) {
1667 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1669 col += qemu_log(",%s", t);
1671 } else {
1672 col += qemu_log(" %s ", def->name);
1674 nb_oargs = def->nb_oargs;
1675 nb_iargs = def->nb_iargs;
1676 nb_cargs = def->nb_cargs;
1678 k = 0;
1679 for (i = 0; i < nb_oargs; i++) {
1680 if (k != 0) {
1681 col += qemu_log(",");
1683 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1684 op->args[k++]));
1686 for (i = 0; i < nb_iargs; i++) {
1687 if (k != 0) {
1688 col += qemu_log(",");
1690 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1691 op->args[k++]));
1693 switch (c) {
1694 case INDEX_op_brcond_i32:
1695 case INDEX_op_setcond_i32:
1696 case INDEX_op_movcond_i32:
1697 case INDEX_op_brcond2_i32:
1698 case INDEX_op_setcond2_i32:
1699 case INDEX_op_brcond_i64:
1700 case INDEX_op_setcond_i64:
1701 case INDEX_op_movcond_i64:
1702 if (op->args[k] < ARRAY_SIZE(cond_name)
1703 && cond_name[op->args[k]]) {
1704 col += qemu_log(",%s", cond_name[op->args[k++]]);
1705 } else {
1706 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1708 i = 1;
1709 break;
1710 case INDEX_op_qemu_ld_i32:
1711 case INDEX_op_qemu_st_i32:
1712 case INDEX_op_qemu_ld_i64:
1713 case INDEX_op_qemu_st_i64:
1715 TCGMemOpIdx oi = op->args[k++];
1716 TCGMemOp op = get_memop(oi);
1717 unsigned ix = get_mmuidx(oi);
1719 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1720 col += qemu_log(",$0x%x,%u", op, ix);
1721 } else {
1722 const char *s_al, *s_op;
1723 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1724 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1725 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1727 i = 1;
1729 break;
1730 default:
1731 i = 0;
1732 break;
1734 switch (c) {
1735 case INDEX_op_set_label:
1736 case INDEX_op_br:
1737 case INDEX_op_brcond_i32:
1738 case INDEX_op_brcond_i64:
1739 case INDEX_op_brcond2_i32:
1740 col += qemu_log("%s$L%d", k ? "," : "",
1741 arg_label(op->args[k])->id);
1742 i++, k++;
1743 break;
1744 default:
1745 break;
1747 for (; i < nb_cargs; i++, k++) {
1748 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1751 if (op->life) {
1752 unsigned life = op->life;
1754 for (; col < 48; ++col) {
1755 putc(' ', qemu_logfile);
1758 if (life & (SYNC_ARG * 3)) {
1759 qemu_log(" sync:");
1760 for (i = 0; i < 2; ++i) {
1761 if (life & (SYNC_ARG << i)) {
1762 qemu_log(" %d", i);
1766 life /= DEAD_ARG;
1767 if (life) {
1768 qemu_log(" dead:");
1769 for (i = 0; life; ++i, life >>= 1) {
1770 if (life & 1) {
1771 qemu_log(" %d", i);
1776 qemu_log("\n");
1780 /* we give more priority to constraints with fewer registers */
1781 static int get_constraint_priority(const TCGOpDef *def, int k)
1783 const TCGArgConstraint *arg_ct;
1785 int i, n;
1786 arg_ct = &def->args_ct[k];
1787 if (arg_ct->ct & TCG_CT_ALIAS) {
1788 /* an alias is equivalent to a single register */
1789 n = 1;
1790 } else {
1791 if (!(arg_ct->ct & TCG_CT_REG))
1792 return 0;
1793 n = 0;
1794 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1795 if (tcg_regset_test_reg(arg_ct->u.regs, i))
1796 n++;
1799 return TCG_TARGET_NB_REGS - n + 1;
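/*
 * Editorial example (illustrative host, not from the original source):
 * with TCG_TARGET_NB_REGS == 16, a constraint that accepts any register
 * gets priority 16 - 16 + 1 = 1, a constraint tied to one specific
 * register gets 16 - 1 + 1 = 16, an aliased argument also counts as a
 * single register, and a constant-only constraint returns 0, so the most
 * restrictive constraints are sorted first by sort_constraints() below.
 */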
1802 /* sort from highest priority to lowest */
1803 static void sort_constraints(TCGOpDef *def, int start, int n)
1805 int i, j, p1, p2, tmp;
1807 for(i = 0; i < n; i++)
1808 def->sorted_args[start + i] = start + i;
1809 if (n <= 1)
1810 return;
1811 for(i = 0; i < n - 1; i++) {
1812 for(j = i + 1; j < n; j++) {
1813 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1814 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1815 if (p1 < p2) {
1816 tmp = def->sorted_args[start + i];
1817 def->sorted_args[start + i] = def->sorted_args[start + j];
1818 def->sorted_args[start + j] = tmp;
1824 static void process_op_defs(TCGContext *s)
1826 TCGOpcode op;
1828 for (op = 0; op < NB_OPS; op++) {
1829 TCGOpDef *def = &tcg_op_defs[op];
1830 const TCGTargetOpDef *tdefs;
1831 TCGType type;
1832 int i, nb_args;
1834 if (def->flags & TCG_OPF_NOT_PRESENT) {
1835 continue;
1838 nb_args = def->nb_iargs + def->nb_oargs;
1839 if (nb_args == 0) {
1840 continue;
1843 tdefs = tcg_target_op_def(op);
1844 /* Missing TCGTargetOpDef entry. */
1845 tcg_debug_assert(tdefs != NULL);
1847 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
1848 for (i = 0; i < nb_args; i++) {
1849 const char *ct_str = tdefs->args_ct_str[i];
1850 /* Incomplete TCGTargetOpDef entry. */
1851 tcg_debug_assert(ct_str != NULL);
1853 def->args_ct[i].u.regs = 0;
1854 def->args_ct[i].ct = 0;
1855 while (*ct_str != '\0') {
1856 switch(*ct_str) {
1857 case '0' ... '9':
1859 int oarg = *ct_str - '0';
1860 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
1861 tcg_debug_assert(oarg < def->nb_oargs);
1862 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
1863 /* TCG_CT_ALIAS is for the output arguments.
1864 The input is tagged with TCG_CT_IALIAS. */
1865 def->args_ct[i] = def->args_ct[oarg];
1866 def->args_ct[oarg].ct |= TCG_CT_ALIAS;
1867 def->args_ct[oarg].alias_index = i;
1868 def->args_ct[i].ct |= TCG_CT_IALIAS;
1869 def->args_ct[i].alias_index = oarg;
1871 ct_str++;
1872 break;
1873 case '&':
1874 def->args_ct[i].ct |= TCG_CT_NEWREG;
1875 ct_str++;
1876 break;
1877 case 'i':
1878 def->args_ct[i].ct |= TCG_CT_CONST;
1879 ct_str++;
1880 break;
1881 default:
1882 ct_str = target_parse_constraint(&def->args_ct[i],
1883 ct_str, type);
1884 /* Typo in TCGTargetOpDef constraint. */
1885 tcg_debug_assert(ct_str != NULL);
1890 /* TCGTargetOpDef entry with too much information? */
1891 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1893 /* sort the constraints (XXX: this is just a heuristic) */
1894 sort_constraints(def, 0, def->nb_oargs);
1895 sort_constraints(def, def->nb_oargs, def->nb_iargs);
1899 void tcg_op_remove(TCGContext *s, TCGOp *op)
1901 int next = op->next;
1902 int prev = op->prev;
1904 /* We should never attempt to remove the list terminator. */
1905 tcg_debug_assert(op != &s->gen_op_buf[0]);
1907 s->gen_op_buf[next].prev = prev;
1908 s->gen_op_buf[prev].next = next;
1910 memset(op, 0, sizeof(*op));
1912 #ifdef CONFIG_PROFILER
1913 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
1914 #endif
1917 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1918 TCGOpcode opc, int nargs)
1920 int oi = s->gen_next_op_idx;
1921 int prev = old_op->prev;
1922 int next = old_op - s->gen_op_buf;
1923 TCGOp *new_op;
1925 tcg_debug_assert(oi < OPC_BUF_SIZE);
1926 s->gen_next_op_idx = oi + 1;
1928 new_op = &s->gen_op_buf[oi];
1929 *new_op = (TCGOp){
1930 .opc = opc,
1931 .prev = prev,
1932 .next = next
1934 s->gen_op_buf[prev].next = oi;
1935 old_op->prev = oi;
1937 return new_op;
1940 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1941 TCGOpcode opc, int nargs)
1943 int oi = s->gen_next_op_idx;
1944 int prev = old_op - s->gen_op_buf;
1945 int next = old_op->next;
1946 TCGOp *new_op;
1948 tcg_debug_assert(oi < OPC_BUF_SIZE);
1949 s->gen_next_op_idx = oi + 1;
1951 new_op = &s->gen_op_buf[oi];
1952 *new_op = (TCGOp){
1953 .opc = opc,
1954 .prev = prev,
1955 .next = next
1957 s->gen_op_buf[next].prev = oi;
1958 old_op->next = oi;
1960 return new_op;
1963 #define TS_DEAD 1
1964 #define TS_MEM 2
1966 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
1967 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
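/*
 * Editorial note on the flags used by the liveness passes below: during
 * the backward walk, ts->state combines TS_DEAD (the value is not needed
 * by any later op) and TS_MEM (the value must also exist in its memory
 * slot).  Per-op, IS_DEAD_ARG(n) records that argument n dies at this op
 * and NEED_SYNC_ARG(n) that output n must be spilled back to memory.
 */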
1969 /* liveness analysis: end of function: all temps are dead, and globals
1970 should be in memory. */
1971 static void tcg_la_func_end(TCGContext *s)
1973 int ng = s->nb_globals;
1974 int nt = s->nb_temps;
1975 int i;
1977 for (i = 0; i < ng; ++i) {
1978 s->temps[i].state = TS_DEAD | TS_MEM;
1980 for (i = ng; i < nt; ++i) {
1981 s->temps[i].state = TS_DEAD;
1985 /* liveness analysis: end of basic block: all temps are dead, globals
1986 and local temps should be in memory. */
1987 static void tcg_la_bb_end(TCGContext *s)
1989 int ng = s->nb_globals;
1990 int nt = s->nb_temps;
1991 int i;
1993 for (i = 0; i < ng; ++i) {
1994 s->temps[i].state = TS_DEAD | TS_MEM;
1996 for (i = ng; i < nt; ++i) {
1997 s->temps[i].state = (s->temps[i].temp_local
1998 ? TS_DEAD | TS_MEM
1999 : TS_DEAD);
2003 /* Liveness analysis: update the opc_arg_life array to tell if a
2004 given input argument is dead. Instructions updating dead
2005 temporaries are removed. */
2006 static void liveness_pass_1(TCGContext *s)
2008 int nb_globals = s->nb_globals;
2009 int oi, oi_prev;
2011 tcg_la_func_end(s);
2013 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
2014 int i, nb_iargs, nb_oargs;
2015 TCGOpcode opc_new, opc_new2;
2016 bool have_opc_new2;
2017 TCGLifeData arg_life = 0;
2018 TCGTemp *arg_ts;
2020 TCGOp * const op = &s->gen_op_buf[oi];
2021 TCGOpcode opc = op->opc;
2022 const TCGOpDef *def = &tcg_op_defs[opc];
2024 oi_prev = op->prev;
2026 switch (opc) {
2027 case INDEX_op_call:
2029 int call_flags;
2031 nb_oargs = op->callo;
2032 nb_iargs = op->calli;
2033 call_flags = op->args[nb_oargs + nb_iargs + 1];
2035 /* pure functions can be removed if their result is unused */
2036 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2037 for (i = 0; i < nb_oargs; i++) {
2038 arg_ts = arg_temp(op->args[i]);
2039 if (arg_ts->state != TS_DEAD) {
2040 goto do_not_remove_call;
2043 goto do_remove;
2044 } else {
2045 do_not_remove_call:
2047 /* output args are dead */
2048 for (i = 0; i < nb_oargs; i++) {
2049 arg_ts = arg_temp(op->args[i]);
2050 if (arg_ts->state & TS_DEAD) {
2051 arg_life |= DEAD_ARG << i;
2053 if (arg_ts->state & TS_MEM) {
2054 arg_life |= SYNC_ARG << i;
2056 arg_ts->state = TS_DEAD;
2059 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2060 TCG_CALL_NO_READ_GLOBALS))) {
2061 /* globals should go back to memory */
2062 for (i = 0; i < nb_globals; i++) {
2063 s->temps[i].state = TS_DEAD | TS_MEM;
2065 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2066 /* globals should be synced to memory */
2067 for (i = 0; i < nb_globals; i++) {
2068 s->temps[i].state |= TS_MEM;
2072 /* record arguments that die in this helper */
2073 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2074 arg_ts = arg_temp(op->args[i]);
2075 if (arg_ts && arg_ts->state & TS_DEAD) {
2076 arg_life |= DEAD_ARG << i;
2079 /* input arguments are live for preceding opcodes */
2080 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2081 arg_ts = arg_temp(op->args[i]);
2082 if (arg_ts) {
2083 arg_ts->state &= ~TS_DEAD;
2088 break;
2089 case INDEX_op_insn_start:
2090 break;
2091 case INDEX_op_discard:
2092 /* mark the temporary as dead */
2093 arg_temp(op->args[0])->state = TS_DEAD;
2094 break;
2096 case INDEX_op_add2_i32:
2097 opc_new = INDEX_op_add_i32;
2098 goto do_addsub2;
2099 case INDEX_op_sub2_i32:
2100 opc_new = INDEX_op_sub_i32;
2101 goto do_addsub2;
2102 case INDEX_op_add2_i64:
2103 opc_new = INDEX_op_add_i64;
2104 goto do_addsub2;
2105 case INDEX_op_sub2_i64:
2106 opc_new = INDEX_op_sub_i64;
2107 do_addsub2:
2108 nb_iargs = 4;
2109 nb_oargs = 2;
2110 /* Test if the high part of the operation is dead, but not
2111 the low part. The result can be optimized to a simple
2112 add or sub. This happens often for an x86_64 guest when the
2113 CPU mode is set to 32-bit. */
2114 if (arg_temp(op->args[1])->state == TS_DEAD) {
2115 if (arg_temp(op->args[0])->state == TS_DEAD) {
2116 goto do_remove;
2118 /* Replace the opcode and adjust the args in place,
2119 leaving 3 unused args at the end. */
2120 op->opc = opc = opc_new;
2121 op->args[1] = op->args[2];
2122 op->args[2] = op->args[4];
2123 /* Fall through and mark the single-word operation live. */
2124 nb_iargs = 2;
2125 nb_oargs = 1;
2127 goto do_not_remove;
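/* A small worked example of the rewrite above (temp names illustrative):
     add2_i32 t_lo, t_hi, a_lo, a_hi, b_lo, b_hi
   with t_hi dead but t_lo still live is rewritten in place to
     add_i32 t_lo, a_lo, b_lo
   leaving the three trailing arguments unused. */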
2129 case INDEX_op_mulu2_i32:
2130 opc_new = INDEX_op_mul_i32;
2131 opc_new2 = INDEX_op_muluh_i32;
2132 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2133 goto do_mul2;
2134 case INDEX_op_muls2_i32:
2135 opc_new = INDEX_op_mul_i32;
2136 opc_new2 = INDEX_op_mulsh_i32;
2137 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2138 goto do_mul2;
2139 case INDEX_op_mulu2_i64:
2140 opc_new = INDEX_op_mul_i64;
2141 opc_new2 = INDEX_op_muluh_i64;
2142 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2143 goto do_mul2;
2144 case INDEX_op_muls2_i64:
2145 opc_new = INDEX_op_mul_i64;
2146 opc_new2 = INDEX_op_mulsh_i64;
2147 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2148 goto do_mul2;
2149 do_mul2:
2150 nb_iargs = 2;
2151 nb_oargs = 2;
2152 if (arg_temp(op->args[1])->state == TS_DEAD) {
2153 if (arg_temp(op->args[0])->state == TS_DEAD) {
2154 /* Both parts of the operation are dead. */
2155 goto do_remove;
2157 /* The high part of the operation is dead; generate the low. */
2158 op->opc = opc = opc_new;
2159 op->args[1] = op->args[2];
2160 op->args[2] = op->args[3];
2161 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2162 /* The low part of the operation is dead; generate the high. */
2163 op->opc = opc = opc_new2;
2164 op->args[0] = op->args[1];
2165 op->args[1] = op->args[2];
2166 op->args[2] = op->args[3];
2167 } else {
2168 goto do_not_remove;
2170 /* Mark the single-word operation live. */
2171 nb_oargs = 1;
2172 goto do_not_remove;
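/* Worked example for the mul2 rewrite (temp names illustrative):
     mulu2_i32 t_lo, t_hi, a, b
   becomes "mul_i32 t_lo, a, b" when t_hi is dead, or
   "muluh_i32 t_hi, a, b" when t_lo is dead and the host provides
   TCG_TARGET_HAS_muluh_i32; if both halves are dead the op is removed. */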
2174 default:
2175 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2176 nb_iargs = def->nb_iargs;
2177 nb_oargs = def->nb_oargs;
2179 /* Test if the operation can be removed because all
2180 its outputs are dead. We assume that nb_oargs == 0
2181 implies side effects */
2182 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2183 for (i = 0; i < nb_oargs; i++) {
2184 if (arg_temp(op->args[i])->state != TS_DEAD) {
2185 goto do_not_remove;
2188 do_remove:
2189 tcg_op_remove(s, op);
2190 } else {
2191 do_not_remove:
2192 /* output args are dead */
2193 for (i = 0; i < nb_oargs; i++) {
2194 arg_ts = arg_temp(op->args[i]);
2195 if (arg_ts->state & TS_DEAD) {
2196 arg_life |= DEAD_ARG << i;
2198 if (arg_ts->state & TS_MEM) {
2199 arg_life |= SYNC_ARG << i;
2201 arg_ts->state = TS_DEAD;
2204 /* if end of basic block, update */
2205 if (def->flags & TCG_OPF_BB_END) {
2206 tcg_la_bb_end(s);
2207 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2208 /* globals should be synced to memory */
2209 for (i = 0; i < nb_globals; i++) {
2210 s->temps[i].state |= TS_MEM;
2214 /* record arguments that die in this opcode */
2215 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2216 arg_ts = arg_temp(op->args[i]);
2217 if (arg_ts->state & TS_DEAD) {
2218 arg_life |= DEAD_ARG << i;
2221 /* input arguments are live for preceding opcodes */
2222 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2223 arg_temp(op->args[i])->state &= ~TS_DEAD;
2226 break;
2228 op->life = arg_life;
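/* Note that this pass iterates backwards, from s->gen_op_buf[0].prev
   toward the start of the TB, so by the time an op is visited the temp
   state already reflects every later use; that is why clearing TS_DEAD
   on the inputs above is enough to keep them live for the preceding
   opcodes. */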
2232 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2233 static bool liveness_pass_2(TCGContext *s)
2235 int nb_globals = s->nb_globals;
2236 int nb_temps, i, oi, oi_next;
2237 bool changes = false;
2239 /* Create a temporary for each indirect global. */
2240 for (i = 0; i < nb_globals; ++i) {
2241 TCGTemp *its = &s->temps[i];
2242 if (its->indirect_reg) {
2243 TCGTemp *dts = tcg_temp_alloc(s);
2244 dts->type = its->type;
2245 dts->base_type = its->base_type;
2246 its->state_ptr = dts;
2247 } else {
2248 its->state_ptr = NULL;
2250 /* All globals begin dead. */
2251 its->state = TS_DEAD;
2253 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2254 TCGTemp *its = &s->temps[i];
2255 its->state_ptr = NULL;
2256 its->state = TS_DEAD;
2259 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
2260 TCGOp *op = &s->gen_op_buf[oi];
2261 TCGOpcode opc = op->opc;
2262 const TCGOpDef *def = &tcg_op_defs[opc];
2263 TCGLifeData arg_life = op->life;
2264 int nb_iargs, nb_oargs, call_flags;
2265 TCGTemp *arg_ts, *dir_ts;
2267 oi_next = op->next;
2269 if (opc == INDEX_op_call) {
2270 nb_oargs = op->callo;
2271 nb_iargs = op->calli;
2272 call_flags = op->args[nb_oargs + nb_iargs + 1];
2273 } else {
2274 nb_iargs = def->nb_iargs;
2275 nb_oargs = def->nb_oargs;
2277 /* Set flags similar to those a call would require. */
2278 if (def->flags & TCG_OPF_BB_END) {
2279 /* Like writing globals: save_globals */
2280 call_flags = 0;
2281 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2282 /* Like reading globals: sync_globals */
2283 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2284 } else {
2285 /* No effect on globals. */
2286 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2287 TCG_CALL_NO_WRITE_GLOBALS);
2291 /* Make sure that input arguments are available. */
2292 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2293 arg_ts = arg_temp(op->args[i]);
2294 if (arg_ts) {
2295 dir_ts = arg_ts->state_ptr;
2296 if (dir_ts && arg_ts->state == TS_DEAD) {
2297 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2298 ? INDEX_op_ld_i32
2299 : INDEX_op_ld_i64);
2300 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2302 lop->args[0] = temp_arg(dir_ts);
2303 lop->args[1] = temp_arg(arg_ts->mem_base);
2304 lop->args[2] = arg_ts->mem_offset;
2306 /* Loaded, but synced with memory. */
2307 arg_ts->state = TS_MEM;
2312 /* Perform input replacement, and mark inputs that became dead.
2313 No action is required except keeping the temp state up to date
2314 so that we reload when needed. */
2315 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2316 arg_ts = arg_temp(op->args[i]);
2317 if (arg_ts) {
2318 dir_ts = arg_ts->state_ptr;
2319 if (dir_ts) {
2320 op->args[i] = temp_arg(dir_ts);
2321 changes = true;
2322 if (IS_DEAD_ARG(i)) {
2323 arg_ts->state = TS_DEAD;
2329 /* Liveness analysis should ensure that the following are
2330 all correct, for call sites and basic block end points. */
2331 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2332 /* Nothing to do */
2333 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2334 for (i = 0; i < nb_globals; ++i) {
2335 /* Liveness should see that globals are synced back,
2336 that is, either TS_DEAD or TS_MEM. */
2337 arg_ts = &s->temps[i];
2338 tcg_debug_assert(arg_ts->state_ptr == 0
2339 || arg_ts->state != 0);
2341 } else {
2342 for (i = 0; i < nb_globals; ++i) {
2343 /* Liveness should see that globals are saved back,
2344 that is, TS_DEAD, waiting to be reloaded. */
2345 arg_ts = &s->temps[i];
2346 tcg_debug_assert(arg_ts->state_ptr == 0
2347 || arg_ts->state == TS_DEAD);
2351 /* Outputs become available. */
2352 for (i = 0; i < nb_oargs; i++) {
2353 arg_ts = arg_temp(op->args[i]);
2354 dir_ts = arg_ts->state_ptr;
2355 if (!dir_ts) {
2356 continue;
2358 op->args[i] = temp_arg(dir_ts);
2359 changes = true;
2361 /* The output is now live and modified. */
2362 arg_ts->state = 0;
2364 /* Sync outputs upon their last write. */
2365 if (NEED_SYNC_ARG(i)) {
2366 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2367 ? INDEX_op_st_i32
2368 : INDEX_op_st_i64);
2369 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2371 sop->args[0] = temp_arg(dir_ts);
2372 sop->args[1] = temp_arg(arg_ts->mem_base);
2373 sop->args[2] = arg_ts->mem_offset;
2375 arg_ts->state = TS_MEM;
2377 /* Drop outputs that are dead. */
2378 if (IS_DEAD_ARG(i)) {
2379 arg_ts->state = TS_DEAD;
2384 return changes;
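/* In outline, what this pass does to an indirect global (one that lives
   behind a pointer such as the CPU state): a shadow direct temp is
   created and recorded in state_ptr; a ld_i32/ld_i64 from
   mem_base/mem_offset is inserted before a use whose value is currently
   dead, and a st_i32/st_i64 is inserted after the last write when
   NEED_SYNC_ARG is set.  The function returns true when any argument was
   rewritten, in which case the caller re-runs liveness_pass_1 (see
   tcg_gen_code). */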
2387 #ifdef CONFIG_DEBUG_TCG
2388 static void dump_regs(TCGContext *s)
2390 TCGTemp *ts;
2391 int i;
2392 char buf[64];
2394 for(i = 0; i < s->nb_temps; i++) {
2395 ts = &s->temps[i];
2396 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2397 switch(ts->val_type) {
2398 case TEMP_VAL_REG:
2399 printf("%s", tcg_target_reg_names[ts->reg]);
2400 break;
2401 case TEMP_VAL_MEM:
2402 printf("%d(%s)", (int)ts->mem_offset,
2403 tcg_target_reg_names[ts->mem_base->reg]);
2404 break;
2405 case TEMP_VAL_CONST:
2406 printf("$0x%" TCG_PRIlx, ts->val);
2407 break;
2408 case TEMP_VAL_DEAD:
2409 printf("D");
2410 break;
2411 default:
2412 printf("???");
2413 break;
2415 printf("\n");
2418 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2419 if (s->reg_to_temp[i] != NULL) {
2420 printf("%s: %s\n",
2421 tcg_target_reg_names[i],
2422 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2427 static void check_regs(TCGContext *s)
2429 int reg;
2430 int k;
2431 TCGTemp *ts;
2432 char buf[64];
2434 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2435 ts = s->reg_to_temp[reg];
2436 if (ts != NULL) {
2437 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2438 printf("Inconsistency for register %s:\n",
2439 tcg_target_reg_names[reg]);
2440 goto fail;
2444 for (k = 0; k < s->nb_temps; k++) {
2445 ts = &s->temps[k];
2446 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2447 && s->reg_to_temp[ts->reg] != ts) {
2448 printf("Inconsistency for temp %s:\n",
2449 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2450 fail:
2451 printf("reg state:\n");
2452 dump_regs(s);
2453 tcg_abort();
2457 #endif
2459 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2461 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2462 /* Sparc64 stack is accessed with offset of 2047 */
2463 s->current_frame_offset = (s->current_frame_offset +
2464 (tcg_target_long)sizeof(tcg_target_long) - 1) &
2465 ~(sizeof(tcg_target_long) - 1);
2466 #endif
2467 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2468 s->frame_end) {
2469 tcg_abort();
2471 ts->mem_offset = s->current_frame_offset;
2472 ts->mem_base = s->frame_temp;
2473 ts->mem_allocated = 1;
2474 s->current_frame_offset += sizeof(tcg_target_long);
2477 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2479 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
2480 mark it free; otherwise mark it dead. */
2481 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2483 if (ts->fixed_reg) {
2484 return;
2486 if (ts->val_type == TEMP_VAL_REG) {
2487 s->reg_to_temp[ts->reg] = NULL;
2489 ts->val_type = (free_or_dead < 0
2490 || ts->temp_local
2491 || ts->temp_global
2492 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2495 /* Mark a temporary as dead. */
2496 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2498 temp_free_or_dead(s, ts, 1);
2501 /* Sync a temporary to memory. 'allocated_regs' is used in case a
2502 temporary register needs to be allocated to store a constant. If 'free_or_dead'
2503 is non-zero, subsequently release the temporary; if it is positive, the
2504 temp is dead; if it is negative, the temp is free. */
2505 static void temp_sync(TCGContext *s, TCGTemp *ts,
2506 TCGRegSet allocated_regs, int free_or_dead)
2508 if (ts->fixed_reg) {
2509 return;
2511 if (!ts->mem_coherent) {
2512 if (!ts->mem_allocated) {
2513 temp_allocate_frame(s, ts);
2515 switch (ts->val_type) {
2516 case TEMP_VAL_CONST:
2517 /* If we're going to free the temp immediately, then we won't
2518 require it later in a register, so attempt to store the
2519 constant to memory directly. */
2520 if (free_or_dead
2521 && tcg_out_sti(s, ts->type, ts->val,
2522 ts->mem_base->reg, ts->mem_offset)) {
2523 break;
2525 temp_load(s, ts, tcg_target_available_regs[ts->type],
2526 allocated_regs);
2527 /* fallthrough */
2529 case TEMP_VAL_REG:
2530 tcg_out_st(s, ts->type, ts->reg,
2531 ts->mem_base->reg, ts->mem_offset);
2532 break;
2534 case TEMP_VAL_MEM:
2535 break;
2537 case TEMP_VAL_DEAD:
2538 default:
2539 tcg_abort();
2541 ts->mem_coherent = 1;
2543 if (free_or_dead) {
2544 temp_free_or_dead(s, ts, free_or_dead);
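/* The free_or_dead convention matches temp_free_or_dead() above: a
   negative value means "free" (the value remains available in memory, so
   the temp becomes TEMP_VAL_MEM), a positive value means "dead".
   temp_dead() passes 1, and tcg_reg_free() below passes -1 so that a
   spilled value can still be reloaded later. */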
2548 /* free register 'reg' by spilling the corresponding temporary if necessary */
2549 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2551 TCGTemp *ts = s->reg_to_temp[reg];
2552 if (ts != NULL) {
2553 temp_sync(s, ts, allocated_regs, -1);
2557 /* Allocate a register belonging to reg1 & ~reg2 */
2558 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2559 TCGRegSet allocated_regs, bool rev)
2561 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2562 const int *order;
2563 TCGReg reg;
2564 TCGRegSet reg_ct;
2566 reg_ct = desired_regs & ~allocated_regs;
2567 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2569 /* first try free registers */
2570 for(i = 0; i < n; i++) {
2571 reg = order[i];
2572 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2573 return reg;
2576 /* XXX: do better spill choice */
2577 for(i = 0; i < n; i++) {
2578 reg = order[i];
2579 if (tcg_regset_test_reg(reg_ct, reg)) {
2580 tcg_reg_free(s, reg, allocated_regs);
2581 return reg;
2585 tcg_abort();
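/* Allocation strategy, in short: scan the preferred order (the indirect
   order when 'rev' is set, otherwise tcg_target_reg_alloc_order) for a
   register that is both acceptable and currently unassigned; failing
   that, spill the first acceptable register via tcg_reg_free().  The
   abort is only reachable if desired_regs & ~allocated_regs is empty. */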
2588 /* Make sure the temporary is in a register. If needed, allocate the register
2589 from DESIRED while avoiding ALLOCATED. */
2590 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2591 TCGRegSet allocated_regs)
2593 TCGReg reg;
2595 switch (ts->val_type) {
2596 case TEMP_VAL_REG:
2597 return;
2598 case TEMP_VAL_CONST:
2599 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2600 tcg_out_movi(s, ts->type, reg, ts->val);
2601 ts->mem_coherent = 0;
2602 break;
2603 case TEMP_VAL_MEM:
2604 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2605 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2606 ts->mem_coherent = 1;
2607 break;
2608 case TEMP_VAL_DEAD:
2609 default:
2610 tcg_abort();
2612 ts->reg = reg;
2613 ts->val_type = TEMP_VAL_REG;
2614 s->reg_to_temp[reg] = ts;
2617 /* Save a temporary to memory. 'allocated_regs' is used in case a
2618 temporary register needs to be allocated to store a constant. */
2619 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2621 /* The liveness analysis already ensures that globals are back
2622 in memory. Keep a tcg_debug_assert for safety. */
2623 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2626 /* save globals to their canonical location and assume they can be
2627 modified by the following code. 'allocated_regs' is used in case a
2628 temporary register needs to be allocated to store a constant. */
2629 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2631 int i, n;
2633 for (i = 0, n = s->nb_globals; i < n; i++) {
2634 temp_save(s, &s->temps[i], allocated_regs);
2638 /* sync globals to their canonical location and assume they can be
2639 read by the following code. 'allocated_regs' is used in case a
2640 temporary register needs to be allocated to store a constant. */
2641 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2643 int i, n;
2645 for (i = 0, n = s->nb_globals; i < n; i++) {
2646 TCGTemp *ts = &s->temps[i];
2647 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2648 || ts->fixed_reg
2649 || ts->mem_coherent);
2653 /* at the end of a basic block, we assume all temporaries are dead and
2654 all globals are stored at their canonical location. */
2655 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2657 int i;
2659 for (i = s->nb_globals; i < s->nb_temps; i++) {
2660 TCGTemp *ts = &s->temps[i];
2661 if (ts->temp_local) {
2662 temp_save(s, ts, allocated_regs);
2663 } else {
2664 /* The liveness analysis already ensures that temps are dead.
2665 Keep a tcg_debug_assert for safety. */
2666 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2670 save_globals(s, allocated_regs);
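/* This mirrors tcg_la_bb_end() in the liveness pass: local temps are
   saved to their memory slots, plain temps are expected to be dead
   already, and finally all globals are saved back to their canonical
   locations. */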
2673 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2674 tcg_target_ulong val, TCGLifeData arg_life)
2676 if (ots->fixed_reg) {
2677 /* For fixed registers, we do not do any constant propagation. */
2678 tcg_out_movi(s, ots->type, ots->reg, val);
2679 return;
2682 /* The movi is not explicitly generated here. */
2683 if (ots->val_type == TEMP_VAL_REG) {
2684 s->reg_to_temp[ots->reg] = NULL;
2686 ots->val_type = TEMP_VAL_CONST;
2687 ots->val = val;
2688 ots->mem_coherent = 0;
2689 if (NEED_SYNC_ARG(0)) {
2690 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2691 } else if (IS_DEAD_ARG(0)) {
2692 temp_dead(s, ots);
2696 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2698 TCGTemp *ots = arg_temp(op->args[0]);
2699 tcg_target_ulong val = op->args[1];
2701 tcg_reg_alloc_do_movi(s, ots, val, op->life);
2704 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2706 const TCGLifeData arg_life = op->life;
2707 TCGRegSet allocated_regs;
2708 TCGTemp *ts, *ots;
2709 TCGType otype, itype;
2711 allocated_regs = s->reserved_regs;
2712 ots = arg_temp(op->args[0]);
2713 ts = arg_temp(op->args[1]);
2715 /* Note that otype != itype for no-op truncation. */
2716 otype = ots->type;
2717 itype = ts->type;
2719 if (ts->val_type == TEMP_VAL_CONST) {
2720 /* propagate constant or generate sti */
2721 tcg_target_ulong val = ts->val;
2722 if (IS_DEAD_ARG(1)) {
2723 temp_dead(s, ts);
2725 tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2726 return;
2729 /* If the source value is in memory we're going to be forced
2730 to have it in a register in order to perform the copy. Copy
2731 the SOURCE value into its own register first, that way we
2732 don't have to reload SOURCE the next time it is used. */
2733 if (ts->val_type == TEMP_VAL_MEM) {
2734 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2737 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2738 if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2739 /* mov to a non-saved dead register makes no sense (even with
2740 liveness analysis disabled). */
2741 tcg_debug_assert(NEED_SYNC_ARG(0));
2742 if (!ots->mem_allocated) {
2743 temp_allocate_frame(s, ots);
2745 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2746 if (IS_DEAD_ARG(1)) {
2747 temp_dead(s, ts);
2749 temp_dead(s, ots);
2750 } else {
2751 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
2752 /* the mov can be suppressed */
2753 if (ots->val_type == TEMP_VAL_REG) {
2754 s->reg_to_temp[ots->reg] = NULL;
2756 ots->reg = ts->reg;
2757 temp_dead(s, ts);
2758 } else {
2759 if (ots->val_type != TEMP_VAL_REG) {
2760 /* When allocating a new register, make sure to not spill the
2761 input one. */
2762 tcg_regset_set_reg(allocated_regs, ts->reg);
2763 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
2764 allocated_regs, ots->indirect_base);
2766 tcg_out_mov(s, otype, ots->reg, ts->reg);
2768 ots->val_type = TEMP_VAL_REG;
2769 ots->mem_coherent = 0;
2770 s->reg_to_temp[ots->reg] = ots;
2771 if (NEED_SYNC_ARG(0)) {
2772 temp_sync(s, ots, allocated_regs, 0);
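/* Summary of the cases handled above: a constant source is propagated
   through tcg_reg_alloc_do_movi(); a dead output that only needs a sync
   is stored straight to its memory slot; a dying, non-fixed source lets
   the destination simply take over its register; otherwise a register is
   allocated if needed and an actual tcg_out_mov() is emitted. */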
2777 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
2779 const TCGLifeData arg_life = op->life;
2780 const TCGOpDef * const def = &tcg_op_defs[op->opc];
2781 TCGRegSet i_allocated_regs;
2782 TCGRegSet o_allocated_regs;
2783 int i, k, nb_iargs, nb_oargs;
2784 TCGReg reg;
2785 TCGArg arg;
2786 const TCGArgConstraint *arg_ct;
2787 TCGTemp *ts;
2788 TCGArg new_args[TCG_MAX_OP_ARGS];
2789 int const_args[TCG_MAX_OP_ARGS];
2791 nb_oargs = def->nb_oargs;
2792 nb_iargs = def->nb_iargs;
2794 /* copy constants */
2795 memcpy(new_args + nb_oargs + nb_iargs,
2796 op->args + nb_oargs + nb_iargs,
2797 sizeof(TCGArg) * def->nb_cargs);
2799 i_allocated_regs = s->reserved_regs;
2800 o_allocated_regs = s->reserved_regs;
2802 /* satisfy input constraints */
2803 for (k = 0; k < nb_iargs; k++) {
2804 i = def->sorted_args[nb_oargs + k];
2805 arg = op->args[i];
2806 arg_ct = &def->args_ct[i];
2807 ts = arg_temp(arg);
2809 if (ts->val_type == TEMP_VAL_CONST
2810 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
2811 /* constant is OK for instruction */
2812 const_args[i] = 1;
2813 new_args[i] = ts->val;
2814 goto iarg_end;
2817 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
2819 if (arg_ct->ct & TCG_CT_IALIAS) {
2820 if (ts->fixed_reg) {
2821 /* if fixed register, we must allocate a new register
2822 if the alias is not the same register */
2823 if (arg != op->args[arg_ct->alias_index])
2824 goto allocate_in_reg;
2825 } else {
2826 /* if the input is aliased to an output and if it is
2827 not dead after the instruction, we must allocate
2828 a new register and move it */
2829 if (!IS_DEAD_ARG(i)) {
2830 goto allocate_in_reg;
2832 /* check if the current register has already been allocated
2833 for another input aliased to an output */
2834 int k2, i2;
2835 for (k2 = 0 ; k2 < k ; k2++) {
2836 i2 = def->sorted_args[nb_oargs + k2];
2837 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2838 (new_args[i2] == ts->reg)) {
2839 goto allocate_in_reg;
2844 reg = ts->reg;
2845 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2846 /* nothing to do : the constraint is satisfied */
2847 } else {
2848 allocate_in_reg:
2849 /* allocate a new register matching the constraint
2850 and move the temporary register into it */
2851 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
2852 ts->indirect_base);
2853 tcg_out_mov(s, ts->type, reg, ts->reg);
2855 new_args[i] = reg;
2856 const_args[i] = 0;
2857 tcg_regset_set_reg(i_allocated_regs, reg);
2858 iarg_end: ;
2861 /* mark dead temporaries and free the associated registers */
2862 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2863 if (IS_DEAD_ARG(i)) {
2864 temp_dead(s, arg_temp(op->args[i]));
2868 if (def->flags & TCG_OPF_BB_END) {
2869 tcg_reg_alloc_bb_end(s, i_allocated_regs);
2870 } else {
2871 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2872 /* XXX: permit generic clobber register list ? */
2873 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2874 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2875 tcg_reg_free(s, i, i_allocated_regs);
2879 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2880 /* sync globals if the op has side effects and might trigger
2881 an exception. */
2882 sync_globals(s, i_allocated_regs);
2885 /* satisfy the output constraints */
2886 for(k = 0; k < nb_oargs; k++) {
2887 i = def->sorted_args[k];
2888 arg = op->args[i];
2889 arg_ct = &def->args_ct[i];
2890 ts = arg_temp(arg);
2891 if ((arg_ct->ct & TCG_CT_ALIAS)
2892 && !const_args[arg_ct->alias_index]) {
2893 reg = new_args[arg_ct->alias_index];
2894 } else if (arg_ct->ct & TCG_CT_NEWREG) {
2895 reg = tcg_reg_alloc(s, arg_ct->u.regs,
2896 i_allocated_regs | o_allocated_regs,
2897 ts->indirect_base);
2898 } else {
2899 /* if fixed register, we try to use it */
2900 reg = ts->reg;
2901 if (ts->fixed_reg &&
2902 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2903 goto oarg_end;
2905 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
2906 ts->indirect_base);
2908 tcg_regset_set_reg(o_allocated_regs, reg);
2909 /* if a fixed register is used, then a move will be done afterwards */
2910 if (!ts->fixed_reg) {
2911 if (ts->val_type == TEMP_VAL_REG) {
2912 s->reg_to_temp[ts->reg] = NULL;
2914 ts->val_type = TEMP_VAL_REG;
2915 ts->reg = reg;
2916 /* temp value is modified, so the value kept in memory is
2917 potentially not the same */
2918 ts->mem_coherent = 0;
2919 s->reg_to_temp[reg] = ts;
2921 oarg_end:
2922 new_args[i] = reg;
2926 /* emit instruction */
2927 tcg_out_op(s, op->opc, new_args, const_args);
2929 /* move the outputs in the correct register if needed */
2930 for(i = 0; i < nb_oargs; i++) {
2931 ts = arg_temp(op->args[i]);
2932 reg = new_args[i];
2933 if (ts->fixed_reg && ts->reg != reg) {
2934 tcg_out_mov(s, ts->type, ts->reg, reg);
2936 if (NEED_SYNC_ARG(i)) {
2937 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
2938 } else if (IS_DEAD_ARG(i)) {
2939 temp_dead(s, ts);
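/* Overall order of operations in tcg_reg_alloc_op(): copy the constant
   arguments, satisfy the input constraints (handling aliased and fixed
   registers), free dying inputs, handle basic-block ends, call clobbers
   and global syncing, satisfy the output constraints, emit the host
   instruction with tcg_out_op(), then move, sync or kill the outputs. */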
2944 #ifdef TCG_TARGET_STACK_GROWSUP
2945 #define STACK_DIR(x) (-(x))
2946 #else
2947 #define STACK_DIR(x) (x)
2948 #endif
2950 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
2952 const int nb_oargs = op->callo;
2953 const int nb_iargs = op->calli;
2954 const TCGLifeData arg_life = op->life;
2955 int flags, nb_regs, i;
2956 TCGReg reg;
2957 TCGArg arg;
2958 TCGTemp *ts;
2959 intptr_t stack_offset;
2960 size_t call_stack_size;
2961 tcg_insn_unit *func_addr;
2962 int allocate_args;
2963 TCGRegSet allocated_regs;
2965 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
2966 flags = op->args[nb_oargs + nb_iargs + 1];
2968 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2969 if (nb_regs > nb_iargs) {
2970 nb_regs = nb_iargs;
2973 /* assign stack slots first */
2974 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
2975 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2976 ~(TCG_TARGET_STACK_ALIGN - 1);
2977 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2978 if (allocate_args) {
2979 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2980 preallocate call stack */
2981 tcg_abort();
2984 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2985 for (i = nb_regs; i < nb_iargs; i++) {
2986 arg = op->args[nb_oargs + i];
2987 #ifdef TCG_TARGET_STACK_GROWSUP
2988 stack_offset -= sizeof(tcg_target_long);
2989 #endif
2990 if (arg != TCG_CALL_DUMMY_ARG) {
2991 ts = arg_temp(arg);
2992 temp_load(s, ts, tcg_target_available_regs[ts->type],
2993 s->reserved_regs);
2994 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2996 #ifndef TCG_TARGET_STACK_GROWSUP
2997 stack_offset += sizeof(tcg_target_long);
2998 #endif
3001 /* assign input registers */
3002 allocated_regs = s->reserved_regs;
3003 for (i = 0; i < nb_regs; i++) {
3004 arg = op->args[nb_oargs + i];
3005 if (arg != TCG_CALL_DUMMY_ARG) {
3006 ts = arg_temp(arg);
3007 reg = tcg_target_call_iarg_regs[i];
3008 tcg_reg_free(s, reg, allocated_regs);
3010 if (ts->val_type == TEMP_VAL_REG) {
3011 if (ts->reg != reg) {
3012 tcg_out_mov(s, ts->type, reg, ts->reg);
3014 } else {
3015 TCGRegSet arg_set = 0;
3017 tcg_regset_set_reg(arg_set, reg);
3018 temp_load(s, ts, arg_set, allocated_regs);
3021 tcg_regset_set_reg(allocated_regs, reg);
3025 /* mark dead temporaries and free the associated registers */
3026 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3027 if (IS_DEAD_ARG(i)) {
3028 temp_dead(s, arg_temp(op->args[i]));
3032 /* clobber call registers */
3033 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3034 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3035 tcg_reg_free(s, i, allocated_regs);
3039 /* Save globals if they might be written by the helper, sync them if
3040 they might be read. */
3041 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3042 /* Nothing to do */
3043 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3044 sync_globals(s, allocated_regs);
3045 } else {
3046 save_globals(s, allocated_regs);
3049 tcg_out_call(s, func_addr);
3051 /* assign output registers and emit moves if needed */
3052 for(i = 0; i < nb_oargs; i++) {
3053 arg = op->args[i];
3054 ts = arg_temp(arg);
3055 reg = tcg_target_call_oarg_regs[i];
3056 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3058 if (ts->fixed_reg) {
3059 if (ts->reg != reg) {
3060 tcg_out_mov(s, ts->type, ts->reg, reg);
3062 } else {
3063 if (ts->val_type == TEMP_VAL_REG) {
3064 s->reg_to_temp[ts->reg] = NULL;
3066 ts->val_type = TEMP_VAL_REG;
3067 ts->reg = reg;
3068 ts->mem_coherent = 0;
3069 s->reg_to_temp[reg] = ts;
3070 if (NEED_SYNC_ARG(i)) {
3071 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3072 } else if (IS_DEAD_ARG(i)) {
3073 temp_dead(s, ts);
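/* Calling convention handling above, in outline: arguments that do not
   fit in tcg_target_call_iarg_regs are written to stack slots starting
   at TCG_TARGET_CALL_STACK_OFFSET (direction flipped by
   TCG_TARGET_STACK_GROWSUP); register arguments are forced into their
   fixed slots; call-clobbered registers are freed; globals are saved or
   synced according to the call flags; and results are picked up from
   tcg_target_call_oarg_regs. */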
3079 #ifdef CONFIG_PROFILER
3081 /* avoid copy/paste errors */
3082 #define PROF_ADD(to, from, field) \
3083 do { \
3084 (to)->field += atomic_read(&((from)->field)); \
3085 } while (0)
3087 #define PROF_MAX(to, from, field) \
3088 do { \
3089 typeof((from)->field) val__ = atomic_read(&((from)->field)); \
3090 if (val__ > (to)->field) { \
3091 (to)->field = val__; \
3093 } while (0)
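/* PROF_ADD(to, from, field) accumulates a counter across all registered
   TCGContexts, while PROF_MAX keeps the largest observed value; both are
   only used by tcg_profile_snapshot() below. */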
3095 /* Pass in a zeroed @prof */
3096 static inline
3097 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3099 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3100 unsigned int i;
3102 for (i = 0; i < n_ctxs; i++) {
3103 TCGContext *s = atomic_read(&tcg_ctxs[i]);
3104 const TCGProfile *orig = &s->prof;
3106 if (counters) {
3107 PROF_ADD(prof, orig, tb_count1);
3108 PROF_ADD(prof, orig, tb_count);
3109 PROF_ADD(prof, orig, op_count);
3110 PROF_MAX(prof, orig, op_count_max);
3111 PROF_ADD(prof, orig, temp_count);
3112 PROF_MAX(prof, orig, temp_count_max);
3113 PROF_ADD(prof, orig, del_op_count);
3114 PROF_ADD(prof, orig, code_in_len);
3115 PROF_ADD(prof, orig, code_out_len);
3116 PROF_ADD(prof, orig, search_out_len);
3117 PROF_ADD(prof, orig, interm_time);
3118 PROF_ADD(prof, orig, code_time);
3119 PROF_ADD(prof, orig, la_time);
3120 PROF_ADD(prof, orig, opt_time);
3121 PROF_ADD(prof, orig, restore_count);
3122 PROF_ADD(prof, orig, restore_time);
3124 if (table) {
3125 int i;
3127 for (i = 0; i < NB_OPS; i++) {
3128 PROF_ADD(prof, orig, table_op_count[i]);
3134 #undef PROF_ADD
3135 #undef PROF_MAX
3137 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3139 tcg_profile_snapshot(prof, true, false);
3142 static void tcg_profile_snapshot_table(TCGProfile *prof)
3144 tcg_profile_snapshot(prof, false, true);
3147 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3149 TCGProfile prof = {};
3150 int i;
3152 tcg_profile_snapshot_table(&prof);
3153 for (i = 0; i < NB_OPS; i++) {
3154 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3155 prof.table_op_count[i]);
3158 #else
3159 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3161 cpu_fprintf(f, "[TCG profiler not compiled]\n");
3163 #endif
3166 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3168 #ifdef CONFIG_PROFILER
3169 TCGProfile *prof = &s->prof;
3170 #endif
3171 int i, oi, oi_next, num_insns;
3173 #ifdef CONFIG_PROFILER
3175 int n;
3177 n = s->gen_op_buf[0].prev + 1;
3178 atomic_set(&prof->op_count, prof->op_count + n);
3179 if (n > prof->op_count_max) {
3180 atomic_set(&prof->op_count_max, n);
3183 n = s->nb_temps;
3184 atomic_set(&prof->temp_count, prof->temp_count + n);
3185 if (n > prof->temp_count_max) {
3186 atomic_set(&prof->temp_count_max, n);
3189 #endif
3191 #ifdef DEBUG_DISAS
3192 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3193 && qemu_log_in_addr_range(tb->pc))) {
3194 qemu_log_lock();
3195 qemu_log("OP:\n");
3196 tcg_dump_ops(s);
3197 qemu_log("\n");
3198 qemu_log_unlock();
3200 #endif
3202 #ifdef CONFIG_PROFILER
3203 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3204 #endif
3206 #ifdef USE_TCG_OPTIMIZATIONS
3207 tcg_optimize(s);
3208 #endif
3210 #ifdef CONFIG_PROFILER
3211 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3212 atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3213 #endif
3215 liveness_pass_1(s);
3217 if (s->nb_indirects > 0) {
3218 #ifdef DEBUG_DISAS
3219 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3220 && qemu_log_in_addr_range(tb->pc))) {
3221 qemu_log_lock();
3222 qemu_log("OP before indirect lowering:\n");
3223 tcg_dump_ops(s);
3224 qemu_log("\n");
3225 qemu_log_unlock();
3227 #endif
3228 /* Replace indirect temps with direct temps. */
3229 if (liveness_pass_2(s)) {
3230 /* If changes were made, re-run liveness. */
3231 liveness_pass_1(s);
3235 #ifdef CONFIG_PROFILER
3236 atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3237 #endif
3239 #ifdef DEBUG_DISAS
3240 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3241 && qemu_log_in_addr_range(tb->pc))) {
3242 qemu_log_lock();
3243 qemu_log("OP after optimization and liveness analysis:\n");
3244 tcg_dump_ops(s);
3245 qemu_log("\n");
3246 qemu_log_unlock();
3248 #endif
3250 tcg_reg_alloc_start(s);
3252 s->code_buf = tb->tc.ptr;
3253 s->code_ptr = tb->tc.ptr;
3255 #ifdef TCG_TARGET_NEED_LDST_LABELS
3256 s->ldst_labels = NULL;
3257 #endif
3258 #ifdef TCG_TARGET_NEED_POOL_LABELS
3259 s->pool_labels = NULL;
3260 #endif
3262 num_insns = -1;
3263 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
3264 TCGOp * const op = &s->gen_op_buf[oi];
3265 TCGOpcode opc = op->opc;
3267 oi_next = op->next;
3268 #ifdef CONFIG_PROFILER
3269 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3270 #endif
3272 switch (opc) {
3273 case INDEX_op_mov_i32:
3274 case INDEX_op_mov_i64:
3275 tcg_reg_alloc_mov(s, op);
3276 break;
3277 case INDEX_op_movi_i32:
3278 case INDEX_op_movi_i64:
3279 tcg_reg_alloc_movi(s, op);
3280 break;
3281 case INDEX_op_insn_start:
3282 if (num_insns >= 0) {
3283 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3285 num_insns++;
3286 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3287 target_ulong a;
3288 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3289 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3290 #else
3291 a = op->args[i];
3292 #endif
3293 s->gen_insn_data[num_insns][i] = a;
3295 break;
3296 case INDEX_op_discard:
3297 temp_dead(s, arg_temp(op->args[0]));
3298 break;
3299 case INDEX_op_set_label:
3300 tcg_reg_alloc_bb_end(s, s->reserved_regs);
3301 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3302 break;
3303 case INDEX_op_call:
3304 tcg_reg_alloc_call(s, op);
3305 break;
3306 default:
3307 /* Sanity check that we've not introduced any unhandled opcodes. */
3308 tcg_debug_assert(tcg_op_supported(opc));
3309 /* Note: in order to speed up the code, it would be much
3310 faster to have specialized register allocator functions for
3311 some common argument patterns */
3312 tcg_reg_alloc_op(s, op);
3313 break;
3315 #ifdef CONFIG_DEBUG_TCG
3316 check_regs(s);
3317 #endif
3318 /* Test for (pending) buffer overflow. The assumption is that any
3319 one operation beginning below the high water mark cannot overrun
3320 the buffer completely. Thus we can test for overflow after
3321 generating code without having to check during generation. */
3322 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3323 return -1;
3326 tcg_debug_assert(num_insns >= 0);
3327 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3329 /* Generate TB finalization at the end of block */
3330 #ifdef TCG_TARGET_NEED_LDST_LABELS
3331 if (!tcg_out_ldst_finalize(s)) {
3332 return -1;
3334 #endif
3335 #ifdef TCG_TARGET_NEED_POOL_LABELS
3336 if (!tcg_out_pool_finalize(s)) {
3337 return -1;
3339 #endif
3341 /* flush instruction cache */
3342 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3344 return tcg_current_code_size(s);
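/* Compilation pipeline recap for tcg_gen_code(): optional tcg_optimize(),
   liveness_pass_1(), then liveness_pass_2() plus a second liveness pass
   when indirect temps exist, followed by the register-allocation and
   emission loop above; ldst and constant-pool labels are finalized at the
   end and the instruction cache is flushed.  A return value of -1 signals
   a (pending) code buffer overflow to the caller. */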
3347 #ifdef CONFIG_PROFILER
3348 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3350 TCGProfile prof = {};
3351 const TCGProfile *s;
3352 int64_t tb_count;
3353 int64_t tb_div_count;
3354 int64_t tot;
3356 tcg_profile_snapshot_counters(&prof);
3357 s = &prof;
3358 tb_count = s->tb_count;
3359 tb_div_count = tb_count ? tb_count : 1;
3360 tot = s->interm_time + s->code_time;
3362 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3363 tot, tot / 2.4e9);
3364 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3365 tb_count, s->tb_count1 - tb_count,
3366 (double)(s->tb_count1 - s->tb_count)
3367 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3368 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
3369 (double)s->op_count / tb_div_count, s->op_count_max);
3370 cpu_fprintf(f, "deleted ops/TB %0.2f\n",
3371 (double)s->del_op_count / tb_div_count);
3372 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
3373 (double)s->temp_count / tb_div_count, s->temp_count_max);
3374 cpu_fprintf(f, "avg host code/TB %0.1f\n",
3375 (double)s->code_out_len / tb_div_count);
3376 cpu_fprintf(f, "avg search data/TB %0.1f\n",
3377 (double)s->search_out_len / tb_div_count);
3379 cpu_fprintf(f, "cycles/op %0.1f\n",
3380 s->op_count ? (double)tot / s->op_count : 0);
3381 cpu_fprintf(f, "cycles/in byte %0.1f\n",
3382 s->code_in_len ? (double)tot / s->code_in_len : 0);
3383 cpu_fprintf(f, "cycles/out byte %0.1f\n",
3384 s->code_out_len ? (double)tot / s->code_out_len : 0);
3385 cpu_fprintf(f, "cycles/search byte %0.1f\n",
3386 s->search_out_len ? (double)tot / s->search_out_len : 0);
3387 if (tot == 0) {
3388 tot = 1;
3390 cpu_fprintf(f, " gen_interm time %0.1f%%\n",
3391 (double)s->interm_time / tot * 100.0);
3392 cpu_fprintf(f, " gen_code time %0.1f%%\n",
3393 (double)s->code_time / tot * 100.0);
3394 cpu_fprintf(f, "optim./code time %0.1f%%\n",
3395 (double)s->opt_time / (s->code_time ? s->code_time : 1)
3396 * 100.0);
3397 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
3398 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3399 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
3400 s->restore_count);
3401 cpu_fprintf(f, " avg cycles %0.1f\n",
3402 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3404 #else
3405 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3407 cpu_fprintf(f, "[TCG profiler not compiled]\n");
3409 #endif
3411 #ifdef ELF_HOST_MACHINE
3412 /* In order to use this feature, the backend needs to do three things:
3414 (1) Define ELF_HOST_MACHINE to indicate both what value to
3415 put into the ELF image and to indicate support for the feature.
3417 (2) Define tcg_register_jit. This should create a buffer containing
3418 the contents of a .debug_frame section that describes the post-
3419 prologue unwind info for the tcg machine.
3421 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3424 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
3425 typedef enum {
3426 JIT_NOACTION = 0,
3427 JIT_REGISTER_FN,
3428 JIT_UNREGISTER_FN
3429 } jit_actions_t;
3431 struct jit_code_entry {
3432 struct jit_code_entry *next_entry;
3433 struct jit_code_entry *prev_entry;
3434 const void *symfile_addr;
3435 uint64_t symfile_size;
3438 struct jit_descriptor {
3439 uint32_t version;
3440 uint32_t action_flag;
3441 struct jit_code_entry *relevant_entry;
3442 struct jit_code_entry *first_entry;
3445 void __jit_debug_register_code(void) __attribute__((noinline));
3446 void __jit_debug_register_code(void)
3448 asm("");
3451 /* Must statically initialize the version, because GDB may check
3452 the version before we can set it. */
3453 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3455 /* End GDB interface. */
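/* Registration itself, performed at the end of tcg_register_jit_int()
   below, follows the standard GDB JIT interface: fill in a jit_code_entry
   with the in-memory ELF image, set action_flag to JIT_REGISTER_FN, link
   the entry into __jit_debug_descriptor, and call
   __jit_debug_register_code(), the empty function a debugger is expected
   to breakpoint in order to notice newly generated code. */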
3457 static int find_string(const char *strtab, const char *str)
3459 const char *p = strtab + 1;
3461 while (1) {
3462 if (strcmp(p, str) == 0) {
3463 return p - strtab;
3465 p += strlen(p) + 1;
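/* find_string() returns the byte offset of 'str' inside a NUL-separated
   string table such as img->str below; it assumes the string is present,
   which holds for every name looked up in tcg_register_jit_int(). */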
3469 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3470 const void *debug_frame,
3471 size_t debug_frame_size)
3473 struct __attribute__((packed)) DebugInfo {
3474 uint32_t len;
3475 uint16_t version;
3476 uint32_t abbrev;
3477 uint8_t ptr_size;
3478 uint8_t cu_die;
3479 uint16_t cu_lang;
3480 uintptr_t cu_low_pc;
3481 uintptr_t cu_high_pc;
3482 uint8_t fn_die;
3483 char fn_name[16];
3484 uintptr_t fn_low_pc;
3485 uintptr_t fn_high_pc;
3486 uint8_t cu_eoc;
3489 struct ElfImage {
3490 ElfW(Ehdr) ehdr;
3491 ElfW(Phdr) phdr;
3492 ElfW(Shdr) shdr[7];
3493 ElfW(Sym) sym[2];
3494 struct DebugInfo di;
3495 uint8_t da[24];
3496 char str[80];
3499 struct ElfImage *img;
3501 static const struct ElfImage img_template = {
3502 .ehdr = {
3503 .e_ident[EI_MAG0] = ELFMAG0,
3504 .e_ident[EI_MAG1] = ELFMAG1,
3505 .e_ident[EI_MAG2] = ELFMAG2,
3506 .e_ident[EI_MAG3] = ELFMAG3,
3507 .e_ident[EI_CLASS] = ELF_CLASS,
3508 .e_ident[EI_DATA] = ELF_DATA,
3509 .e_ident[EI_VERSION] = EV_CURRENT,
3510 .e_type = ET_EXEC,
3511 .e_machine = ELF_HOST_MACHINE,
3512 .e_version = EV_CURRENT,
3513 .e_phoff = offsetof(struct ElfImage, phdr),
3514 .e_shoff = offsetof(struct ElfImage, shdr),
3515 .e_ehsize = sizeof(ElfW(Shdr)),
3516 .e_phentsize = sizeof(ElfW(Phdr)),
3517 .e_phnum = 1,
3518 .e_shentsize = sizeof(ElfW(Shdr)),
3519 .e_shnum = ARRAY_SIZE(img->shdr),
3520 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3521 #ifdef ELF_HOST_FLAGS
3522 .e_flags = ELF_HOST_FLAGS,
3523 #endif
3524 #ifdef ELF_OSABI
3525 .e_ident[EI_OSABI] = ELF_OSABI,
3526 #endif
3528 .phdr = {
3529 .p_type = PT_LOAD,
3530 .p_flags = PF_X,
3532 .shdr = {
3533 [0] = { .sh_type = SHT_NULL },
3534 /* Trick: The contents of code_gen_buffer are not present in
3535 this fake ELF file; that got allocated elsewhere. Therefore
3536 we mark .text as SHT_NOBITS (similar to .bss) so that readers
3537 will not look for contents. We can record any address. */
3538 [1] = { /* .text */
3539 .sh_type = SHT_NOBITS,
3540 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3542 [2] = { /* .debug_info */
3543 .sh_type = SHT_PROGBITS,
3544 .sh_offset = offsetof(struct ElfImage, di),
3545 .sh_size = sizeof(struct DebugInfo),
3547 [3] = { /* .debug_abbrev */
3548 .sh_type = SHT_PROGBITS,
3549 .sh_offset = offsetof(struct ElfImage, da),
3550 .sh_size = sizeof(img->da),
3552 [4] = { /* .debug_frame */
3553 .sh_type = SHT_PROGBITS,
3554 .sh_offset = sizeof(struct ElfImage),
3556 [5] = { /* .symtab */
3557 .sh_type = SHT_SYMTAB,
3558 .sh_offset = offsetof(struct ElfImage, sym),
3559 .sh_size = sizeof(img->sym),
3560 .sh_info = 1,
3561 .sh_link = ARRAY_SIZE(img->shdr) - 1,
3562 .sh_entsize = sizeof(ElfW(Sym)),
3564 [6] = { /* .strtab */
3565 .sh_type = SHT_STRTAB,
3566 .sh_offset = offsetof(struct ElfImage, str),
3567 .sh_size = sizeof(img->str),
3570 .sym = {
3571 [1] = { /* code_gen_buffer */
3572 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3573 .st_shndx = 1,
3576 .di = {
3577 .len = sizeof(struct DebugInfo) - 4,
3578 .version = 2,
3579 .ptr_size = sizeof(void *),
3580 .cu_die = 1,
3581 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
3582 .fn_die = 2,
3583 .fn_name = "code_gen_buffer"
3585 .da = {
3586 1, /* abbrev number (the cu) */
3587 0x11, 1, /* DW_TAG_compile_unit, has children */
3588 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
3589 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
3590 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
3591 0, 0, /* end of abbrev */
3592 2, /* abbrev number (the fn) */
3593 0x2e, 0, /* DW_TAG_subprogram, no children */
3594 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
3595 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
3596 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
3597 0, 0, /* end of abbrev */
3598 0 /* no more abbrev */
3600 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3601 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3604 /* We only need a single jit entry; statically allocate it. */
3605 static struct jit_code_entry one_entry;
3607 uintptr_t buf = (uintptr_t)buf_ptr;
3608 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3609 DebugFrameHeader *dfh;
3611 img = g_malloc(img_size);
3612 *img = img_template;
3614 img->phdr.p_vaddr = buf;
3615 img->phdr.p_paddr = buf;
3616 img->phdr.p_memsz = buf_size;
3618 img->shdr[1].sh_name = find_string(img->str, ".text");
3619 img->shdr[1].sh_addr = buf;
3620 img->shdr[1].sh_size = buf_size;
3622 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3623 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3625 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3626 img->shdr[4].sh_size = debug_frame_size;
3628 img->shdr[5].sh_name = find_string(img->str, ".symtab");
3629 img->shdr[6].sh_name = find_string(img->str, ".strtab");
3631 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3632 img->sym[1].st_value = buf;
3633 img->sym[1].st_size = buf_size;
3635 img->di.cu_low_pc = buf;
3636 img->di.cu_high_pc = buf + buf_size;
3637 img->di.fn_low_pc = buf;
3638 img->di.fn_high_pc = buf + buf_size;
3640 dfh = (DebugFrameHeader *)(img + 1);
3641 memcpy(dfh, debug_frame, debug_frame_size);
3642 dfh->fde.func_start = buf;
3643 dfh->fde.func_len = buf_size;
3645 #ifdef DEBUG_JIT
3646 /* Enable this block to be able to debug the ELF image file creation.
3647 One can use readelf, objdump, or other inspection utilities. */
3649 FILE *f = fopen("/tmp/qemu.jit", "w+b");
3650 if (f) {
3651 if (fwrite(img, img_size, 1, f) != 1) {
3652 /* Avoid stupid unused return value warning for fwrite. */
3654 fclose(f);
3657 #endif
3659 one_entry.symfile_addr = img;
3660 one_entry.symfile_size = img_size;
3662 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3663 __jit_debug_descriptor.relevant_entry = &one_entry;
3664 __jit_debug_descriptor.first_entry = &one_entry;
3665 __jit_debug_register_code();
3667 #else
3668 /* No support for the feature. Provide the entry point expected by exec.c,
3669 and implement the internal function we declared earlier. */
3671 static void tcg_register_jit_int(void *buf, size_t size,
3672 const void *debug_frame,
3673 size_t debug_frame_size)
3677 void tcg_register_jit(void *buf, size_t buf_size)
3680 #endif /* ELF_HOST_MACHINE */