mips: malta/boston: replace cpu_model with cpu_type
[qemu/kevin.git] / tcg / tcg.c
blob683ff4abb7efbd4a245e4d61e092d2294701ea2a
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to jump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/cutils.h"
34 #include "qemu/host-utils.h"
35 #include "qemu/timer.h"
37 /* Note: the long term plan is to reduce the dependencies on the QEMU
38 CPU definitions. Currently they are used for qemu_ld/st
39 instructions */
40 #define NO_CPU_IO_DEFS
41 #include "cpu.h"
43 #include "exec/cpu-common.h"
44 #include "exec/exec-all.h"
46 #include "tcg-op.h"
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS ELFCLASS32
50 #else
51 # define ELF_CLASS ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA ELFDATA2MSB
55 #else
56 # define ELF_DATA ELFDATA2LSB
57 #endif
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "sysemu/sysemu.h"
63 /* Forward declarations for functions declared in tcg-target.inc.c and
64 used here. */
65 static void tcg_target_init(TCGContext *s);
66 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69 intptr_t value, intptr_t addend);
71 /* The CIE and FDE header definitions will be common to all hosts. */
/*
 * Common Information Entry header for the synthetic .debug_frame data
 * that tcg_register_jit_int() hands to GDB.  The leading length word is
 * aligned to the host pointer size so the FDE that follows packs cleanly.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
/*
 * Frame Description Entry header; packed so that func_start/func_len
 * immediately follow the CIE offset with no host-dependent padding.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
89 typedef struct QEMU_PACKED {
90 DebugFrameCIE cie;
91 DebugFrameFDEHeader fde;
92 } DebugFrameHeader;
94 static void tcg_register_jit_int(void *buf, size_t size,
95 const void *debug_frame,
96 size_t debug_frame_size)
97 __attribute__((unused));
99 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
100 static const char *target_parse_constraint(TCGArgConstraint *ct,
101 const char *ct_str, TCGType type);
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103 intptr_t arg2);
104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106 TCGReg ret, tcg_target_long arg);
107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
108 const int *const_args);
109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110 intptr_t arg2);
111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
112 TCGReg base, intptr_t ofs);
113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
114 static int tcg_target_const_match(tcg_target_long val, TCGType type,
115 const TCGArgConstraint *arg_ct);
116 #ifdef TCG_TARGET_NEED_LDST_LABELS
117 static bool tcg_out_ldst_finalize(TCGContext *s);
118 #endif
120 #define TCG_HIGHWATER 1024
122 static TCGContext **tcg_ctxs;
123 static unsigned int n_tcg_ctxs;
124 TCGv_env cpu_env = 0;
127 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
128 * dynamically allocate from as demand dictates. Given appropriate region
129 * sizing, this minimizes flushes even when some TCG threads generate a lot
130 * more code than others.
132 struct tcg_region_state {
133 QemuMutex lock;
135 /* fields set at init time */
136 void *start;
137 void *start_aligned;
138 void *end;
139 size_t n;
140 size_t size; /* size of one region */
141 size_t stride; /* .size + guard size */
143 /* fields protected by the lock */
144 size_t current; /* current region index */
145 size_t agg_size_full; /* aggregate size of full regions */
148 static struct tcg_region_state region;
150 static TCGRegSet tcg_target_available_regs[2];
151 static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte of code at the current output position. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one already-emitted byte at P. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
169 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
170 *s->code_ptr++ = v;
171 } else {
172 tcg_insn_unit *p = s->code_ptr;
173 memcpy(p, &v, sizeof(v));
174 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
179 uint16_t v)
181 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
182 *p = v;
183 } else {
184 memcpy(p, &v, sizeof(v));
187 #endif
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
192 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
193 *s->code_ptr++ = v;
194 } else {
195 tcg_insn_unit *p = s->code_ptr;
196 memcpy(p, &v, sizeof(v));
197 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
202 uint32_t v)
204 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
205 *p = v;
206 } else {
207 memcpy(p, &v, sizeof(v));
210 #endif
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
215 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
216 *s->code_ptr++ = v;
217 } else {
218 tcg_insn_unit *p = s->code_ptr;
219 memcpy(p, &v, sizeof(v));
220 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
225 uint64_t v)
227 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
228 *p = v;
229 } else {
230 memcpy(p, &v, sizeof(v));
233 #endif
235 /* label relocation processing */
237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
238 TCGLabel *l, intptr_t addend)
240 TCGRelocation *r;
242 if (l->has_value) {
243 /* FIXME: This may break relocations on RISC targets that
244 modify instruction fields in place. The caller may not have
245 written the initial value. */
246 patch_reloc(code_ptr, type, l->u.value, addend);
247 } else {
248 /* add a new relocation entry */
249 r = tcg_malloc(sizeof(TCGRelocation));
250 r->type = type;
251 r->ptr = code_ptr;
252 r->addend = addend;
253 r->next = l->u.first_reloc;
254 l->u.first_reloc = r;
258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
260 intptr_t value = (intptr_t)ptr;
261 TCGRelocation *r;
263 tcg_debug_assert(!l->has_value);
265 for (r = l->u.first_reloc; r != NULL; r = r->next) {
266 patch_reloc(r->ptr, r->type, value, r->addend);
269 l->has_value = 1;
270 l->u.value_ptr = ptr;
273 TCGLabel *gen_new_label(void)
275 TCGContext *s = tcg_ctx;
276 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
278 *l = (TCGLabel){
279 .id = s->nb_labels++
282 return l;
285 #include "tcg-target.inc.c"
287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
289 void *start, *end;
291 start = region.start_aligned + curr_region * region.stride;
292 end = start + region.size;
294 if (curr_region == 0) {
295 start = region.start;
297 if (curr_region == region.n - 1) {
298 end = region.end;
301 *pstart = start;
302 *pend = end;
305 static void tcg_region_assign(TCGContext *s, size_t curr_region)
307 void *start, *end;
309 tcg_region_bounds(curr_region, &start, &end);
311 s->code_gen_buffer = start;
312 s->code_gen_ptr = start;
313 s->code_gen_buffer_size = end - start;
314 s->code_gen_highwater = end - TCG_HIGHWATER;
317 static bool tcg_region_alloc__locked(TCGContext *s)
319 if (region.current == region.n) {
320 return true;
322 tcg_region_assign(s, region.current);
323 region.current++;
324 return false;
328 * Request a new region once the one in use has filled up.
329 * Returns true on error.
331 static bool tcg_region_alloc(TCGContext *s)
333 bool err;
334 /* read the region size now; alloc__locked will overwrite it on success */
335 size_t size_full = s->code_gen_buffer_size;
337 qemu_mutex_lock(&region.lock);
338 err = tcg_region_alloc__locked(s);
339 if (!err) {
340 region.agg_size_full += size_full - TCG_HIGHWATER;
342 qemu_mutex_unlock(&region.lock);
343 return err;
347 * Perform a context's first region allocation.
348 * This function does _not_ increment region.agg_size_full.
350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
352 return tcg_region_alloc__locked(s);
355 /* Call from a safe-work context */
356 void tcg_region_reset_all(void)
358 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
359 unsigned int i;
361 qemu_mutex_lock(&region.lock);
362 region.current = 0;
363 region.agg_size_full = 0;
365 for (i = 0; i < n_ctxs; i++) {
366 TCGContext *s = atomic_read(&tcg_ctxs[i]);
367 bool err = tcg_region_initial_alloc__locked(s);
369 g_assert(!err);
371 qemu_mutex_unlock(&region.lock);
374 #ifdef CONFIG_USER_ONLY
375 static size_t tcg_n_regions(void)
377 return 1;
379 #else
381 * It is likely that some vCPUs will translate more code than others, so we
382 * first try to set more regions than max_cpus, with those regions being of
383 * reasonable size. If that's not possible we make do by evenly dividing
384 * the code_gen_buffer among the vCPUs.
386 static size_t tcg_n_regions(void)
388 size_t i;
390 /* Use a single region if all we have is one vCPU thread */
391 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
392 return 1;
395 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
396 for (i = 8; i > 0; i--) {
397 size_t regions_per_thread = i;
398 size_t region_size;
400 region_size = tcg_init_ctx.code_gen_buffer_size;
401 region_size /= max_cpus * regions_per_thread;
403 if (region_size >= 2 * 1024u * 1024) {
404 return max_cpus * regions_per_thread;
407 /* If we can't, then just allocate one region per vCPU thread */
408 return max_cpus;
410 #endif
413 * Initializes region partitioning.
415 * Called at init time from the parent thread (i.e. the one calling
416 * tcg_context_init), after the target's TCG globals have been set.
418 * Region partitioning works by splitting code_gen_buffer into separate regions,
419 * and then assigning regions to TCG threads so that the threads can translate
420 * code in parallel without synchronization.
422 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
423 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
424 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
425 * must have been parsed before calling this function, since it calls
426 * qemu_tcg_mttcg_enabled().
428 * In user-mode we use a single region. Having multiple regions in user-mode
429 * is not supported, because the number of vCPU threads (recall that each thread
430 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
431 * OS, and usually this number is huge (tens of thousands is not uncommon).
432 * Thus, given this large bound on the number of vCPU threads and the fact
433 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
434 * that the availability of at least one region per vCPU thread.
436 * However, this user-mode limitation is unlikely to be a significant problem
437 * in practice. Multi-threaded guests share most if not all of their translated
438 * code, which makes parallel code generation less appealing than in softmmu.
440 void tcg_region_init(void)
442 void *buf = tcg_init_ctx.code_gen_buffer;
443 void *aligned;
444 size_t size = tcg_init_ctx.code_gen_buffer_size;
445 size_t page_size = qemu_real_host_page_size;
446 size_t region_size;
447 size_t n_regions;
448 size_t i;
450 n_regions = tcg_n_regions();
452 /* The first region will be 'aligned - buf' bytes larger than the others */
453 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
454 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
456 * Make region_size a multiple of page_size, using aligned as the start.
457 * As a result of this we might end up with a few extra pages at the end of
458 * the buffer; we will assign those to the last region.
460 region_size = (size - (aligned - buf)) / n_regions;
461 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
463 /* A region must have at least 2 pages; one code, one guard */
464 g_assert(region_size >= 2 * page_size);
466 /* init the region struct */
467 qemu_mutex_init(&region.lock);
468 region.n = n_regions;
469 region.size = region_size - page_size;
470 region.stride = region_size;
471 region.start = buf;
472 region.start_aligned = aligned;
473 /* page-align the end, since its last page will be a guard page */
474 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
475 /* account for that last guard page */
476 region.end -= page_size;
478 /* set guard pages */
479 for (i = 0; i < region.n; i++) {
480 void *start, *end;
481 int rc;
483 tcg_region_bounds(i, &start, &end);
484 rc = qemu_mprotect_none(end, page_size);
485 g_assert(!rc);
488 /* In user-mode we support only one ctx, so do the initial allocation now */
489 #ifdef CONFIG_USER_ONLY
491 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
493 g_assert(!err);
495 #endif
499 * All TCG threads except the parent (i.e. the one that called tcg_context_init
500 * and registered the target's TCG globals) must register with this function
501 * before initiating translation.
503 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
504 * of tcg_region_init() for the reasoning behind this.
506 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
507 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
508 * is not used anymore for translation once this function is called.
510 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
511 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
513 #ifdef CONFIG_USER_ONLY
514 void tcg_register_thread(void)
516 tcg_ctx = &tcg_init_ctx;
518 #else
519 void tcg_register_thread(void)
521 TCGContext *s = g_malloc(sizeof(*s));
522 unsigned int i, n;
523 bool err;
525 *s = tcg_init_ctx;
527 /* Relink mem_base. */
528 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
529 if (tcg_init_ctx.temps[i].mem_base) {
530 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
531 tcg_debug_assert(b >= 0 && b < n);
532 s->temps[i].mem_base = &s->temps[b];
536 /* Claim an entry in tcg_ctxs */
537 n = atomic_fetch_inc(&n_tcg_ctxs);
538 g_assert(n < max_cpus);
539 atomic_set(&tcg_ctxs[n], s);
541 tcg_ctx = s;
542 qemu_mutex_lock(&region.lock);
543 err = tcg_region_initial_alloc__locked(tcg_ctx);
544 g_assert(!err);
545 qemu_mutex_unlock(&region.lock);
547 #endif /* !CONFIG_USER_ONLY */
550 * Returns the size (in bytes) of all translated code (i.e. from all regions)
551 * currently in the cache.
552 * See also: tcg_code_capacity()
553 * Do not confuse with tcg_current_code_size(); that one applies to a single
554 * TCG context.
556 size_t tcg_code_size(void)
558 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
559 unsigned int i;
560 size_t total;
562 qemu_mutex_lock(&region.lock);
563 total = region.agg_size_full;
564 for (i = 0; i < n_ctxs; i++) {
565 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
566 size_t size;
568 size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
569 g_assert(size <= s->code_gen_buffer_size);
570 total += size;
572 qemu_mutex_unlock(&region.lock);
573 return total;
577 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
578 * regions.
579 * See also: tcg_code_size()
581 size_t tcg_code_capacity(void)
583 size_t guard_size, capacity;
585 /* no need for synchronization; these variables are set at init time */
586 guard_size = region.stride - region.size;
587 capacity = region.end + guard_size - region.start;
588 capacity -= region.n * (guard_size + TCG_HIGHWATER);
589 return capacity;
592 /* pool based memory allocation */
593 void *tcg_malloc_internal(TCGContext *s, int size)
595 TCGPool *p;
596 int pool_size;
598 if (size > TCG_POOL_CHUNK_SIZE) {
599 /* big malloc: insert a new pool (XXX: could optimize) */
600 p = g_malloc(sizeof(TCGPool) + size);
601 p->size = size;
602 p->next = s->pool_first_large;
603 s->pool_first_large = p;
604 return p->data;
605 } else {
606 p = s->pool_current;
607 if (!p) {
608 p = s->pool_first;
609 if (!p)
610 goto new_pool;
611 } else {
612 if (!p->next) {
613 new_pool:
614 pool_size = TCG_POOL_CHUNK_SIZE;
615 p = g_malloc(sizeof(TCGPool) + pool_size);
616 p->size = pool_size;
617 p->next = NULL;
618 if (s->pool_current)
619 s->pool_current->next = p;
620 else
621 s->pool_first = p;
622 } else {
623 p = p->next;
627 s->pool_current = p;
628 s->pool_cur = p->data + size;
629 s->pool_end = p->data + p->size;
630 return p->data;
633 void tcg_pool_reset(TCGContext *s)
635 TCGPool *p, *t;
636 for (p = s->pool_first_large; p; p = t) {
637 t = p->next;
638 g_free(p);
640 s->pool_first_large = NULL;
641 s->pool_cur = s->pool_end = NULL;
642 s->pool_current = NULL;
/* Metadata for one runtime helper: entry point, printable name, call
   flags, and the sign/size mask describing its arguments. */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
652 #include "exec/helper-proto.h"
654 static const TCGHelperInfo all_helpers[] = {
655 #include "exec/helper-tcg.h"
657 static GHashTable *helper_table;
659 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
660 static void process_op_defs(TCGContext *s);
661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
662 TCGReg reg, const char *name);
664 void tcg_context_init(TCGContext *s)
666 int op, total_args, n, i;
667 TCGOpDef *def;
668 TCGArgConstraint *args_ct;
669 int *sorted_args;
670 TCGTemp *ts;
672 memset(s, 0, sizeof(*s));
673 s->nb_globals = 0;
675 /* Count total number of arguments and allocate the corresponding
676 space */
677 total_args = 0;
678 for(op = 0; op < NB_OPS; op++) {
679 def = &tcg_op_defs[op];
680 n = def->nb_iargs + def->nb_oargs;
681 total_args += n;
684 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
685 sorted_args = g_malloc(sizeof(int) * total_args);
687 for(op = 0; op < NB_OPS; op++) {
688 def = &tcg_op_defs[op];
689 def->args_ct = args_ct;
690 def->sorted_args = sorted_args;
691 n = def->nb_iargs + def->nb_oargs;
692 sorted_args += n;
693 args_ct += n;
696 /* Register helpers. */
697 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
698 helper_table = g_hash_table_new(NULL, NULL);
700 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
701 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
702 (gpointer)&all_helpers[i]);
705 tcg_target_init(s);
706 process_op_defs(s);
708 /* Reverse the order of the saved registers, assuming they're all at
709 the start of tcg_target_reg_alloc_order. */
710 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
711 int r = tcg_target_reg_alloc_order[n];
712 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
713 break;
716 for (i = 0; i < n; ++i) {
717 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
719 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
720 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
723 tcg_ctx = s;
725 * In user-mode we simply share the init context among threads, since we
726 * use a single region. See the documentation tcg_region_init() for the
727 * reasoning behind this.
728 * In softmmu we will have at most max_cpus TCG threads.
730 #ifdef CONFIG_USER_ONLY
731 tcg_ctxs = &tcg_ctx;
732 n_tcg_ctxs = 1;
733 #else
734 tcg_ctxs = g_new(TCGContext *, max_cpus);
735 #endif
737 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
738 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
739 cpu_env = temp_tcgv_ptr(ts);
743 * Allocate TBs right before their corresponding translated code, making
744 * sure that TBs and code are on different cache lines.
746 TranslationBlock *tcg_tb_alloc(TCGContext *s)
748 uintptr_t align = qemu_icache_linesize;
749 TranslationBlock *tb;
750 void *next;
752 retry:
753 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
754 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
756 if (unlikely(next > s->code_gen_highwater)) {
757 if (tcg_region_alloc(s)) {
758 return NULL;
760 goto retry;
762 atomic_set(&s->code_gen_ptr, next);
763 s->data_gen_ptr = NULL;
764 return tb;
767 void tcg_prologue_init(TCGContext *s)
769 size_t prologue_size, total_size;
770 void *buf0, *buf1;
772 /* Put the prologue at the beginning of code_gen_buffer. */
773 buf0 = s->code_gen_buffer;
774 s->code_ptr = buf0;
775 s->code_buf = buf0;
776 s->code_gen_prologue = buf0;
778 /* Generate the prologue. */
779 tcg_target_qemu_prologue(s);
780 buf1 = s->code_ptr;
781 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
783 /* Deduct the prologue from the buffer. */
784 prologue_size = tcg_current_code_size(s);
785 s->code_gen_ptr = buf1;
786 s->code_gen_buffer = buf1;
787 s->code_buf = buf1;
788 total_size = s->code_gen_buffer_size - prologue_size;
789 s->code_gen_buffer_size = total_size;
791 /* Compute a high-water mark, at which we voluntarily flush the buffer
792 and start over. The size here is arbitrary, significantly larger
793 than we expect the code generation for any one opcode to require. */
794 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
796 tcg_register_jit(s->code_gen_buffer, total_size);
798 #ifdef DEBUG_DISAS
799 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
800 qemu_log_lock();
801 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
802 log_disas(buf0, prologue_size);
803 qemu_log("\n");
804 qemu_log_flush();
805 qemu_log_unlock();
807 #endif
809 /* Assert that goto_ptr is implemented completely. */
810 if (TCG_TARGET_HAS_goto_ptr) {
811 tcg_debug_assert(s->code_gen_epilogue != NULL);
815 void tcg_func_start(TCGContext *s)
817 tcg_pool_reset(s);
818 s->nb_temps = s->nb_globals;
820 /* No temps have been previously allocated for size or locality. */
821 memset(s->free_temps, 0, sizeof(s->free_temps));
823 s->nb_labels = 0;
824 s->current_frame_offset = s->frame_start;
826 #ifdef CONFIG_DEBUG_TCG
827 s->goto_tb_issue_mask = 0;
828 #endif
830 s->gen_op_buf[0].next = 1;
831 s->gen_op_buf[0].prev = 0;
832 s->gen_next_op_idx = 1;
835 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
837 int n = s->nb_temps++;
838 tcg_debug_assert(n < TCG_MAX_TEMPS);
839 return memset(&s->temps[n], 0, sizeof(TCGTemp));
842 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
844 TCGTemp *ts;
846 tcg_debug_assert(s->nb_globals == s->nb_temps);
847 s->nb_globals++;
848 ts = tcg_temp_alloc(s);
849 ts->temp_global = 1;
851 return ts;
854 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
855 TCGReg reg, const char *name)
857 TCGTemp *ts;
859 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
860 tcg_abort();
863 ts = tcg_global_alloc(s);
864 ts->base_type = type;
865 ts->type = type;
866 ts->fixed_reg = 1;
867 ts->reg = reg;
868 ts->name = name;
869 tcg_regset_set_reg(s->reserved_regs, reg);
871 return ts;
874 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
876 s->frame_start = start;
877 s->frame_end = start + size;
878 s->frame_temp
879 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
882 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
883 intptr_t offset, const char *name)
885 TCGContext *s = tcg_ctx;
886 TCGTemp *base_ts = tcgv_ptr_temp(base);
887 TCGTemp *ts = tcg_global_alloc(s);
888 int indirect_reg = 0, bigendian = 0;
889 #ifdef HOST_WORDS_BIGENDIAN
890 bigendian = 1;
891 #endif
893 if (!base_ts->fixed_reg) {
894 /* We do not support double-indirect registers. */
895 tcg_debug_assert(!base_ts->indirect_reg);
896 base_ts->indirect_base = 1;
897 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
898 ? 2 : 1);
899 indirect_reg = 1;
902 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
903 TCGTemp *ts2 = tcg_global_alloc(s);
904 char buf[64];
906 ts->base_type = TCG_TYPE_I64;
907 ts->type = TCG_TYPE_I32;
908 ts->indirect_reg = indirect_reg;
909 ts->mem_allocated = 1;
910 ts->mem_base = base_ts;
911 ts->mem_offset = offset + bigendian * 4;
912 pstrcpy(buf, sizeof(buf), name);
913 pstrcat(buf, sizeof(buf), "_0");
914 ts->name = strdup(buf);
916 tcg_debug_assert(ts2 == ts + 1);
917 ts2->base_type = TCG_TYPE_I64;
918 ts2->type = TCG_TYPE_I32;
919 ts2->indirect_reg = indirect_reg;
920 ts2->mem_allocated = 1;
921 ts2->mem_base = base_ts;
922 ts2->mem_offset = offset + (1 - bigendian) * 4;
923 pstrcpy(buf, sizeof(buf), name);
924 pstrcat(buf, sizeof(buf), "_1");
925 ts2->name = strdup(buf);
926 } else {
927 ts->base_type = type;
928 ts->type = type;
929 ts->indirect_reg = indirect_reg;
930 ts->mem_allocated = 1;
931 ts->mem_base = base_ts;
932 ts->mem_offset = offset;
933 ts->name = name;
935 return ts;
938 static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local)
940 TCGContext *s = tcg_ctx;
941 TCGTemp *ts;
942 int idx, k;
944 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
945 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
946 if (idx < TCG_MAX_TEMPS) {
947 /* There is already an available temp with the right type. */
948 clear_bit(idx, s->free_temps[k].l);
950 ts = &s->temps[idx];
951 ts->temp_allocated = 1;
952 tcg_debug_assert(ts->base_type == type);
953 tcg_debug_assert(ts->temp_local == temp_local);
954 } else {
955 ts = tcg_temp_alloc(s);
956 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
957 TCGTemp *ts2 = tcg_temp_alloc(s);
959 ts->base_type = type;
960 ts->type = TCG_TYPE_I32;
961 ts->temp_allocated = 1;
962 ts->temp_local = temp_local;
964 tcg_debug_assert(ts2 == ts + 1);
965 ts2->base_type = TCG_TYPE_I64;
966 ts2->type = TCG_TYPE_I32;
967 ts2->temp_allocated = 1;
968 ts2->temp_local = temp_local;
969 } else {
970 ts->base_type = type;
971 ts->type = type;
972 ts->temp_allocated = 1;
973 ts->temp_local = temp_local;
977 #if defined(CONFIG_DEBUG_TCG)
978 s->temps_in_use++;
979 #endif
980 return ts;
983 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
985 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
986 return temp_tcgv_i32(t);
989 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
991 TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
992 return temp_tcgv_i64(t);
995 static void tcg_temp_free_internal(TCGTemp *ts)
997 TCGContext *s = tcg_ctx;
998 int k, idx;
1000 #if defined(CONFIG_DEBUG_TCG)
1001 s->temps_in_use--;
1002 if (s->temps_in_use < 0) {
1003 fprintf(stderr, "More temporaries freed than allocated!\n");
1005 #endif
1007 tcg_debug_assert(ts->temp_global == 0);
1008 tcg_debug_assert(ts->temp_allocated != 0);
1009 ts->temp_allocated = 0;
1011 idx = temp_idx(ts);
1012 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1013 set_bit(idx, s->free_temps[k].l);
1016 void tcg_temp_free_i32(TCGv_i32 arg)
1018 tcg_temp_free_internal(tcgv_i32_temp(arg));
1021 void tcg_temp_free_i64(TCGv_i64 arg)
1023 tcg_temp_free_internal(tcgv_i64_temp(arg));
1026 TCGv_i32 tcg_const_i32(int32_t val)
1028 TCGv_i32 t0;
1029 t0 = tcg_temp_new_i32();
1030 tcg_gen_movi_i32(t0, val);
1031 return t0;
1034 TCGv_i64 tcg_const_i64(int64_t val)
1036 TCGv_i64 t0;
1037 t0 = tcg_temp_new_i64();
1038 tcg_gen_movi_i64(t0, val);
1039 return t0;
1042 TCGv_i32 tcg_const_local_i32(int32_t val)
1044 TCGv_i32 t0;
1045 t0 = tcg_temp_local_new_i32();
1046 tcg_gen_movi_i32(t0, val);
1047 return t0;
1050 TCGv_i64 tcg_const_local_i64(int64_t val)
1052 TCGv_i64 t0;
1053 t0 = tcg_temp_local_new_i64();
1054 tcg_gen_movi_i64(t0, val);
1055 return t0;
#if defined(CONFIG_DEBUG_TCG)
/* Reset the leak-detection counter for the current context. */
void tcg_clear_temp_count(void)
{
    tcg_ctx->temps_in_use = 0;
}

/* Report (and clear) whether any temps were leaked since the last check.
   Returns 1 on leak, 0 otherwise. */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;

    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
1079 /* Return true if OP may appear in the opcode stream.
1080 Test the runtime variable that controls each opcode. */
1081 bool tcg_op_supported(TCGOpcode op)
1083 switch (op) {
1084 case INDEX_op_discard:
1085 case INDEX_op_set_label:
1086 case INDEX_op_call:
1087 case INDEX_op_br:
1088 case INDEX_op_mb:
1089 case INDEX_op_insn_start:
1090 case INDEX_op_exit_tb:
1091 case INDEX_op_goto_tb:
1092 case INDEX_op_qemu_ld_i32:
1093 case INDEX_op_qemu_st_i32:
1094 case INDEX_op_qemu_ld_i64:
1095 case INDEX_op_qemu_st_i64:
1096 return true;
1098 case INDEX_op_goto_ptr:
1099 return TCG_TARGET_HAS_goto_ptr;
1101 case INDEX_op_mov_i32:
1102 case INDEX_op_movi_i32:
1103 case INDEX_op_setcond_i32:
1104 case INDEX_op_brcond_i32:
1105 case INDEX_op_ld8u_i32:
1106 case INDEX_op_ld8s_i32:
1107 case INDEX_op_ld16u_i32:
1108 case INDEX_op_ld16s_i32:
1109 case INDEX_op_ld_i32:
1110 case INDEX_op_st8_i32:
1111 case INDEX_op_st16_i32:
1112 case INDEX_op_st_i32:
1113 case INDEX_op_add_i32:
1114 case INDEX_op_sub_i32:
1115 case INDEX_op_mul_i32:
1116 case INDEX_op_and_i32:
1117 case INDEX_op_or_i32:
1118 case INDEX_op_xor_i32:
1119 case INDEX_op_shl_i32:
1120 case INDEX_op_shr_i32:
1121 case INDEX_op_sar_i32:
1122 return true;
1124 case INDEX_op_movcond_i32:
1125 return TCG_TARGET_HAS_movcond_i32;
1126 case INDEX_op_div_i32:
1127 case INDEX_op_divu_i32:
1128 return TCG_TARGET_HAS_div_i32;
1129 case INDEX_op_rem_i32:
1130 case INDEX_op_remu_i32:
1131 return TCG_TARGET_HAS_rem_i32;
1132 case INDEX_op_div2_i32:
1133 case INDEX_op_divu2_i32:
1134 return TCG_TARGET_HAS_div2_i32;
1135 case INDEX_op_rotl_i32:
1136 case INDEX_op_rotr_i32:
1137 return TCG_TARGET_HAS_rot_i32;
1138 case INDEX_op_deposit_i32:
1139 return TCG_TARGET_HAS_deposit_i32;
1140 case INDEX_op_extract_i32:
1141 return TCG_TARGET_HAS_extract_i32;
1142 case INDEX_op_sextract_i32:
1143 return TCG_TARGET_HAS_sextract_i32;
1144 case INDEX_op_add2_i32:
1145 return TCG_TARGET_HAS_add2_i32;
1146 case INDEX_op_sub2_i32:
1147 return TCG_TARGET_HAS_sub2_i32;
1148 case INDEX_op_mulu2_i32:
1149 return TCG_TARGET_HAS_mulu2_i32;
1150 case INDEX_op_muls2_i32:
1151 return TCG_TARGET_HAS_muls2_i32;
1152 case INDEX_op_muluh_i32:
1153 return TCG_TARGET_HAS_muluh_i32;
1154 case INDEX_op_mulsh_i32:
1155 return TCG_TARGET_HAS_mulsh_i32;
1156 case INDEX_op_ext8s_i32:
1157 return TCG_TARGET_HAS_ext8s_i32;
1158 case INDEX_op_ext16s_i32:
1159 return TCG_TARGET_HAS_ext16s_i32;
1160 case INDEX_op_ext8u_i32:
1161 return TCG_TARGET_HAS_ext8u_i32;
1162 case INDEX_op_ext16u_i32:
1163 return TCG_TARGET_HAS_ext16u_i32;
1164 case INDEX_op_bswap16_i32:
1165 return TCG_TARGET_HAS_bswap16_i32;
1166 case INDEX_op_bswap32_i32:
1167 return TCG_TARGET_HAS_bswap32_i32;
1168 case INDEX_op_not_i32:
1169 return TCG_TARGET_HAS_not_i32;
1170 case INDEX_op_neg_i32:
1171 return TCG_TARGET_HAS_neg_i32;
1172 case INDEX_op_andc_i32:
1173 return TCG_TARGET_HAS_andc_i32;
1174 case INDEX_op_orc_i32:
1175 return TCG_TARGET_HAS_orc_i32;
1176 case INDEX_op_eqv_i32:
1177 return TCG_TARGET_HAS_eqv_i32;
1178 case INDEX_op_nand_i32:
1179 return TCG_TARGET_HAS_nand_i32;
1180 case INDEX_op_nor_i32:
1181 return TCG_TARGET_HAS_nor_i32;
1182 case INDEX_op_clz_i32:
1183 return TCG_TARGET_HAS_clz_i32;
1184 case INDEX_op_ctz_i32:
1185 return TCG_TARGET_HAS_ctz_i32;
1186 case INDEX_op_ctpop_i32:
1187 return TCG_TARGET_HAS_ctpop_i32;
1189 case INDEX_op_brcond2_i32:
1190 case INDEX_op_setcond2_i32:
1191 return TCG_TARGET_REG_BITS == 32;
1193 case INDEX_op_mov_i64:
1194 case INDEX_op_movi_i64:
1195 case INDEX_op_setcond_i64:
1196 case INDEX_op_brcond_i64:
1197 case INDEX_op_ld8u_i64:
1198 case INDEX_op_ld8s_i64:
1199 case INDEX_op_ld16u_i64:
1200 case INDEX_op_ld16s_i64:
1201 case INDEX_op_ld32u_i64:
1202 case INDEX_op_ld32s_i64:
1203 case INDEX_op_ld_i64:
1204 case INDEX_op_st8_i64:
1205 case INDEX_op_st16_i64:
1206 case INDEX_op_st32_i64:
1207 case INDEX_op_st_i64:
1208 case INDEX_op_add_i64:
1209 case INDEX_op_sub_i64:
1210 case INDEX_op_mul_i64:
1211 case INDEX_op_and_i64:
1212 case INDEX_op_or_i64:
1213 case INDEX_op_xor_i64:
1214 case INDEX_op_shl_i64:
1215 case INDEX_op_shr_i64:
1216 case INDEX_op_sar_i64:
1217 case INDEX_op_ext_i32_i64:
1218 case INDEX_op_extu_i32_i64:
1219 return TCG_TARGET_REG_BITS == 64;
1221 case INDEX_op_movcond_i64:
1222 return TCG_TARGET_HAS_movcond_i64;
1223 case INDEX_op_div_i64:
1224 case INDEX_op_divu_i64:
1225 return TCG_TARGET_HAS_div_i64;
1226 case INDEX_op_rem_i64:
1227 case INDEX_op_remu_i64:
1228 return TCG_TARGET_HAS_rem_i64;
1229 case INDEX_op_div2_i64:
1230 case INDEX_op_divu2_i64:
1231 return TCG_TARGET_HAS_div2_i64;
1232 case INDEX_op_rotl_i64:
1233 case INDEX_op_rotr_i64:
1234 return TCG_TARGET_HAS_rot_i64;
1235 case INDEX_op_deposit_i64:
1236 return TCG_TARGET_HAS_deposit_i64;
1237 case INDEX_op_extract_i64:
1238 return TCG_TARGET_HAS_extract_i64;
1239 case INDEX_op_sextract_i64:
1240 return TCG_TARGET_HAS_sextract_i64;
1241 case INDEX_op_extrl_i64_i32:
1242 return TCG_TARGET_HAS_extrl_i64_i32;
1243 case INDEX_op_extrh_i64_i32:
1244 return TCG_TARGET_HAS_extrh_i64_i32;
1245 case INDEX_op_ext8s_i64:
1246 return TCG_TARGET_HAS_ext8s_i64;
1247 case INDEX_op_ext16s_i64:
1248 return TCG_TARGET_HAS_ext16s_i64;
1249 case INDEX_op_ext32s_i64:
1250 return TCG_TARGET_HAS_ext32s_i64;
1251 case INDEX_op_ext8u_i64:
1252 return TCG_TARGET_HAS_ext8u_i64;
1253 case INDEX_op_ext16u_i64:
1254 return TCG_TARGET_HAS_ext16u_i64;
1255 case INDEX_op_ext32u_i64:
1256 return TCG_TARGET_HAS_ext32u_i64;
1257 case INDEX_op_bswap16_i64:
1258 return TCG_TARGET_HAS_bswap16_i64;
1259 case INDEX_op_bswap32_i64:
1260 return TCG_TARGET_HAS_bswap32_i64;
1261 case INDEX_op_bswap64_i64:
1262 return TCG_TARGET_HAS_bswap64_i64;
1263 case INDEX_op_not_i64:
1264 return TCG_TARGET_HAS_not_i64;
1265 case INDEX_op_neg_i64:
1266 return TCG_TARGET_HAS_neg_i64;
1267 case INDEX_op_andc_i64:
1268 return TCG_TARGET_HAS_andc_i64;
1269 case INDEX_op_orc_i64:
1270 return TCG_TARGET_HAS_orc_i64;
1271 case INDEX_op_eqv_i64:
1272 return TCG_TARGET_HAS_eqv_i64;
1273 case INDEX_op_nand_i64:
1274 return TCG_TARGET_HAS_nand_i64;
1275 case INDEX_op_nor_i64:
1276 return TCG_TARGET_HAS_nor_i64;
1277 case INDEX_op_clz_i64:
1278 return TCG_TARGET_HAS_clz_i64;
1279 case INDEX_op_ctz_i64:
1280 return TCG_TARGET_HAS_ctz_i64;
1281 case INDEX_op_ctpop_i64:
1282 return TCG_TARGET_HAS_ctpop_i64;
1283 case INDEX_op_add2_i64:
1284 return TCG_TARGET_HAS_add2_i64;
1285 case INDEX_op_sub2_i64:
1286 return TCG_TARGET_HAS_sub2_i64;
1287 case INDEX_op_mulu2_i64:
1288 return TCG_TARGET_HAS_mulu2_i64;
1289 case INDEX_op_muls2_i64:
1290 return TCG_TARGET_HAS_muls2_i64;
1291 case INDEX_op_muluh_i64:
1292 return TCG_TARGET_HAS_muluh_i64;
1293 case INDEX_op_mulsh_i64:
1294 return TCG_TARGET_HAS_mulsh_i64;
1296 case NB_OPS:
1297 break;
1299 g_assert_not_reached();
1302 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1303 and endian swap. Maybe it would be better to do the alignment
1304 and endian swap in tcg_reg_alloc_call(). */
/* Emit an INDEX_op_call op invoking helper 'func' with 'nargs' input
   temporaries and an optional return temporary 'ret' (NULL for void).
   Call flags and the per-argument 32/64-bit size mask are looked up in
   the global helper_table, so 'func' must have been registered there. */
1305 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1307 TCGContext *s = tcg_ctx;
1308 int i, real_args, nb_rets, pi;
1309 unsigned sizemask, flags;
1310 TCGHelperInfo *info;
1311 TCGOp *op;
/* NOTE(review): no NULL check on the lookup result; an unregistered
   helper would dereference NULL on the next line. */
1313 info = g_hash_table_lookup(helper_table, (gpointer)func);
1314 flags = info->flags;
1315 sizemask = info->sizemask;
1317 #if defined(__sparc__) && !defined(__arch64__) \
1318 && !defined(CONFIG_TCG_INTERPRETER)
1319 /* We have 64-bit values in one register, but need to pass as two
1320 separate parameters. Split them. */
1321 int orig_sizemask = sizemask;
1322 int orig_nargs = nargs;
1323 TCGv_i64 retl, reth;
1324 TCGTemp *split_args[MAX_OPC_PARAM];
1326 TCGV_UNUSED_I64(retl);
1327 TCGV_UNUSED_I64(reth);
1328 if (sizemask != 0) {
1329 for (i = real_args = 0; i < nargs; ++i) {
/* Bit (i+1)*2 of sizemask marks argument i as 64-bit. */
1330 int is_64bit = sizemask & (1 << (i+1)*2);
1331 if (is_64bit) {
1332 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1333 TCGv_i32 h = tcg_temp_new_i32();
1334 TCGv_i32 l = tcg_temp_new_i32();
1335 tcg_gen_extr_i64_i32(l, h, orig);
1336 split_args[real_args++] = tcgv_i32_temp(h);
1337 split_args[real_args++] = tcgv_i32_temp(l);
1338 } else {
1339 split_args[real_args++] = args[i];
1342 nargs = real_args;
1343 args = split_args;
1344 sizemask = 0;
1346 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
/* Hosts that demand it get 32-bit arguments widened to 64 bits,
   sign- or zero-extended according to bit (i+1)*2+1 of sizemask. */
1347 for (i = 0; i < nargs; ++i) {
1348 int is_64bit = sizemask & (1 << (i+1)*2);
1349 int is_signed = sizemask & (2 << (i+1)*2);
1350 if (!is_64bit) {
1351 TCGv_i64 temp = tcg_temp_new_i64();
1352 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1353 if (is_signed) {
1354 tcg_gen_ext32s_i64(temp, orig);
1355 } else {
1356 tcg_gen_ext32u_i64(temp, orig);
1358 args[i] = tcgv_i64_temp(temp);
1361 #endif /* TCG_TARGET_EXTEND_ARGS */
/* Allocate the next op slot and link it at the tail of the op list. */
1363 i = s->gen_next_op_idx;
1364 tcg_debug_assert(i < OPC_BUF_SIZE);
1365 s->gen_op_buf[0].prev = i;
1366 s->gen_next_op_idx = i + 1;
1367 op = &s->gen_op_buf[i];
1369 /* Set links for sequential allocation during translation. */
1370 memset(op, 0, offsetof(TCGOp, args));
1371 op->opc = INDEX_op_call;
1372 op->prev = i - 1;
1373 op->next = i + 1;
/* Fill in output (return) arguments first; pi indexes op->args. */
1375 pi = 0;
1376 if (ret != NULL) {
1377 #if defined(__sparc__) && !defined(__arch64__) \
1378 && !defined(CONFIG_TCG_INTERPRETER)
1379 if (orig_sizemask & 1) {
1380 /* The 32-bit ABI is going to return the 64-bit value in
1381 the %o0/%o1 register pair. Prepare for this by using
1382 two return temporaries, and reassemble below. */
1383 retl = tcg_temp_new_i64();
1384 reth = tcg_temp_new_i64();
1385 op->args[pi++] = tcgv_i64_arg(reth);
1386 op->args[pi++] = tcgv_i64_arg(retl);
1387 nb_rets = 2;
1388 } else {
1389 op->args[pi++] = temp_arg(ret);
1390 nb_rets = 1;
1392 #else
/* A 64-bit result on a 32-bit host occupies two consecutive temps,
   ordered to match host endianness. */
1393 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1394 #ifdef HOST_WORDS_BIGENDIAN
1395 op->args[pi++] = temp_arg(ret + 1);
1396 op->args[pi++] = temp_arg(ret);
1397 #else
1398 op->args[pi++] = temp_arg(ret);
1399 op->args[pi++] = temp_arg(ret + 1);
1400 #endif
1401 nb_rets = 2;
1402 } else {
1403 op->args[pi++] = temp_arg(ret);
1404 nb_rets = 1;
1406 #endif
1407 } else {
1408 nb_rets = 0;
1410 op->callo = nb_rets;
/* Now the input arguments. */
1412 real_args = 0;
1413 for (i = 0; i < nargs; i++) {
1414 int is_64bit = sizemask & (1 << (i+1)*2);
1415 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1416 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1417 /* some targets want aligned 64 bit args */
1418 if (real_args & 1) {
1419 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1420 real_args++;
1422 #endif
1423 /* If stack grows up, then we will be placing successive
1424 arguments at lower addresses, which means we need to
1425 reverse the order compared to how we would normally
1426 treat either big or little-endian. For those arguments
1427 that will wind up in registers, this still works for
1428 HPPA (the only current STACK_GROWSUP target) since the
1429 argument registers are *also* allocated in decreasing
1430 order. If another such target is added, this logic may
1431 have to get more complicated to differentiate between
1432 stack arguments and register arguments. */
1433 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1434 op->args[pi++] = temp_arg(args[i] + 1);
1435 op->args[pi++] = temp_arg(args[i]);
1436 #else
1437 op->args[pi++] = temp_arg(args[i]);
1438 op->args[pi++] = temp_arg(args[i] + 1);
1439 #endif
1440 real_args += 2;
1441 continue;
1444 op->args[pi++] = temp_arg(args[i]);
1445 real_args++;
/* Trailing constant args: the helper's address and the call flags. */
1447 op->args[pi++] = (uintptr_t)func;
1448 op->args[pi++] = flags;
1449 op->calli = real_args;
1451 /* Make sure the fields didn't overflow. */
1452 tcg_debug_assert(op->calli == real_args);
1453 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1455 #if defined(__sparc__) && !defined(__arch64__) \
1456 && !defined(CONFIG_TCG_INTERPRETER)
1457 /* Free all of the parts we allocated above. */
1458 for (i = real_args = 0; i < orig_nargs; ++i) {
1459 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1460 if (is_64bit) {
1461 tcg_temp_free_internal(args[real_args++]);
1462 tcg_temp_free_internal(args[real_args++]);
1463 } else {
1464 real_args++;
1467 if (orig_sizemask & 1) {
1468 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1469 Note that describing these as TCGv_i64 eliminates an unnecessary
1470 zero-extension that tcg_gen_concat_i32_i64 would create. */
1471 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1472 tcg_temp_free_i64(retl);
1473 tcg_temp_free_i64(reth);
1475 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
/* Release the widened copies created before the call was emitted. */
1476 for (i = 0; i < nargs; ++i) {
1477 int is_64bit = sizemask & (1 << (i+1)*2);
1478 if (!is_64bit) {
1479 tcg_temp_free_internal(args[i]);
1482 #endif /* TCG_TARGET_EXTEND_ARGS */
1485 static void tcg_reg_alloc_start(TCGContext *s)
1487 int i, n;
1488 TCGTemp *ts;
1490 for (i = 0, n = s->nb_globals; i < n; i++) {
1491 ts = &s->temps[i];
1492 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1494 for (n = s->nb_temps; i < n; i++) {
1495 ts = &s->temps[i];
1496 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1497 ts->mem_allocated = 0;
1498 ts->fixed_reg = 0;
1501 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1504 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1505 TCGTemp *ts)
1507 int idx = temp_idx(ts);
1509 if (ts->temp_global) {
1510 pstrcpy(buf, buf_size, ts->name);
1511 } else if (ts->temp_local) {
1512 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1513 } else {
1514 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1516 return buf;
1519 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1520 int buf_size, TCGArg arg)
1522 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1525 /* Find helper name. */
1526 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1528 const char *ret = NULL;
1529 if (helper_table) {
1530 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1531 if (info) {
1532 ret = info->name;
1535 return ret;
/* Printable names for TCGCond values, indexed by condition code
   (used by tcg_dump_ops for brcond/setcond/movcond operands). */
1538 static const char * const cond_name[] =
1540 [TCG_COND_NEVER] = "never",
1541 [TCG_COND_ALWAYS] = "always",
1542 [TCG_COND_EQ] = "eq",
1543 [TCG_COND_NE] = "ne",
1544 [TCG_COND_LT] = "lt",
1545 [TCG_COND_GE] = "ge",
1546 [TCG_COND_LE] = "le",
1547 [TCG_COND_GT] = "gt",
1548 [TCG_COND_LTU] = "ltu",
1549 [TCG_COND_GEU] = "geu",
1550 [TCG_COND_LEU] = "leu",
1551 [TCG_COND_GTU] = "gtu"
/* Printable names for the size/sign/endianness bits of a TCGMemOp,
   indexed by (op & (MO_BSWAP | MO_SSIZE)). */
1554 static const char * const ldst_name[] =
1556 [MO_UB] = "ub",
1557 [MO_SB] = "sb",
1558 [MO_LEUW] = "leuw",
1559 [MO_LESW] = "lesw",
1560 [MO_LEUL] = "leul",
1561 [MO_LESL] = "lesl",
1562 [MO_LEQ] = "leq",
1563 [MO_BEUW] = "beuw",
1564 [MO_BESW] = "besw",
1565 [MO_BEUL] = "beul",
1566 [MO_BESL] = "besl",
1567 [MO_BEQ] = "beq",
/* Printable prefixes for the alignment bits of a TCGMemOp.  Which case
   (aligned or unaligned) prints as the empty default depends on whether
   the target is aligned-only. */
1570 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1571 #ifdef ALIGNED_ONLY
1572 [MO_UNALN >> MO_ASHIFT] = "un+",
1573 [MO_ALIGN >> MO_ASHIFT] = "",
1574 #else
1575 [MO_UNALN >> MO_ASHIFT] = "",
1576 [MO_ALIGN >> MO_ASHIFT] = "al+",
1577 #endif
1578 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1579 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1580 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1581 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1582 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1583 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
/* Dump the current op stream to the qemu log, one line per op,
   with symbolic operand names and (when present) per-op liveness
   annotations appended at column 48. */
1586 void tcg_dump_ops(TCGContext *s)
1588 char buf[128];
1589 TCGOp *op;
1590 int oi;
1592 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
1593 int i, k, nb_oargs, nb_iargs, nb_cargs;
1594 const TCGOpDef *def;
1595 TCGOpcode c;
1596 int col = 0;
1598 op = &s->gen_op_buf[oi];
1599 c = op->opc;
1600 def = &tcg_op_defs[c];
1602 if (c == INDEX_op_insn_start) {
1603 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
1605 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1606 target_ulong a;
1607 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1608 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1609 #else
1610 a = op->args[i];
1611 #endif
1612 col += qemu_log(" " TARGET_FMT_lx, a);
1614 } else if (c == INDEX_op_call) {
1615 /* variable number of arguments */
1616 nb_oargs = op->callo;
1617 nb_iargs = op->calli;
1618 nb_cargs = def->nb_cargs;
1620 /* function name, flags, out args */
1621 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1622 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1623 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1624 for (i = 0; i < nb_oargs; i++) {
1625 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1626 op->args[i]));
1628 for (i = 0; i < nb_iargs; i++) {
1629 TCGArg arg = op->args[nb_oargs + i];
1630 const char *t = "<dummy>";
1631 if (arg != TCG_CALL_DUMMY_ARG) {
1632 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1634 col += qemu_log(",%s", t);
1636 } else {
1637 col += qemu_log(" %s ", def->name);
1639 nb_oargs = def->nb_oargs;
1640 nb_iargs = def->nb_iargs;
1641 nb_cargs = def->nb_cargs;
/* Output args then input args, comma separated; k tracks the
   position in op->args across both loops. */
1643 k = 0;
1644 for (i = 0; i < nb_oargs; i++) {
1645 if (k != 0) {
1646 col += qemu_log(",");
1648 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1649 op->args[k++]));
1651 for (i = 0; i < nb_iargs; i++) {
1652 if (k != 0) {
1653 col += qemu_log(",");
1655 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1656 op->args[k++]));
/* Constant args with symbolic representations; 'i' counts how
   many constant args were consumed symbolically. */
1658 switch (c) {
1659 case INDEX_op_brcond_i32:
1660 case INDEX_op_setcond_i32:
1661 case INDEX_op_movcond_i32:
1662 case INDEX_op_brcond2_i32:
1663 case INDEX_op_setcond2_i32:
1664 case INDEX_op_brcond_i64:
1665 case INDEX_op_setcond_i64:
1666 case INDEX_op_movcond_i64:
1667 if (op->args[k] < ARRAY_SIZE(cond_name)
1668 && cond_name[op->args[k]]) {
1669 col += qemu_log(",%s", cond_name[op->args[k++]]);
1670 } else {
1671 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1673 i = 1;
1674 break;
1675 case INDEX_op_qemu_ld_i32:
1676 case INDEX_op_qemu_st_i32:
1677 case INDEX_op_qemu_ld_i64:
1678 case INDEX_op_qemu_st_i64:
/* NOTE(review): the inner 'oi' and 'op' below shadow the outer
   loop variables of the same names; both inner values are dead
   before the outer ones are next read, but -Wshadow would flag
   this -- consider renaming. */
1680 TCGMemOpIdx oi = op->args[k++];
1681 TCGMemOp op = get_memop(oi);
1682 unsigned ix = get_mmuidx(oi);
1684 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1685 col += qemu_log(",$0x%x,%u", op, ix);
1686 } else {
1687 const char *s_al, *s_op;
1688 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1689 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1690 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1692 i = 1;
1694 break;
1695 default:
1696 i = 0;
1697 break;
/* Branch targets print as $Ln labels rather than raw pointers. */
1699 switch (c) {
1700 case INDEX_op_set_label:
1701 case INDEX_op_br:
1702 case INDEX_op_brcond_i32:
1703 case INDEX_op_brcond_i64:
1704 case INDEX_op_brcond2_i32:
1705 col += qemu_log("%s$L%d", k ? "," : "",
1706 arg_label(op->args[k])->id);
1707 i++, k++;
1708 break;
1709 default:
1710 break;
/* Remaining constant args print as raw hex. */
1712 for (; i < nb_cargs; i++, k++) {
1713 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
/* Liveness info, when available, is padded out to column 48:
   "sync:" lists output args needing a memory sync, "dead:" lists
   args whose value dies at this op. */
1716 if (op->life) {
1717 unsigned life = op->life;
1719 for (; col < 48; ++col) {
1720 putc(' ', qemu_logfile);
1723 if (life & (SYNC_ARG * 3)) {
1724 qemu_log(" sync:");
1725 for (i = 0; i < 2; ++i) {
1726 if (life & (SYNC_ARG << i)) {
1727 qemu_log(" %d", i);
1731 life /= DEAD_ARG;
1732 if (life) {
1733 qemu_log(" dead:");
1734 for (i = 0; life; ++i, life >>= 1) {
1735 if (life & 1) {
1736 qemu_log(" %d", i);
1741 qemu_log("\n");
1745 /* we give more priority to constraints with less registers */
1746 static int get_constraint_priority(const TCGOpDef *def, int k)
1748 const TCGArgConstraint *arg_ct;
1750 int i, n;
1751 arg_ct = &def->args_ct[k];
1752 if (arg_ct->ct & TCG_CT_ALIAS) {
1753 /* an alias is equivalent to a single register */
1754 n = 1;
1755 } else {
1756 if (!(arg_ct->ct & TCG_CT_REG))
1757 return 0;
1758 n = 0;
1759 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1760 if (tcg_regset_test_reg(arg_ct->u.regs, i))
1761 n++;
1764 return TCG_TARGET_NB_REGS - n + 1;
1767 /* sort from highest priority to lowest */
1768 static void sort_constraints(TCGOpDef *def, int start, int n)
1770 int i, j, p1, p2, tmp;
1772 for(i = 0; i < n; i++)
1773 def->sorted_args[start + i] = start + i;
1774 if (n <= 1)
1775 return;
1776 for(i = 0; i < n - 1; i++) {
1777 for(j = i + 1; j < n; j++) {
1778 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1779 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1780 if (p1 < p2) {
1781 tmp = def->sorted_args[start + i];
1782 def->sorted_args[start + i] = def->sorted_args[start + j];
1783 def->sorted_args[start + j] = tmp;
/* Parse the backend's constraint strings for every opcode into the
   args_ct[] array of each TCGOpDef, then sort the constraints.
   Called once at startup; asserts on malformed backend tables. */
1789 static void process_op_defs(TCGContext *s)
1791 TCGOpcode op;
1793 for (op = 0; op < NB_OPS; op++) {
1794 TCGOpDef *def = &tcg_op_defs[op];
1795 const TCGTargetOpDef *tdefs;
1796 TCGType type;
1797 int i, nb_args;
1799 if (def->flags & TCG_OPF_NOT_PRESENT) {
1800 continue;
1803 nb_args = def->nb_iargs + def->nb_oargs;
1804 if (nb_args == 0) {
1805 continue;
1808 tdefs = tcg_target_op_def(op);
1809 /* Missing TCGTargetOpDef entry. */
1810 tcg_debug_assert(tdefs != NULL);
1812 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
1813 for (i = 0; i < nb_args; i++) {
1814 const char *ct_str = tdefs->args_ct_str[i];
1815 /* Incomplete TCGTargetOpDef entry. */
1816 tcg_debug_assert(ct_str != NULL);
1818 def->args_ct[i].u.regs = 0;
1819 def->args_ct[i].ct = 0;
/* A constraint string is a sequence of single-character codes:
   a digit aliases this input to that output arg, '&' requests a
   fresh output register, 'i' allows a constant, anything else is
   backend-specific and handled by target_parse_constraint(). */
1820 while (*ct_str != '\0') {
1821 switch(*ct_str) {
1822 case '0' ... '9':
1824 int oarg = *ct_str - '0';
/* A digit must be the whole constraint, first and alone. */
1825 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
1826 tcg_debug_assert(oarg < def->nb_oargs);
1827 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
1828 /* TCG_CT_ALIAS is for the output arguments.
1829 The input is tagged with TCG_CT_IALIAS. */
1830 def->args_ct[i] = def->args_ct[oarg];
1831 def->args_ct[oarg].ct |= TCG_CT_ALIAS;
1832 def->args_ct[oarg].alias_index = i;
1833 def->args_ct[i].ct |= TCG_CT_IALIAS;
1834 def->args_ct[i].alias_index = oarg;
1836 ct_str++;
1837 break;
1838 case '&':
1839 def->args_ct[i].ct |= TCG_CT_NEWREG;
1840 ct_str++;
1841 break;
1842 case 'i':
1843 def->args_ct[i].ct |= TCG_CT_CONST;
1844 ct_str++;
1845 break;
1846 default:
1847 ct_str = target_parse_constraint(&def->args_ct[i],
1848 ct_str, type);
1849 /* Typo in TCGTargetOpDef constraint. */
1850 tcg_debug_assert(ct_str != NULL);
1855 /* TCGTargetOpDef entry with too much information? */
1856 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1858 /* sort the constraints (XXX: this is just an heuristic) */
1859 sort_constraints(def, 0, def->nb_oargs);
1860 sort_constraints(def, def->nb_oargs, def->nb_iargs);
1864 void tcg_op_remove(TCGContext *s, TCGOp *op)
1866 int next = op->next;
1867 int prev = op->prev;
1869 /* We should never attempt to remove the list terminator. */
1870 tcg_debug_assert(op != &s->gen_op_buf[0]);
1872 s->gen_op_buf[next].prev = prev;
1873 s->gen_op_buf[prev].next = next;
1875 memset(op, 0, sizeof(*op));
1877 #ifdef CONFIG_PROFILER
1878 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
1879 #endif
1882 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1883 TCGOpcode opc, int nargs)
1885 int oi = s->gen_next_op_idx;
1886 int prev = old_op->prev;
1887 int next = old_op - s->gen_op_buf;
1888 TCGOp *new_op;
1890 tcg_debug_assert(oi < OPC_BUF_SIZE);
1891 s->gen_next_op_idx = oi + 1;
1893 new_op = &s->gen_op_buf[oi];
1894 *new_op = (TCGOp){
1895 .opc = opc,
1896 .prev = prev,
1897 .next = next
1899 s->gen_op_buf[prev].next = oi;
1900 old_op->prev = oi;
1902 return new_op;
1905 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1906 TCGOpcode opc, int nargs)
1908 int oi = s->gen_next_op_idx;
1909 int prev = old_op - s->gen_op_buf;
1910 int next = old_op->next;
1911 TCGOp *new_op;
1913 tcg_debug_assert(oi < OPC_BUF_SIZE);
1914 s->gen_next_op_idx = oi + 1;
1916 new_op = &s->gen_op_buf[oi];
1917 *new_op = (TCGOp){
1918 .opc = opc,
1919 .prev = prev,
1920 .next = next
1922 s->gen_op_buf[next].prev = oi;
1923 old_op->next = oi;
1925 return new_op;
/* Liveness state bits stored in TCGTemp.state during the liveness passes:
   TS_DEAD - the value is not needed by any later op;
   TS_MEM  - the value is (also) synced to its memory slot. */
1928 #define TS_DEAD 1
1929 #define TS_MEM 2
/* Query the per-op life mask: is argument n dead after this op / does
   it need a memory sync?  Both expect 'arg_life' in scope at the use
   site. */
1931 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
1932 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
1934 /* liveness analysis: end of function: all temps are dead, and globals
1935 should be in memory. */
1936 static void tcg_la_func_end(TCGContext *s)
1938 int ng = s->nb_globals;
1939 int nt = s->nb_temps;
1940 int i;
1942 for (i = 0; i < ng; ++i) {
1943 s->temps[i].state = TS_DEAD | TS_MEM;
1945 for (i = ng; i < nt; ++i) {
1946 s->temps[i].state = TS_DEAD;
1950 /* liveness analysis: end of basic block: all temps are dead, globals
1951 and local temps should be in memory. */
1952 static void tcg_la_bb_end(TCGContext *s)
1954 int ng = s->nb_globals;
1955 int nt = s->nb_temps;
1956 int i;
1958 for (i = 0; i < ng; ++i) {
1959 s->temps[i].state = TS_DEAD | TS_MEM;
1961 for (i = ng; i < nt; ++i) {
1962 s->temps[i].state = (s->temps[i].temp_local
1963 ? TS_DEAD | TS_MEM
1964 : TS_DEAD);
1968 /* Liveness analysis : update the opc_arg_life array to tell if a
1969 given input arguments is dead. Instructions updating dead
1970 temporaries are removed. */
/* Pass 1 walks the op list BACKWARD from the last op, propagating
   TS_DEAD/TS_MEM through TCGTemp.state and recording a per-op life
   mask in op->life.  Dead ops are deleted; double-word add/sub/mul
   ops whose high half is dead are narrowed to single-word ops. */
1971 static void liveness_pass_1(TCGContext *s)
1973 int nb_globals = s->nb_globals;
1974 int oi, oi_prev;
/* Seed the state: at function end everything is dead. */
1976 tcg_la_func_end(s);
1978 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
1979 int i, nb_iargs, nb_oargs;
1980 TCGOpcode opc_new, opc_new2;
1981 bool have_opc_new2;
1982 TCGLifeData arg_life = 0;
1983 TCGTemp *arg_ts;
1985 TCGOp * const op = &s->gen_op_buf[oi];
1986 TCGOpcode opc = op->opc;
1987 const TCGOpDef *def = &tcg_op_defs[opc];
1989 oi_prev = op->prev;
1991 switch (opc) {
1992 case INDEX_op_call:
1994 int call_flags;
1996 nb_oargs = op->callo;
1997 nb_iargs = op->calli;
1998 call_flags = op->args[nb_oargs + nb_iargs + 1];
2000 /* pure functions can be removed if their result is unused */
2001 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2002 for (i = 0; i < nb_oargs; i++) {
2003 arg_ts = arg_temp(op->args[i]);
2004 if (arg_ts->state != TS_DEAD) {
2005 goto do_not_remove_call;
2008 goto do_remove;
2009 } else {
2010 do_not_remove_call:
2012 /* output args are dead */
2013 for (i = 0; i < nb_oargs; i++) {
2014 arg_ts = arg_temp(op->args[i]);
2015 if (arg_ts->state & TS_DEAD) {
2016 arg_life |= DEAD_ARG << i;
2018 if (arg_ts->state & TS_MEM) {
2019 arg_life |= SYNC_ARG << i;
2021 arg_ts->state = TS_DEAD;
2024 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2025 TCG_CALL_NO_READ_GLOBALS))) {
2026 /* globals should go back to memory */
2027 for (i = 0; i < nb_globals; i++) {
2028 s->temps[i].state = TS_DEAD | TS_MEM;
2030 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2031 /* globals should be synced to memory */
2032 for (i = 0; i < nb_globals; i++) {
2033 s->temps[i].state |= TS_MEM;
2037 /* record arguments that die in this helper */
2038 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2039 arg_ts = arg_temp(op->args[i]);
2040 if (arg_ts && arg_ts->state & TS_DEAD) {
2041 arg_life |= DEAD_ARG << i;
2044 /* input arguments are live for preceding opcodes */
2045 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2046 arg_ts = arg_temp(op->args[i]);
2047 if (arg_ts) {
2048 arg_ts->state &= ~TS_DEAD;
2053 break;
2054 case INDEX_op_insn_start:
2055 break;
2056 case INDEX_op_discard:
2057 /* mark the temporary as dead */
2058 arg_temp(op->args[0])->state = TS_DEAD;
2059 break;
/* Double-word add/sub: args are out_lo,out_hi,a_lo,a_hi,b_lo,b_hi. */
2061 case INDEX_op_add2_i32:
2062 opc_new = INDEX_op_add_i32;
2063 goto do_addsub2;
2064 case INDEX_op_sub2_i32:
2065 opc_new = INDEX_op_sub_i32;
2066 goto do_addsub2;
2067 case INDEX_op_add2_i64:
2068 opc_new = INDEX_op_add_i64;
2069 goto do_addsub2;
2070 case INDEX_op_sub2_i64:
2071 opc_new = INDEX_op_sub_i64;
2072 do_addsub2:
2073 nb_iargs = 4;
2074 nb_oargs = 2;
2075 /* Test if the high part of the operation is dead, but not
2076 the low part. The result can be optimized to a simple
2077 add or sub. This happens often for x86_64 guest when the
2078 cpu mode is set to 32 bit. */
2079 if (arg_temp(op->args[1])->state == TS_DEAD) {
2080 if (arg_temp(op->args[0])->state == TS_DEAD) {
2081 goto do_remove;
2083 /* Replace the opcode and adjust the args in place,
2084 leaving 3 unused args at the end. */
2085 op->opc = opc = opc_new;
2086 op->args[1] = op->args[2];
2087 op->args[2] = op->args[4];
2088 /* Fall through and mark the single-word operation live. */
2089 nb_iargs = 2;
2090 nb_oargs = 1;
2092 goto do_not_remove;
/* Widening multiplies: out_lo,out_hi,a,b.  opc_new computes the low
   half, opc_new2 (when the backend has it) the high half. */
2094 case INDEX_op_mulu2_i32:
2095 opc_new = INDEX_op_mul_i32;
2096 opc_new2 = INDEX_op_muluh_i32;
2097 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2098 goto do_mul2;
2099 case INDEX_op_muls2_i32:
2100 opc_new = INDEX_op_mul_i32;
2101 opc_new2 = INDEX_op_mulsh_i32;
2102 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2103 goto do_mul2;
2104 case INDEX_op_mulu2_i64:
2105 opc_new = INDEX_op_mul_i64;
2106 opc_new2 = INDEX_op_muluh_i64;
2107 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2108 goto do_mul2;
2109 case INDEX_op_muls2_i64:
2110 opc_new = INDEX_op_mul_i64;
2111 opc_new2 = INDEX_op_mulsh_i64;
2112 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2113 goto do_mul2;
2114 do_mul2:
2115 nb_iargs = 2;
2116 nb_oargs = 2;
2117 if (arg_temp(op->args[1])->state == TS_DEAD) {
2118 if (arg_temp(op->args[0])->state == TS_DEAD) {
2119 /* Both parts of the operation are dead. */
2120 goto do_remove;
2122 /* The high part of the operation is dead; generate the low. */
2123 op->opc = opc = opc_new;
2124 op->args[1] = op->args[2];
2125 op->args[2] = op->args[3];
2126 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2127 /* The low part of the operation is dead; generate the high. */
2128 op->opc = opc = opc_new2;
2129 op->args[0] = op->args[1];
2130 op->args[1] = op->args[2];
2131 op->args[2] = op->args[3];
2132 } else {
2133 goto do_not_remove;
2135 /* Mark the single-word operation live. */
2136 nb_oargs = 1;
2137 goto do_not_remove;
2139 default:
2140 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2141 nb_iargs = def->nb_iargs;
2142 nb_oargs = def->nb_oargs;
2144 /* Test if the operation can be removed because all
2145 its outputs are dead. We assume that nb_oargs == 0
2146 implies side effects */
2147 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2148 for (i = 0; i < nb_oargs; i++) {
2149 if (arg_temp(op->args[i])->state != TS_DEAD) {
2150 goto do_not_remove;
2153 do_remove:
2154 tcg_op_remove(s, op);
2155 } else {
2156 do_not_remove:
2157 /* output args are dead */
2158 for (i = 0; i < nb_oargs; i++) {
2159 arg_ts = arg_temp(op->args[i]);
2160 if (arg_ts->state & TS_DEAD) {
2161 arg_life |= DEAD_ARG << i;
2163 if (arg_ts->state & TS_MEM) {
2164 arg_life |= SYNC_ARG << i;
2166 arg_ts->state = TS_DEAD;
2169 /* if end of basic block, update */
2170 if (def->flags & TCG_OPF_BB_END) {
2171 tcg_la_bb_end(s);
2172 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2173 /* globals should be synced to memory */
2174 for (i = 0; i < nb_globals; i++) {
2175 s->temps[i].state |= TS_MEM;
2179 /* record arguments that die in this opcode */
2180 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2181 arg_ts = arg_temp(op->args[i]);
2182 if (arg_ts->state & TS_DEAD) {
2183 arg_life |= DEAD_ARG << i;
2186 /* input arguments are live for preceding opcodes */
2187 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2188 arg_temp(op->args[i])->state &= ~TS_DEAD;
2191 break;
/* Publish the per-op mask consumed by the register allocator. */
2193 op->life = arg_life;
2197 /* Liveness analysis: Convert indirect regs to direct temporaries. */
/* Pass 2 walks the op list FORWARD.  Each indirect global gets a
   shadow direct temp; explicit ld ops are inserted before uses that
   find the global dead, and st ops after writes that need syncing.
   Returns true if any op was rewritten (caller re-runs pass 1). */
2198 static bool liveness_pass_2(TCGContext *s)
2200 int nb_globals = s->nb_globals;
2201 int nb_temps, i, oi, oi_next;
2202 bool changes = false;
2204 /* Create a temporary for each indirect global. */
2205 for (i = 0; i < nb_globals; ++i) {
2206 TCGTemp *its = &s->temps[i];
2207 if (its->indirect_reg) {
2208 TCGTemp *dts = tcg_temp_alloc(s);
2209 dts->type = its->type;
2210 dts->base_type = its->base_type;
/* state_ptr links an indirect global to its direct shadow. */
2211 its->state_ptr = dts;
2212 } else {
2213 its->state_ptr = NULL;
2215 /* All globals begin dead. */
2216 its->state = TS_DEAD;
2218 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2219 TCGTemp *its = &s->temps[i];
2220 its->state_ptr = NULL;
2221 its->state = TS_DEAD;
2224 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
2225 TCGOp *op = &s->gen_op_buf[oi];
2226 TCGOpcode opc = op->opc;
2227 const TCGOpDef *def = &tcg_op_defs[opc];
2228 TCGLifeData arg_life = op->life;
2229 int nb_iargs, nb_oargs, call_flags;
2230 TCGTemp *arg_ts, *dir_ts;
2232 oi_next = op->next;
2234 if (opc == INDEX_op_call) {
2235 nb_oargs = op->callo;
2236 nb_iargs = op->calli;
2237 call_flags = op->args[nb_oargs + nb_iargs + 1];
2238 } else {
2239 nb_iargs = def->nb_iargs;
2240 nb_oargs = def->nb_oargs;
2242 /* Set flags similar to how calls require. */
2243 if (def->flags & TCG_OPF_BB_END) {
2244 /* Like writing globals: save_globals */
2245 call_flags = 0;
2246 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2247 /* Like reading globals: sync_globals */
2248 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2249 } else {
2250 /* No effect on globals. */
2251 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2252 TCG_CALL_NO_WRITE_GLOBALS);
2256 /* Make sure that input arguments are available. */
2257 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2258 arg_ts = arg_temp(op->args[i]);
2259 if (arg_ts) {
2260 dir_ts = arg_ts->state_ptr;
2261 if (dir_ts && arg_ts->state == TS_DEAD) {
2262 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2263 ? INDEX_op_ld_i32
2264 : INDEX_op_ld_i64);
/* Reload the shadow from the global's memory slot. */
2265 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2267 lop->args[0] = temp_arg(dir_ts);
2268 lop->args[1] = temp_arg(arg_ts->mem_base);
2269 lop->args[2] = arg_ts->mem_offset;
2271 /* Loaded, but synced with memory. */
2272 arg_ts->state = TS_MEM;
2277 /* Perform input replacement, and mark inputs that became dead.
2278 No action is required except keeping temp_state up to date
2279 so that we reload when needed. */
2280 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2281 arg_ts = arg_temp(op->args[i]);
2282 if (arg_ts) {
2283 dir_ts = arg_ts->state_ptr;
2284 if (dir_ts) {
2285 op->args[i] = temp_arg(dir_ts);
2286 changes = true;
2287 if (IS_DEAD_ARG(i)) {
2288 arg_ts->state = TS_DEAD;
2294 /* Liveness analysis should ensure that the following are
2295 all correct, for call sites and basic block end points. */
2296 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2297 /* Nothing to do */
2298 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2299 for (i = 0; i < nb_globals; ++i) {
2300 /* Liveness should see that globals are synced back,
2301 that is, either TS_DEAD or TS_MEM. */
2302 arg_ts = &s->temps[i];
2303 tcg_debug_assert(arg_ts->state_ptr == 0
2304 || arg_ts->state != 0);
2306 } else {
2307 for (i = 0; i < nb_globals; ++i) {
2308 /* Liveness should see that globals are saved back,
2309 that is, TS_DEAD, waiting to be reloaded. */
2310 arg_ts = &s->temps[i];
2311 tcg_debug_assert(arg_ts->state_ptr == 0
2312 || arg_ts->state == TS_DEAD);
2316 /* Outputs become available. */
2317 for (i = 0; i < nb_oargs; i++) {
2318 arg_ts = arg_temp(op->args[i]);
2319 dir_ts = arg_ts->state_ptr;
2320 if (!dir_ts) {
2321 continue;
2323 op->args[i] = temp_arg(dir_ts);
2324 changes = true;
2326 /* The output is now live and modified. */
2327 arg_ts->state = 0;
2329 /* Sync outputs upon their last write. */
2330 if (NEED_SYNC_ARG(i)) {
2331 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2332 ? INDEX_op_st_i32
2333 : INDEX_op_st_i64);
/* Write the shadow back to the global's memory slot. */
2334 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2336 sop->args[0] = temp_arg(dir_ts);
2337 sop->args[1] = temp_arg(arg_ts->mem_base);
2338 sop->args[2] = arg_ts->mem_offset;
2340 arg_ts->state = TS_MEM;
2342 /* Drop outputs that are dead. */
2343 if (IS_DEAD_ARG(i)) {
2344 arg_ts->state = TS_DEAD;
2349 return changes;
2352 #ifdef CONFIG_DEBUG_TCG
/* Debug helper: print the allocator state to stdout — one line per temp
   (its current location: register, stack slot, constant, or dead) followed
   by the reverse map of which temp occupies each host register. */
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset,
                   tcg_target_reg_names[ts->mem_base->reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" TCG_PRIlx, ts->val);
            break;
        case TEMP_VAL_DEAD:
            /* "D" marks a temp with no current value anywhere. */
            printf("D");
            break;
        default:
            /* Unknown val_type: corrupted state, flagged but not fatal here. */
            printf("???");
            break;
        }
        printf("\n");
    }

    /* Reverse map: for each host register, which temp (if any) lives in it. */
    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] != NULL) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}
/* Debug helper: verify that the reg_to_temp[] reverse map and the per-temp
   val_type/reg fields agree in both directions.  Aborts (after dumping the
   full allocator state) on any inconsistency.  Note the second loop jumps
   backward into the first loop's error path via the shared 'fail' label. */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    /* Direction 1: every occupied register must point at a temp that
       claims to live in exactly that register. */
    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    /* Direction 2: every register-resident, non-fixed temp must be the one
       recorded in reg_to_temp[] for its register. */
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif
/* Assign a stack-frame slot to temporary @ts: round the current frame
   offset up to tcg_target_long alignment, check against the frame end,
   and record the slot (base register + offset) in the temp.  Aborts if
   the preallocated frame is exhausted. */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        /* Out of spill slots: fatal, the frame size is fixed at startup. */
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
2442 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  Fixed-register temps are never
   released.  If the temp currently occupies a host register, that
   register becomes available.  Locals and globals always fall back to
   TEMP_VAL_MEM (their canonical slot remains valid); plain temps being
   marked dead go to TEMP_VAL_DEAD. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (ts->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ts->reg] = NULL;
    }
    ts->val_type = (free_or_dead < 0
                    || ts->temp_local
                    || ts->temp_global
                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
}
2460 /* Mark a temporary as dead. */
2461 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2463 temp_free_or_dead(s, ts, 1);
/* Sync a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  If
   'free_or_dead' is non-zero, subsequently release the temporary; if it
   is positive, the temp is dead; if it is negative, the temp is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts,
                      TCGRegSet allocated_regs, int free_or_dead)
{
    /* Fixed-register temps are pinned; nothing to spill. */
    if (ts->fixed_reg) {
        return;
    }
    if (!ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly. */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Backend could not store the constant directly: materialize
               it in a register first, then fall through to the store. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            /* A dead temp has no value to sync; reaching here is a bug. */
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
2513 /* free register 'reg' by spilling the corresponding temporary if necessary */
2514 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2516 TCGTemp *ts = s->reg_to_temp[reg];
2517 if (ts != NULL) {
2518 temp_sync(s, ts, allocated_regs, -1);
2522 /* Allocate a register belonging to reg1 & ~reg2 */
2523 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2524 TCGRegSet allocated_regs, bool rev)
2526 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2527 const int *order;
2528 TCGReg reg;
2529 TCGRegSet reg_ct;
2531 reg_ct = desired_regs & ~allocated_regs;
2532 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2534 /* first try free registers */
2535 for(i = 0; i < n; i++) {
2536 reg = order[i];
2537 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2538 return reg;
2541 /* XXX: do better spill choice */
2542 for(i = 0; i < n; i++) {
2543 reg = order[i];
2544 if (tcg_regset_test_reg(reg_ct, reg)) {
2545 tcg_reg_free(s, reg, allocated_regs);
2546 return reg;
2550 tcg_abort();
/* Make sure the temporary is in a register.  If needed, allocate the
   register from DESIRED while avoiding ALLOCATED.  On exit the temp is
   TEMP_VAL_REG and the reverse map is updated.  A constant is loaded
   with mem_coherent = 0 (the slot, if any, does not hold this value);
   a memory load leaves mem_coherent = 1. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already resident; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        /* Loading a dead temp is a liveness bug upstream. */
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}
2582 /* Save a temporary to memory. 'allocated_regs' is used in case a
2583 temporary registers needs to be allocated to store a constant. */
2584 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2586 /* The liveness analysis already ensures that globals are back
2587 in memory. Keep an tcg_debug_assert for safety. */
2588 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2591 /* save globals to their canonical location and assume they can be
2592 modified be the following code. 'allocated_regs' is used in case a
2593 temporary registers needs to be allocated to store a constant. */
2594 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2596 int i, n;
2598 for (i = 0, n = s->nb_globals; i < n; i++) {
2599 temp_save(s, &s->temps[i], allocated_regs);
2603 /* sync globals to their canonical location and assume they can be
2604 read by the following code. 'allocated_regs' is used in case a
2605 temporary registers needs to be allocated to store a constant. */
2606 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2608 int i, n;
2610 for (i = 0, n = s->nb_globals; i < n; i++) {
2611 TCGTemp *ts = &s->temps[i];
2612 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2613 || ts->fixed_reg
2614 || ts->mem_coherent);
2618 /* at the end of a basic block, we assume all temporaries are dead and
2619 all globals are stored at their canonical location. */
2620 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2622 int i;
2624 for (i = s->nb_globals; i < s->nb_temps; i++) {
2625 TCGTemp *ts = &s->temps[i];
2626 if (ts->temp_local) {
2627 temp_save(s, ts, allocated_regs);
2628 } else {
2629 /* The liveness analysis already ensures that temps are dead.
2630 Keep an tcg_debug_assert for safety. */
2631 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2635 save_globals(s, allocated_regs);
/* Record a movi into output temp @ots.  For non-fixed temps the constant
   is propagated (no code emitted yet): the temp simply becomes
   TEMP_VAL_CONST and is synced/killed per its liveness bits.  Note that
   NEED_SYNC_ARG/IS_DEAD_ARG implicitly read the 'arg_life' parameter. */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life)
{
    if (ots->fixed_reg) {
        /* For fixed registers, we do not do any constant propagation. */
        tcg_out_movi(s, ots->type, ots->reg, val);
        return;
    }

    /* The movi is not explicitly generated here. */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
2661 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2663 TCGTemp *ots = arg_temp(op->args[0]);
2664 tcg_target_ulong val = op->args[1];
2666 tcg_reg_alloc_do_movi(s, ots, val, op->life);
/* Register allocation for a mov op.  Handles constant propagation,
   store-to-dead-slot shortcuts, mov suppression (stealing the source's
   register when the source dies here), and the general register-to-
   register copy.  NEED_SYNC_ARG/IS_DEAD_ARG read the 'arg_life' local. */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    allocated_regs = s->reserved_regs;
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* Note that otype != itype for no-op truncation. */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
    }

    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        /* Skip the register entirely: store straight to the output's slot. */
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
    } else {
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed: the output takes over the
               source's register, no copy emitted */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = NULL;
            }
            ots->reg = ts->reg;
            temp_dead(s, ts);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs, ots->indirect_base);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
        if (NEED_SYNC_ARG(0)) {
            temp_sync(s, ots, allocated_regs, 0);
        }
    }
}
/* Generic register allocation for one op: satisfy input constraints
   (possibly via constants or copies for aliased inputs), kill dead
   inputs, handle BB-end / call-clobber / side-effect flags, satisfy
   output constraints, emit the instruction, then place/sync/kill the
   outputs.  NEED_SYNC_ARG/IS_DEAD_ARG read the 'arg_life' local. */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        i = def->sorted_args[nb_oargs + k];
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            goto iarg_end;
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);

        if (arg_ct->ct & TCG_CT_IALIAS) {
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index])
                    goto allocate_in_reg;
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                int k2, i2;
                for (k2 = 0 ; k2 < k ; k2++) {
                    i2 = def->sorted_args[nb_oargs + k2];
                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                        (new_args[i2] == ts->reg)) {
                        goto allocate_in_reg;
                    }
                }
            }
        }
        reg = ts->reg;
        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    iarg_end: ;
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);
            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                /* Output shares the register of its aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                /* Output must not overlap any input register. */
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    ts->indirect_base);
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = NULL;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = ts;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    tcg_out_op(s, op->opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
2909 #ifdef TCG_TARGET_STACK_GROWSUP
2910 #define STACK_DIR(x) (-(x))
2911 #else
2912 #define STACK_DIR(x) (x)
2913 #endif
/* Register allocation for a call op: place arguments that overflow the
   register convention on the stack, load the rest into the target's
   argument registers, kill dead inputs, spill/clobber call-clobbered
   registers, save or sync globals per the call flags, emit the call,
   then bind the call's result registers to the output temps.
   NEED_SYNC_ARG/IS_DEAD_ARG read the 'arg_life' local. */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = op->callo;
    const int nb_iargs = op->calli;
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The callee address and the flags word trail the in/out args. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];
            /* Evict whatever currently occupies the argument register. */
            tcg_reg_free(s, reg, allocated_regs);

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                /* Force the load into exactly the argument register. */
                TCGRegSet arg_set = 0;

                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);
        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}
3044 #ifdef CONFIG_PROFILER
3046 /* avoid copy/paste errors */
/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += atomic_read(&((from)->field));   \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zero'ed @prof.  Accumulates (or max-merges) the per-context
   profiling counters of every live TCGContext into @prof; @counters
   selects the scalar statistics, @table the per-opcode counts. */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX
3102 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3104 tcg_profile_snapshot(prof, true, false);
3107 static void tcg_profile_snapshot_table(TCGProfile *prof)
3109 tcg_profile_snapshot(prof, false, true);
/* Print the aggregated per-opcode execution counts, one line per TCG op.
   This is the CONFIG_PROFILER variant; the stub below replaces it when
   profiling is compiled out. */
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}
#else
/* Stub when the TCG profiler is not built in. */
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif
/* Translate the pending TCG op stream of @s into host code for @tb.
   Runs the optimizer and liveness passes (twice if indirect-temp
   lowering changed anything), then walks the op list dispatching to the
   per-op register allocators while emitting code.  Returns the generated
   code size, or -1 on (pending) code-buffer overflow so the caller can
   retry with a fresh buffer. */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, oi, oi_next, num_insns;

#ifdef CONFIG_PROFILER
    {
        int n;

        /* Number of ops in the buffer (list is circular through slot 0). */
        n = s->gen_op_buf[0].prev + 1;
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    s->ldst_labels = NULL;
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp * const op = &s->gen_op_buf[oi];
        TCGOpcode opc = op->opc;

        /* Read the link now: allocators may modify the op list. */
        oi_next = op->next;
#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn's code range, then record the
               start data for the new one. */
            if (num_insns >= 0) {
                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word is split across two 32-bit args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    if (!tcg_out_ldst_finalize(s)) {
        return -1;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    if (!tcg_out_pool_finalize(s)) {
        return -1;
    }
#endif

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
#ifdef CONFIG_PROFILER
/* Print the aggregated JIT statistics: TB counts, averages per TB,
   cycles per op/byte, and the time split between the translation phases.
   The "2.4 GHz" figure is only a display convention for converting the
   raw cycle counter into seconds. */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero in the per-TB averages below. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte     %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
/* Stub when the TCG profiler is not built in. */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif
3376 #ifdef ELF_HOST_MACHINE
3377 /* In order to use this feature, the backend needs to do three things:
3379 (1) Define ELF_HOST_MACHINE to indicate both what value to
3380 put into the ELF image and to indicate support for the feature.
3382 (2) Define tcg_register_jit. This should create a buffer containing
3383 the contents of a .debug_frame section that describes the post-
3384 prologue unwind info for the tcg machine.
3386 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3389 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* These declarations mirror the GDB JIT interface exactly (see the GDB
   documentation on "JIT Compilation Interface"); GDB finds them by name
   and layout, so none of them may be renamed or reordered. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory ELF symbol file, linked into a doubly
   linked list that GDB walks. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint on this function; the empty asm prevents the
   compiler from optimizing the call away.  Must not be inlined. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3420 /* End GDB interface. */
/* Return the offset of @str within the ELF string table @strtab.
   The table starts with a mandatory empty string at offset 0, so the
   scan begins at offset 1.  The caller guarantees @str is present;
   there is deliberately no not-found exit. */
static int find_string(const char *strtab, const char *str)
{
    const char *cursor;

    for (cursor = strtab + 1; strcmp(cursor, str) != 0;
         cursor += strlen(cursor) + 1) {
        continue;
    }
    return cursor - strtab;
}
/*
 * Build a minimal in-memory ELF image describing code_gen_buffer and
 * register it with the debugger via the GDB JIT interface above.
 *
 * The image contains: one PT_LOAD segment covering the generated-code
 * buffer; a .text section marked SHT_NOBITS (the code bytes live in the
 * real buffer, not in this file); tiny hand-rolled .debug_info and
 * .debug_abbrev sections naming a single function "code_gen_buffer";
 * the caller-supplied .debug_frame appended after the struct; and a
 * two-entry symbol table.
 *
 * buf_ptr/buf_size: the generated-code buffer being registered.
 * debug_frame/debug_frame_size: prebuilt .debug_frame contents whose
 * leading DebugFrameHeader has its FDE address range patched in here.
 */
static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Byte layout of the .debug_info section: one compile-unit DIE and
       one subprogram DIE, matching the abbreviation table 'da' below.
       Packed so that the field offsets equal the DWARF stream offsets.  */
    struct __attribute__((packed)) DebugInfo {
        uint32_t len;          /* unit length, excluding this field */
        uint16_t version;      /* DWARF version (2) */
        uint32_t abbrev;       /* offset into .debug_abbrev (0) */
        uint8_t ptr_size;      /* host address size */
        uint8_t cu_die;        /* abbrev code of the compile-unit DIE */
        uint16_t cu_lang;      /* DW_AT_language */
        uintptr_t cu_low_pc;   /* DW_AT_low_pc, patched at runtime */
        uintptr_t cu_high_pc;  /* DW_AT_high_pc, patched at runtime */
        uint8_t fn_die;        /* abbrev code of the subprogram DIE */
        char fn_name[16];      /* DW_AT_name */
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t cu_eoc;        /* end-of-children terminator */
    };

    /* The complete fake ELF file, except for the .debug_frame data,
       which is copied immediately after this struct (hence shdr[4]'s
       sh_offset and the memcpy near the end).  */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym) sym[2];
        struct DebugInfo di;
        uint8_t da[24];        /* .debug_abbrev contents */
        char str[80];          /* shared .shstrtab/.strtab; zero-padded */
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): presumably this was meant to be
               sizeof(ElfW(Ehdr)); GDB does not appear to reject the
               image over it -- confirm before changing.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,  /* .strtab */
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                /* appended directly after the struct, see memcpy below */
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,                          /* first global sym */
                .sh_link = ARRAY_SIZE(img->shdr) - 1,  /* -> .strtab */
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,   /* defined in .text */
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,  /* excludes 'len' itself */
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        /* Concatenated names; offsets recovered via find_string below.  */
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* The single PT_LOAD segment covers the real generated-code buffer.  */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    /* Patch in runtime addresses and string-table name offsets.  */
    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Append the caller's .debug_frame and point its FDE at the buffer.  */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            /* With nmemb == 1, fwrite returns 1 on success, so comparing
               against img_size is not a real error check; the empty body
               exists only to consume the warn_unused_result return.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the image through the JIT descriptor and fire the
       debugger's breakpoint hook.  */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
3632 #else
3633 /* No support for the feature. Provide the entry point expected by exec.c,
3634 and implement the internal function we declared earlier. */
/* ELF_HOST_MACHINE is undefined for this host: no debugger support.
   No-op implementation of the internal hook declared earlier.  */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
/* No-op public entry point expected by exec.c when the host lacks
   ELF_HOST_MACHINE (JIT debugger registration unsupported).  */
void tcg_register_jit(void *buf, size_t buf_size)
{
}
3645 #endif /* ELF_HOST_MACHINE */