qmp-commands: move 'blockdev-snapshot' doc to schema
[qemu/kevin.git] / tcg / tcg.c
blobcb898f1636b9bf4e005bf42063a84dff8d36b0df
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
33 #include "qemu/cutils.h"
34 #include "qemu/host-utils.h"
35 #include "qemu/timer.h"
37 /* Note: the long term plan is to reduce the dependencies on the QEMU
38 CPU definitions. Currently they are used for qemu_ld/st
39 instructions */
40 #define NO_CPU_IO_DEFS
41 #include "cpu.h"
43 #include "exec/cpu-common.h"
44 #include "exec/exec-all.h"
46 #include "tcg-op.h"
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS ELFCLASS32
50 #else
51 # define ELF_CLASS ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA ELFDATA2MSB
55 #else
56 # define ELF_DATA ELFDATA2LSB
57 #endif
59 #include "elf.h"
60 #include "exec/log.h"
62 /* Forward declarations for functions declared in tcg-target.inc.c and
63 used here. */
64 static void tcg_target_init(TCGContext *s);
65 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
66 static void tcg_target_qemu_prologue(TCGContext *s);
67 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
68 intptr_t value, intptr_t addend);
70 /* The CIE and FDE header definitions will be common to all hosts. */
/*
 * Header of the Common Information Entry (CIE) emitted at the start of
 * the debug frame description.  The leading length word is aligned to
 * the host pointer size.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
/*
 * Packed header of the Frame Description Entry (FDE): entry length,
 * offset of the owning CIE, and the start address and length of the
 * function it describes.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
88 typedef struct QEMU_PACKED {
89 DebugFrameCIE cie;
90 DebugFrameFDEHeader fde;
91 } DebugFrameHeader;
93 static void tcg_register_jit_int(void *buf, size_t size,
94 const void *debug_frame,
95 size_t debug_frame_size)
96 __attribute__((unused));
98 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
99 static const char *target_parse_constraint(TCGArgConstraint *ct,
100 const char *ct_str, TCGType type);
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
102 intptr_t arg2);
103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105 TCGReg ret, tcg_target_long arg);
106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
107 const int *const_args);
108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
109 intptr_t arg2);
110 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
111 TCGReg base, intptr_t ofs);
112 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
113 static int tcg_target_const_match(tcg_target_long val, TCGType type,
114 const TCGArgConstraint *arg_ct);
115 static void tcg_out_tb_init(TCGContext *s);
116 static bool tcg_out_tb_finalize(TCGContext *s);
120 static TCGRegSet tcg_target_available_regs[2];
121 static TCGRegSet tcg_target_call_clobber_regs;
123 #if TCG_TARGET_INSN_UNIT_SIZE == 1
124 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
126 *s->code_ptr++ = v;
129 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
130 uint8_t v)
132 *p = v;
134 #endif
136 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
137 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
139 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
140 *s->code_ptr++ = v;
141 } else {
142 tcg_insn_unit *p = s->code_ptr;
143 memcpy(p, &v, sizeof(v));
144 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
148 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
149 uint16_t v)
151 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
152 *p = v;
153 } else {
154 memcpy(p, &v, sizeof(v));
157 #endif
159 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
160 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
162 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
163 *s->code_ptr++ = v;
164 } else {
165 tcg_insn_unit *p = s->code_ptr;
166 memcpy(p, &v, sizeof(v));
167 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
171 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
172 uint32_t v)
174 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
175 *p = v;
176 } else {
177 memcpy(p, &v, sizeof(v));
180 #endif
182 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
183 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
185 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
186 *s->code_ptr++ = v;
187 } else {
188 tcg_insn_unit *p = s->code_ptr;
189 memcpy(p, &v, sizeof(v));
190 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
194 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
195 uint64_t v)
197 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
198 *p = v;
199 } else {
200 memcpy(p, &v, sizeof(v));
203 #endif
205 /* label relocation processing */
207 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
208 TCGLabel *l, intptr_t addend)
210 TCGRelocation *r;
212 if (l->has_value) {
213 /* FIXME: This may break relocations on RISC targets that
214 modify instruction fields in place. The caller may not have
215 written the initial value. */
216 patch_reloc(code_ptr, type, l->u.value, addend);
217 } else {
218 /* add a new relocation entry */
219 r = tcg_malloc(sizeof(TCGRelocation));
220 r->type = type;
221 r->ptr = code_ptr;
222 r->addend = addend;
223 r->next = l->u.first_reloc;
224 l->u.first_reloc = r;
228 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
230 intptr_t value = (intptr_t)ptr;
231 TCGRelocation *r;
233 tcg_debug_assert(!l->has_value);
235 for (r = l->u.first_reloc; r != NULL; r = r->next) {
236 patch_reloc(r->ptr, r->type, value, r->addend);
239 l->has_value = 1;
240 l->u.value_ptr = ptr;
243 TCGLabel *gen_new_label(void)
245 TCGContext *s = &tcg_ctx;
246 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
248 *l = (TCGLabel){
249 .id = s->nb_labels++
252 return l;
255 #include "tcg-target.inc.c"
257 /* pool based memory allocation */
258 void *tcg_malloc_internal(TCGContext *s, int size)
260 TCGPool *p;
261 int pool_size;
263 if (size > TCG_POOL_CHUNK_SIZE) {
264 /* big malloc: insert a new pool (XXX: could optimize) */
265 p = g_malloc(sizeof(TCGPool) + size);
266 p->size = size;
267 p->next = s->pool_first_large;
268 s->pool_first_large = p;
269 return p->data;
270 } else {
271 p = s->pool_current;
272 if (!p) {
273 p = s->pool_first;
274 if (!p)
275 goto new_pool;
276 } else {
277 if (!p->next) {
278 new_pool:
279 pool_size = TCG_POOL_CHUNK_SIZE;
280 p = g_malloc(sizeof(TCGPool) + pool_size);
281 p->size = pool_size;
282 p->next = NULL;
283 if (s->pool_current)
284 s->pool_current->next = p;
285 else
286 s->pool_first = p;
287 } else {
288 p = p->next;
292 s->pool_current = p;
293 s->pool_cur = p->data + size;
294 s->pool_end = p->data + p->size;
295 return p->data;
298 void tcg_pool_reset(TCGContext *s)
300 TCGPool *p, *t;
301 for (p = s->pool_first_large; p; p = t) {
302 t = p->next;
303 g_free(p);
305 s->pool_first_large = NULL;
306 s->pool_cur = s->pool_end = NULL;
307 s->pool_current = NULL;
/* Description of one helper function callable from generated code. */
typedef struct TCGHelperInfo {
    void *func;         /* entry point; also the key in the helper table */
    const char *name;   /* name reported when dumping ops */
    unsigned flags;     /* call flags stored with each call op */
    unsigned sizemask;  /* bit 0: 64-bit return value; for argument i,
                           bit (i+1)*2: 64-bit, bit (i+1)*2+1: signed */
} TCGHelperInfo;
317 #include "exec/helper-proto.h"
319 static const TCGHelperInfo all_helpers[] = {
320 #include "exec/helper-tcg.h"
323 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
324 static void process_op_defs(TCGContext *s);
326 void tcg_context_init(TCGContext *s)
328 int op, total_args, n, i;
329 TCGOpDef *def;
330 TCGArgConstraint *args_ct;
331 int *sorted_args;
332 GHashTable *helper_table;
334 memset(s, 0, sizeof(*s));
335 s->nb_globals = 0;
337 /* Count total number of arguments and allocate the corresponding
338 space */
339 total_args = 0;
340 for(op = 0; op < NB_OPS; op++) {
341 def = &tcg_op_defs[op];
342 n = def->nb_iargs + def->nb_oargs;
343 total_args += n;
346 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
347 sorted_args = g_malloc(sizeof(int) * total_args);
349 for(op = 0; op < NB_OPS; op++) {
350 def = &tcg_op_defs[op];
351 def->args_ct = args_ct;
352 def->sorted_args = sorted_args;
353 n = def->nb_iargs + def->nb_oargs;
354 sorted_args += n;
355 args_ct += n;
358 /* Register helpers. */
359 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
360 s->helpers = helper_table = g_hash_table_new(NULL, NULL);
362 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
363 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
364 (gpointer)&all_helpers[i]);
367 tcg_target_init(s);
368 process_op_defs(s);
370 /* Reverse the order of the saved registers, assuming they're all at
371 the start of tcg_target_reg_alloc_order. */
372 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
373 int r = tcg_target_reg_alloc_order[n];
374 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
375 break;
378 for (i = 0; i < n; ++i) {
379 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
381 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
382 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
386 void tcg_prologue_init(TCGContext *s)
388 size_t prologue_size, total_size;
389 void *buf0, *buf1;
391 /* Put the prologue at the beginning of code_gen_buffer. */
392 buf0 = s->code_gen_buffer;
393 s->code_ptr = buf0;
394 s->code_buf = buf0;
395 s->code_gen_prologue = buf0;
397 /* Generate the prologue. */
398 tcg_target_qemu_prologue(s);
399 buf1 = s->code_ptr;
400 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
402 /* Deduct the prologue from the buffer. */
403 prologue_size = tcg_current_code_size(s);
404 s->code_gen_ptr = buf1;
405 s->code_gen_buffer = buf1;
406 s->code_buf = buf1;
407 total_size = s->code_gen_buffer_size - prologue_size;
408 s->code_gen_buffer_size = total_size;
410 /* Compute a high-water mark, at which we voluntarily flush the buffer
411 and start over. The size here is arbitrary, significantly larger
412 than we expect the code generation for any one opcode to require. */
413 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
415 tcg_register_jit(s->code_gen_buffer, total_size);
417 #ifdef DEBUG_DISAS
418 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
419 qemu_log_lock();
420 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
421 log_disas(buf0, prologue_size);
422 qemu_log("\n");
423 qemu_log_flush();
424 qemu_log_unlock();
426 #endif
429 void tcg_func_start(TCGContext *s)
431 tcg_pool_reset(s);
432 s->nb_temps = s->nb_globals;
434 /* No temps have been previously allocated for size or locality. */
435 memset(s->free_temps, 0, sizeof(s->free_temps));
437 s->nb_labels = 0;
438 s->current_frame_offset = s->frame_start;
440 #ifdef CONFIG_DEBUG_TCG
441 s->goto_tb_issue_mask = 0;
442 #endif
444 s->gen_op_buf[0].next = 1;
445 s->gen_op_buf[0].prev = 0;
446 s->gen_next_op_idx = 1;
447 s->gen_next_parm_idx = 0;
449 s->be = tcg_malloc(sizeof(TCGBackendData));
452 static inline int temp_idx(TCGContext *s, TCGTemp *ts)
454 ptrdiff_t n = ts - s->temps;
455 tcg_debug_assert(n >= 0 && n < s->nb_temps);
456 return n;
459 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
461 int n = s->nb_temps++;
462 tcg_debug_assert(n < TCG_MAX_TEMPS);
463 return memset(&s->temps[n], 0, sizeof(TCGTemp));
466 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
468 tcg_debug_assert(s->nb_globals == s->nb_temps);
469 s->nb_globals++;
470 return tcg_temp_alloc(s);
473 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
474 TCGReg reg, const char *name)
476 TCGTemp *ts;
478 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
479 tcg_abort();
482 ts = tcg_global_alloc(s);
483 ts->base_type = type;
484 ts->type = type;
485 ts->fixed_reg = 1;
486 ts->reg = reg;
487 ts->name = name;
488 tcg_regset_set_reg(s->reserved_regs, reg);
490 return temp_idx(s, ts);
493 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
495 int idx;
496 s->frame_start = start;
497 s->frame_end = start + size;
498 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
499 s->frame_temp = &s->temps[idx];
502 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
504 TCGContext *s = &tcg_ctx;
505 int idx;
507 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
508 tcg_abort();
510 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
511 return MAKE_TCGV_I32(idx);
514 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
516 TCGContext *s = &tcg_ctx;
517 int idx;
519 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
520 tcg_abort();
522 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
523 return MAKE_TCGV_I64(idx);
526 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
527 intptr_t offset, const char *name)
529 TCGContext *s = &tcg_ctx;
530 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
531 TCGTemp *ts = tcg_global_alloc(s);
532 int indirect_reg = 0, bigendian = 0;
533 #ifdef HOST_WORDS_BIGENDIAN
534 bigendian = 1;
535 #endif
537 if (!base_ts->fixed_reg) {
538 /* We do not support double-indirect registers. */
539 tcg_debug_assert(!base_ts->indirect_reg);
540 base_ts->indirect_base = 1;
541 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
542 ? 2 : 1);
543 indirect_reg = 1;
546 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
547 TCGTemp *ts2 = tcg_global_alloc(s);
548 char buf[64];
550 ts->base_type = TCG_TYPE_I64;
551 ts->type = TCG_TYPE_I32;
552 ts->indirect_reg = indirect_reg;
553 ts->mem_allocated = 1;
554 ts->mem_base = base_ts;
555 ts->mem_offset = offset + bigendian * 4;
556 pstrcpy(buf, sizeof(buf), name);
557 pstrcat(buf, sizeof(buf), "_0");
558 ts->name = strdup(buf);
560 tcg_debug_assert(ts2 == ts + 1);
561 ts2->base_type = TCG_TYPE_I64;
562 ts2->type = TCG_TYPE_I32;
563 ts2->indirect_reg = indirect_reg;
564 ts2->mem_allocated = 1;
565 ts2->mem_base = base_ts;
566 ts2->mem_offset = offset + (1 - bigendian) * 4;
567 pstrcpy(buf, sizeof(buf), name);
568 pstrcat(buf, sizeof(buf), "_1");
569 ts2->name = strdup(buf);
570 } else {
571 ts->base_type = type;
572 ts->type = type;
573 ts->indirect_reg = indirect_reg;
574 ts->mem_allocated = 1;
575 ts->mem_base = base_ts;
576 ts->mem_offset = offset;
577 ts->name = name;
579 return temp_idx(s, ts);
582 static int tcg_temp_new_internal(TCGType type, int temp_local)
584 TCGContext *s = &tcg_ctx;
585 TCGTemp *ts;
586 int idx, k;
588 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
589 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
590 if (idx < TCG_MAX_TEMPS) {
591 /* There is already an available temp with the right type. */
592 clear_bit(idx, s->free_temps[k].l);
594 ts = &s->temps[idx];
595 ts->temp_allocated = 1;
596 tcg_debug_assert(ts->base_type == type);
597 tcg_debug_assert(ts->temp_local == temp_local);
598 } else {
599 ts = tcg_temp_alloc(s);
600 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
601 TCGTemp *ts2 = tcg_temp_alloc(s);
603 ts->base_type = type;
604 ts->type = TCG_TYPE_I32;
605 ts->temp_allocated = 1;
606 ts->temp_local = temp_local;
608 tcg_debug_assert(ts2 == ts + 1);
609 ts2->base_type = TCG_TYPE_I64;
610 ts2->type = TCG_TYPE_I32;
611 ts2->temp_allocated = 1;
612 ts2->temp_local = temp_local;
613 } else {
614 ts->base_type = type;
615 ts->type = type;
616 ts->temp_allocated = 1;
617 ts->temp_local = temp_local;
619 idx = temp_idx(s, ts);
622 #if defined(CONFIG_DEBUG_TCG)
623 s->temps_in_use++;
624 #endif
625 return idx;
628 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
630 int idx;
632 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
633 return MAKE_TCGV_I32(idx);
636 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
638 int idx;
640 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
641 return MAKE_TCGV_I64(idx);
644 static void tcg_temp_free_internal(int idx)
646 TCGContext *s = &tcg_ctx;
647 TCGTemp *ts;
648 int k;
650 #if defined(CONFIG_DEBUG_TCG)
651 s->temps_in_use--;
652 if (s->temps_in_use < 0) {
653 fprintf(stderr, "More temporaries freed than allocated!\n");
655 #endif
657 tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps);
658 ts = &s->temps[idx];
659 tcg_debug_assert(ts->temp_allocated != 0);
660 ts->temp_allocated = 0;
662 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
663 set_bit(idx, s->free_temps[k].l);
666 void tcg_temp_free_i32(TCGv_i32 arg)
668 tcg_temp_free_internal(GET_TCGV_I32(arg));
671 void tcg_temp_free_i64(TCGv_i64 arg)
673 tcg_temp_free_internal(GET_TCGV_I64(arg));
676 TCGv_i32 tcg_const_i32(int32_t val)
678 TCGv_i32 t0;
679 t0 = tcg_temp_new_i32();
680 tcg_gen_movi_i32(t0, val);
681 return t0;
684 TCGv_i64 tcg_const_i64(int64_t val)
686 TCGv_i64 t0;
687 t0 = tcg_temp_new_i64();
688 tcg_gen_movi_i64(t0, val);
689 return t0;
692 TCGv_i32 tcg_const_local_i32(int32_t val)
694 TCGv_i32 t0;
695 t0 = tcg_temp_local_new_i32();
696 tcg_gen_movi_i32(t0, val);
697 return t0;
700 TCGv_i64 tcg_const_local_i64(int64_t val)
702 TCGv_i64 t0;
703 t0 = tcg_temp_local_new_i64();
704 tcg_gen_movi_i64(t0, val);
705 return t0;
708 #if defined(CONFIG_DEBUG_TCG)
709 void tcg_clear_temp_count(void)
711 TCGContext *s = &tcg_ctx;
712 s->temps_in_use = 0;
715 int tcg_check_temp_count(void)
717 TCGContext *s = &tcg_ctx;
718 if (s->temps_in_use) {
719 /* Clear the count so that we don't give another
720 * warning immediately next time around.
722 s->temps_in_use = 0;
723 return 1;
725 return 0;
727 #endif
729 /* Note: we convert the 64 bit args to 32 bit and do some alignment
730 and endian swap. Maybe it would be better to do the alignment
731 and endian swap in tcg_reg_alloc_call(). */
/*
 * Append an INDEX_op_call op for helper FUNC to s->gen_op_buf.
 *
 * The op's parameters are written to s->gen_opparam_buf in this order:
 * return value slot(s), argument slots, the function pointer, the
 * helper's flags.  RET may be TCG_CALL_DUMMY_ARG, in which case no
 * return slot is emitted.  The helper's flags and sizemask come from the
 * s->helpers hash table.
 *
 * NOTE(review): the hash lookup result is dereferenced without a NULL
 * check, so FUNC must have been registered in s->helpers.
 */
732 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
733 int nargs, TCGArg *args)
735 int i, real_args, nb_rets, pi, pi_first;
736 unsigned sizemask, flags;
737 TCGHelperInfo *info;
739 info = g_hash_table_lookup(s->helpers, (gpointer)func);
740 flags = info->flags;
741 sizemask = info->sizemask;
743 #if defined(__sparc__) && !defined(__arch64__) \
744 && !defined(CONFIG_TCG_INTERPRETER)
745 /* We have 64-bit values in one register, but need to pass as two
746 separate parameters. Split them. */
747 int orig_sizemask = sizemask;
748 int orig_nargs = nargs;
749 TCGv_i64 retl, reth;
751 TCGV_UNUSED_I64(retl);
752 TCGV_UNUSED_I64(reth);
753 if (sizemask != 0) {
754 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
755 for (i = real_args = 0; i < nargs; ++i) {
/* Bit (i+1)*2 of sizemask flags argument i as 64-bit. */
756 int is_64bit = sizemask & (1 << (i+1)*2);
757 if (is_64bit) {
758 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
759 TCGv_i32 h = tcg_temp_new_i32();
760 TCGv_i32 l = tcg_temp_new_i32();
761 tcg_gen_extr_i64_i32(l, h, orig);
762 split_args[real_args++] = GET_TCGV_I32(h);
763 split_args[real_args++] = GET_TCGV_I32(l);
764 } else {
765 split_args[real_args++] = args[i];
/* From here on the call is emitted with the split argument list; */
/* sizemask is cleared so no argument is treated as 64-bit below. */
768 nargs = real_args;
769 args = split_args;
770 sizemask = 0;
772 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
/* Replace each 32-bit argument by a 64-bit temp holding its sign- or */
/* zero-extension; the temps are freed again after the op is emitted. */
773 for (i = 0; i < nargs; ++i) {
774 int is_64bit = sizemask & (1 << (i+1)*2);
775 int is_signed = sizemask & (2 << (i+1)*2);
776 if (!is_64bit) {
777 TCGv_i64 temp = tcg_temp_new_i64();
778 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
779 if (is_signed) {
780 tcg_gen_ext32s_i64(temp, orig);
781 } else {
782 tcg_gen_ext32u_i64(temp, orig);
784 args[i] = GET_TCGV_I64(temp);
787 #endif /* TCG_TARGET_EXTEND_ARGS */
/* Output parameters come first; pi_first marks the start of this op's */
/* slice of gen_opparam_buf. */
789 pi_first = pi = s->gen_next_parm_idx;
790 if (ret != TCG_CALL_DUMMY_ARG) {
791 #if defined(__sparc__) && !defined(__arch64__) \
792 && !defined(CONFIG_TCG_INTERPRETER)
793 if (orig_sizemask & 1) {
794 /* The 32-bit ABI is going to return the 64-bit value in
795 the %o0/%o1 register pair. Prepare for this by using
796 two return temporaries, and reassemble below. */
797 retl = tcg_temp_new_i64();
798 reth = tcg_temp_new_i64();
799 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
800 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
801 nb_rets = 2;
802 } else {
803 s->gen_opparam_buf[pi++] = ret;
804 nb_rets = 1;
806 #else
/* Bit 0 of sizemask flags a 64-bit return value; on hosts narrower */
/* than 64 bits it occupies the two consecutive temps ret and ret + 1. */
807 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
808 #ifdef HOST_WORDS_BIGENDIAN
809 s->gen_opparam_buf[pi++] = ret + 1;
810 s->gen_opparam_buf[pi++] = ret;
811 #else
812 s->gen_opparam_buf[pi++] = ret;
813 s->gen_opparam_buf[pi++] = ret + 1;
814 #endif
815 nb_rets = 2;
816 } else {
817 s->gen_opparam_buf[pi++] = ret;
818 nb_rets = 1;
820 #endif
821 } else {
822 nb_rets = 0;
/* Input parameters; real_args counts the slots actually emitted, */
/* including alignment dummies and both halves of split 64-bit values. */
824 real_args = 0;
825 for (i = 0; i < nargs; i++) {
826 int is_64bit = sizemask & (1 << (i+1)*2);
827 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
828 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
829 /* some targets want aligned 64 bit args */
830 if (real_args & 1) {
831 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
832 real_args++;
834 #endif
835 /* If stack grows up, then we will be placing successive
836 arguments at lower addresses, which means we need to
837 reverse the order compared to how we would normally
838 treat either big or little-endian. For those arguments
839 that will wind up in registers, this still works for
840 HPPA (the only current STACK_GROWSUP target) since the
841 argument registers are *also* allocated in decreasing
842 order. If another such target is added, this logic may
843 have to get more complicated to differentiate between
844 stack arguments and register arguments. */
845 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
846 s->gen_opparam_buf[pi++] = args[i] + 1;
847 s->gen_opparam_buf[pi++] = args[i];
848 #else
849 s->gen_opparam_buf[pi++] = args[i];
850 s->gen_opparam_buf[pi++] = args[i] + 1;
851 #endif
852 real_args += 2;
853 continue;
856 s->gen_opparam_buf[pi++] = args[i];
857 real_args++;
/* The last two parameters are the function pointer and the flags. */
859 s->gen_opparam_buf[pi++] = (uintptr_t)func;
860 s->gen_opparam_buf[pi++] = flags;
862 i = s->gen_next_op_idx;
863 tcg_debug_assert(i < OPC_BUF_SIZE);
864 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
866 /* Set links for sequential allocation during translation. */
867 s->gen_op_buf[i] = (TCGOp){
868 .opc = INDEX_op_call,
869 .callo = nb_rets,
870 .calli = real_args,
871 .args = pi_first,
872 .prev = i - 1,
873 .next = i + 1
876 /* Make sure the calli field didn't overflow. */
877 tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
879 s->gen_op_buf[0].prev = i;
880 s->gen_next_op_idx = i + 1;
881 s->gen_next_parm_idx = pi;
883 #if defined(__sparc__) && !defined(__arch64__) \
884 && !defined(CONFIG_TCG_INTERPRETER)
885 /* Free all of the parts we allocated above. */
886 for (i = real_args = 0; i < orig_nargs; ++i) {
887 int is_64bit = orig_sizemask & (1 << (i+1)*2);
888 if (is_64bit) {
889 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
890 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
891 tcg_temp_free_i32(h);
892 tcg_temp_free_i32(l);
893 } else {
894 real_args++;
897 if (orig_sizemask & 1) {
898 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
899 Note that describing these as TCGv_i64 eliminates an unnecessary
900 zero-extension that tcg_gen_concat_i32_i64 would create. */
901 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
902 tcg_temp_free_i64(retl);
903 tcg_temp_free_i64(reth);
905 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
/* Release the extension temps created before the call was emitted. */
906 for (i = 0; i < nargs; ++i) {
907 int is_64bit = sizemask & (1 << (i+1)*2);
908 if (!is_64bit) {
909 TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
910 tcg_temp_free_i64(temp);
913 #endif /* TCG_TARGET_EXTEND_ARGS */
916 static void tcg_reg_alloc_start(TCGContext *s)
918 int i;
919 TCGTemp *ts;
920 for(i = 0; i < s->nb_globals; i++) {
921 ts = &s->temps[i];
922 if (ts->fixed_reg) {
923 ts->val_type = TEMP_VAL_REG;
924 } else {
925 ts->val_type = TEMP_VAL_MEM;
928 for(i = s->nb_globals; i < s->nb_temps; i++) {
929 ts = &s->temps[i];
930 if (ts->temp_local) {
931 ts->val_type = TEMP_VAL_MEM;
932 } else {
933 ts->val_type = TEMP_VAL_DEAD;
935 ts->mem_allocated = 0;
936 ts->fixed_reg = 0;
939 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
942 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
943 TCGTemp *ts)
945 int idx = temp_idx(s, ts);
947 if (idx < s->nb_globals) {
948 pstrcpy(buf, buf_size, ts->name);
949 } else if (ts->temp_local) {
950 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
951 } else {
952 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
954 return buf;
957 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
958 int buf_size, int idx)
960 tcg_debug_assert(idx >= 0 && idx < s->nb_temps);
961 return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
964 /* Find helper name. */
965 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
967 const char *ret = NULL;
968 if (s->helpers) {
969 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
970 if (info) {
971 ret = info->name;
974 return ret;
977 static const char * const cond_name[] =
979 [TCG_COND_NEVER] = "never",
980 [TCG_COND_ALWAYS] = "always",
981 [TCG_COND_EQ] = "eq",
982 [TCG_COND_NE] = "ne",
983 [TCG_COND_LT] = "lt",
984 [TCG_COND_GE] = "ge",
985 [TCG_COND_LE] = "le",
986 [TCG_COND_GT] = "gt",
987 [TCG_COND_LTU] = "ltu",
988 [TCG_COND_GEU] = "geu",
989 [TCG_COND_LEU] = "leu",
990 [TCG_COND_GTU] = "gtu"
993 static const char * const ldst_name[] =
995 [MO_UB] = "ub",
996 [MO_SB] = "sb",
997 [MO_LEUW] = "leuw",
998 [MO_LESW] = "lesw",
999 [MO_LEUL] = "leul",
1000 [MO_LESL] = "lesl",
1001 [MO_LEQ] = "leq",
1002 [MO_BEUW] = "beuw",
1003 [MO_BESW] = "besw",
1004 [MO_BEUL] = "beul",
1005 [MO_BESL] = "besl",
1006 [MO_BEQ] = "beq",
1009 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1010 #ifdef ALIGNED_ONLY
1011 [MO_UNALN >> MO_ASHIFT] = "un+",
1012 [MO_ALIGN >> MO_ASHIFT] = "",
1013 #else
1014 [MO_UNALN >> MO_ASHIFT] = "",
1015 [MO_ALIGN >> MO_ASHIFT] = "al+",
1016 #endif
1017 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1018 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1019 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1020 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1021 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1022 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
/*
 * Write a textual listing of the current op list to the QEMU log.
 *
 * The list is walked from s->gen_op_buf[0].next until the link returns
 * to index 0.  Each op prints as its name followed by its output, input
 * and constant arguments.  insn_start and call ops have their own
 * formats.  If the op carries liveness data, the sync and dead argument
 * sets are appended starting at column 48.
 */
1025 void tcg_dump_ops(TCGContext *s)
1027 char buf[128];
1028 TCGOp *op;
1029 int oi;
1031 for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
1032 int i, k, nb_oargs, nb_iargs, nb_cargs;
1033 const TCGOpDef *def;
1034 const TCGArg *args;
1035 TCGOpcode c;
/* col accumulates the return values of qemu_log() for this op and is */
/* used below to pad the line before the liveness columns. */
1036 int col = 0;
1038 op = &s->gen_op_buf[oi];
1039 c = op->opc;
1040 def = &tcg_op_defs[c];
1041 args = &s->gen_opparam_buf[op->args];
1043 if (c == INDEX_op_insn_start) {
1044 col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
1046 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1047 target_ulong a;
1048 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
/* Each target word is stored as two host-sized args, low half first. */
1049 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
1050 #else
1051 a = args[i];
1052 #endif
1053 col += qemu_log(" " TARGET_FMT_lx, a);
1055 } else if (c == INDEX_op_call) {
1056 /* variable number of arguments */
1057 nb_oargs = op->callo;
1058 nb_iargs = op->calli;
1059 nb_cargs = def->nb_cargs;
1061 /* function name, flags, out args */
/* NOTE(review): tcg_find_helper() returns NULL for an unregistered */
/* address, and that NULL is passed to "%s" here -- confirm intended. */
1062 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1063 tcg_find_helper(s, args[nb_oargs + nb_iargs]),
1064 args[nb_oargs + nb_iargs + 1], nb_oargs);
1065 for (i = 0; i < nb_oargs; i++) {
1066 col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1067 args[i]));
1069 for (i = 0; i < nb_iargs; i++) {
1070 TCGArg arg = args[nb_oargs + i];
1071 const char *t = "<dummy>";
1072 if (arg != TCG_CALL_DUMMY_ARG) {
1073 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
1075 col += qemu_log(",%s", t);
1077 } else {
1078 col += qemu_log(" %s ", def->name);
1080 nb_oargs = def->nb_oargs;
1081 nb_iargs = def->nb_iargs;
1082 nb_cargs = def->nb_cargs;
/* k indexes the op's argument array across outputs, inputs and */
/* constants; i is reused below to count constant args already printed. */
1084 k = 0;
1085 for (i = 0; i < nb_oargs; i++) {
1086 if (k != 0) {
1087 col += qemu_log(",");
1089 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1090 args[k++]));
1092 for (i = 0; i < nb_iargs; i++) {
1093 if (k != 0) {
1094 col += qemu_log(",");
1096 col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1097 args[k++]));
/* First constant arg: print a condition or memop symbolically where */
/* the opcode calls for it. */
1099 switch (c) {
1100 case INDEX_op_brcond_i32:
1101 case INDEX_op_setcond_i32:
1102 case INDEX_op_movcond_i32:
1103 case INDEX_op_brcond2_i32:
1104 case INDEX_op_setcond2_i32:
1105 case INDEX_op_brcond_i64:
1106 case INDEX_op_setcond_i64:
1107 case INDEX_op_movcond_i64:
1108 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
1109 col += qemu_log(",%s", cond_name[args[k++]]);
1110 } else {
1111 col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
1113 i = 1;
1114 break;
1115 case INDEX_op_qemu_ld_i32:
1116 case INDEX_op_qemu_st_i32:
1117 case INDEX_op_qemu_ld_i64:
1118 case INDEX_op_qemu_st_i64:
/* Note: the locals oi and op declared here shadow the loop's oi and op. */
1120 TCGMemOpIdx oi = args[k++];
1121 TCGMemOp op = get_memop(oi);
1122 unsigned ix = get_mmuidx(oi);
1124 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1125 col += qemu_log(",$0x%x,%u", op, ix);
1126 } else {
1127 const char *s_al, *s_op;
1128 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1129 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1130 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1132 i = 1;
1134 break;
1135 default:
1136 i = 0;
1137 break;
/* Next constant arg: print a label reference for branch-like ops. */
1139 switch (c) {
1140 case INDEX_op_set_label:
1141 case INDEX_op_br:
1142 case INDEX_op_brcond_i32:
1143 case INDEX_op_brcond_i64:
1144 case INDEX_op_brcond2_i32:
1145 col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
1146 i++, k++;
1147 break;
1148 default:
1149 break;
/* Any remaining constant args are printed as raw hex values. */
1151 for (; i < nb_cargs; i++, k++) {
1152 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
1155 if (op->life) {
1156 unsigned life = op->life;
1158 for (; col < 48; ++col) {
1159 putc(' ', qemu_logfile);
/* SYNC_ARG * 3 selects the two sync bits tested one by one below. */
1162 if (life & (SYNC_ARG * 3)) {
1163 qemu_log(" sync:");
1164 for (i = 0; i < 2; ++i) {
1165 if (life & (SYNC_ARG << i)) {
1166 qemu_log(" %d", i);
/* Dividing by DEAD_ARG drops the bits below the dead-argument bits. */
1170 life /= DEAD_ARG;
1171 if (life) {
1172 qemu_log(" dead:");
1173 for (i = 0; life; ++i, life >>= 1) {
1174 if (life & 1) {
1175 qemu_log(" %d", i);
1180 qemu_log("\n");
1184 /* we give more priority to constraints with fewer registers */
1185 static int get_constraint_priority(const TCGOpDef *def, int k)
1187 const TCGArgConstraint *arg_ct;
1189 int i, n;
1190 arg_ct = &def->args_ct[k];
1191 if (arg_ct->ct & TCG_CT_ALIAS) {
1192 /* an alias is equivalent to a single register */
1193 n = 1;
1194 } else {
1195 if (!(arg_ct->ct & TCG_CT_REG))
1196 return 0;
1197 n = 0;
1198 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1199 if (tcg_regset_test_reg(arg_ct->u.regs, i))
1200 n++;
1203 return TCG_TARGET_NB_REGS - n + 1;
1206 /* sort from highest priority to lowest */
1207 static void sort_constraints(TCGOpDef *def, int start, int n)
1209 int i, j, p1, p2, tmp;
1211 for(i = 0; i < n; i++)
1212 def->sorted_args[start + i] = start + i;
1213 if (n <= 1)
1214 return;
1215 for(i = 0; i < n - 1; i++) {
1216 for(j = i + 1; j < n; j++) {
1217 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1218 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1219 if (p1 < p2) {
1220 tmp = def->sorted_args[start + i];
1221 def->sorted_args[start + i] = def->sorted_args[start + j];
1222 def->sorted_args[start + j] = tmp;
1228 static void process_op_defs(TCGContext *s)
1230 TCGOpcode op;
1232 for (op = 0; op < NB_OPS; op++) {
1233 TCGOpDef *def = &tcg_op_defs[op];
1234 const TCGTargetOpDef *tdefs;
1235 TCGType type;
1236 int i, nb_args;
1238 if (def->flags & TCG_OPF_NOT_PRESENT) {
1239 continue;
1242 nb_args = def->nb_iargs + def->nb_oargs;
1243 if (nb_args == 0) {
1244 continue;
1247 tdefs = tcg_target_op_def(op);
1248 /* Missing TCGTargetOpDef entry. */
1249 tcg_debug_assert(tdefs != NULL);
1251 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
1252 for (i = 0; i < nb_args; i++) {
1253 const char *ct_str = tdefs->args_ct_str[i];
1254 /* Incomplete TCGTargetOpDef entry. */
1255 tcg_debug_assert(ct_str != NULL);
1257 tcg_regset_clear(def->args_ct[i].u.regs);
1258 def->args_ct[i].ct = 0;
1259 while (*ct_str != '\0') {
1260 switch(*ct_str) {
1261 case '0' ... '9':
1263 int oarg = *ct_str - '0';
1264 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
1265 tcg_debug_assert(oarg < def->nb_oargs);
1266 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
1267 /* TCG_CT_ALIAS is for the output arguments.
1268 The input is tagged with TCG_CT_IALIAS. */
1269 def->args_ct[i] = def->args_ct[oarg];
1270 def->args_ct[oarg].ct |= TCG_CT_ALIAS;
1271 def->args_ct[oarg].alias_index = i;
1272 def->args_ct[i].ct |= TCG_CT_IALIAS;
1273 def->args_ct[i].alias_index = oarg;
1275 ct_str++;
1276 break;
1277 case '&':
1278 def->args_ct[i].ct |= TCG_CT_NEWREG;
1279 ct_str++;
1280 break;
1281 case 'i':
1282 def->args_ct[i].ct |= TCG_CT_CONST;
1283 ct_str++;
1284 break;
1285 default:
1286 ct_str = target_parse_constraint(&def->args_ct[i],
1287 ct_str, type);
1288 /* Typo in TCGTargetOpDef constraint. */
1289 tcg_debug_assert(ct_str != NULL);
1294 /* TCGTargetOpDef entry with too much information? */
1295 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1297 /* sort the constraints (XXX: this is just an heuristic) */
1298 sort_constraints(def, 0, def->nb_oargs);
1299 sort_constraints(def, def->nb_oargs, def->nb_iargs);
1303 void tcg_op_remove(TCGContext *s, TCGOp *op)
1305 int next = op->next;
1306 int prev = op->prev;
1308 /* We should never attempt to remove the list terminator. */
1309 tcg_debug_assert(op != &s->gen_op_buf[0]);
1311 s->gen_op_buf[next].prev = prev;
1312 s->gen_op_buf[prev].next = next;
1314 memset(op, 0, sizeof(*op));
1316 #ifdef CONFIG_PROFILER
1317 s->del_op_count++;
1318 #endif
1321 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1322 TCGOpcode opc, int nargs)
1324 int oi = s->gen_next_op_idx;
1325 int pi = s->gen_next_parm_idx;
1326 int prev = old_op->prev;
1327 int next = old_op - s->gen_op_buf;
1328 TCGOp *new_op;
1330 tcg_debug_assert(oi < OPC_BUF_SIZE);
1331 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
1332 s->gen_next_op_idx = oi + 1;
1333 s->gen_next_parm_idx = pi + nargs;
1335 new_op = &s->gen_op_buf[oi];
1336 *new_op = (TCGOp){
1337 .opc = opc,
1338 .args = pi,
1339 .prev = prev,
1340 .next = next
1342 s->gen_op_buf[prev].next = oi;
1343 old_op->prev = oi;
1345 return new_op;
1348 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1349 TCGOpcode opc, int nargs)
1351 int oi = s->gen_next_op_idx;
1352 int pi = s->gen_next_parm_idx;
1353 int prev = old_op - s->gen_op_buf;
1354 int next = old_op->next;
1355 TCGOp *new_op;
1357 tcg_debug_assert(oi < OPC_BUF_SIZE);
1358 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
1359 s->gen_next_op_idx = oi + 1;
1360 s->gen_next_parm_idx = pi + nargs;
1362 new_op = &s->gen_op_buf[oi];
1363 *new_op = (TCGOp){
1364 .opc = opc,
1365 .args = pi,
1366 .prev = prev,
1367 .next = next
1369 s->gen_op_buf[next].prev = oi;
1370 old_op->next = oi;
1372 return new_op;
/* Per-temp state bits used by the liveness passes (temp_state[]). */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit of argument N.  Both macros read a variable
 * named 'arg_life' that must be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
1381 /* liveness analysis: end of function: all temps are dead, and globals
1382 should be in memory. */
1383 static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
1385 memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
1386 memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
1389 /* liveness analysis: end of basic block: all temps are dead, globals
1390 and local temps should be in memory. */
1391 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
1393 int i, n;
1395 tcg_la_func_end(s, temp_state);
1396 for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
1397 if (s->temps[i].temp_local) {
1398 temp_state[i] |= TS_MEM;
1403 /* Liveness analysis : update the opc_arg_life array to tell if a
1404 given input arguments is dead. Instructions updating dead
1405 temporaries are removed. */
1406 static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
1408 int nb_globals = s->nb_globals;
1409 int oi, oi_prev;
1411 tcg_la_func_end(s, temp_state);
1413 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
1414 int i, nb_iargs, nb_oargs;
1415 TCGOpcode opc_new, opc_new2;
1416 bool have_opc_new2;
1417 TCGLifeData arg_life = 0;
1418 TCGArg arg;
1420 TCGOp * const op = &s->gen_op_buf[oi];
1421 TCGArg * const args = &s->gen_opparam_buf[op->args];
1422 TCGOpcode opc = op->opc;
1423 const TCGOpDef *def = &tcg_op_defs[opc];
1425 oi_prev = op->prev;
1427 switch (opc) {
1428 case INDEX_op_call:
1430 int call_flags;
1432 nb_oargs = op->callo;
1433 nb_iargs = op->calli;
1434 call_flags = args[nb_oargs + nb_iargs + 1];
1436 /* pure functions can be removed if their result is unused */
1437 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
1438 for (i = 0; i < nb_oargs; i++) {
1439 arg = args[i];
1440 if (temp_state[arg] != TS_DEAD) {
1441 goto do_not_remove_call;
1444 goto do_remove;
1445 } else {
1446 do_not_remove_call:
1448 /* output args are dead */
1449 for (i = 0; i < nb_oargs; i++) {
1450 arg = args[i];
1451 if (temp_state[arg] & TS_DEAD) {
1452 arg_life |= DEAD_ARG << i;
1454 if (temp_state[arg] & TS_MEM) {
1455 arg_life |= SYNC_ARG << i;
1457 temp_state[arg] = TS_DEAD;
1460 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
1461 TCG_CALL_NO_READ_GLOBALS))) {
1462 /* globals should go back to memory */
1463 memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
1464 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
1465 /* globals should be synced to memory */
1466 for (i = 0; i < nb_globals; i++) {
1467 temp_state[i] |= TS_MEM;
1471 /* record arguments that die in this helper */
1472 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1473 arg = args[i];
1474 if (arg != TCG_CALL_DUMMY_ARG) {
1475 if (temp_state[arg] & TS_DEAD) {
1476 arg_life |= DEAD_ARG << i;
1480 /* input arguments are live for preceding opcodes */
1481 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1482 arg = args[i];
1483 if (arg != TCG_CALL_DUMMY_ARG) {
1484 temp_state[arg] &= ~TS_DEAD;
1489 break;
1490 case INDEX_op_insn_start:
1491 break;
1492 case INDEX_op_discard:
1493 /* mark the temporary as dead */
1494 temp_state[args[0]] = TS_DEAD;
1495 break;
1497 case INDEX_op_add2_i32:
1498 opc_new = INDEX_op_add_i32;
1499 goto do_addsub2;
1500 case INDEX_op_sub2_i32:
1501 opc_new = INDEX_op_sub_i32;
1502 goto do_addsub2;
1503 case INDEX_op_add2_i64:
1504 opc_new = INDEX_op_add_i64;
1505 goto do_addsub2;
1506 case INDEX_op_sub2_i64:
1507 opc_new = INDEX_op_sub_i64;
1508 do_addsub2:
1509 nb_iargs = 4;
1510 nb_oargs = 2;
1511 /* Test if the high part of the operation is dead, but not
1512 the low part. The result can be optimized to a simple
1513 add or sub. This happens often for x86_64 guest when the
1514 cpu mode is set to 32 bit. */
1515 if (temp_state[args[1]] == TS_DEAD) {
1516 if (temp_state[args[0]] == TS_DEAD) {
1517 goto do_remove;
1519 /* Replace the opcode and adjust the args in place,
1520 leaving 3 unused args at the end. */
1521 op->opc = opc = opc_new;
1522 args[1] = args[2];
1523 args[2] = args[4];
1524 /* Fall through and mark the single-word operation live. */
1525 nb_iargs = 2;
1526 nb_oargs = 1;
1528 goto do_not_remove;
1530 case INDEX_op_mulu2_i32:
1531 opc_new = INDEX_op_mul_i32;
1532 opc_new2 = INDEX_op_muluh_i32;
1533 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
1534 goto do_mul2;
1535 case INDEX_op_muls2_i32:
1536 opc_new = INDEX_op_mul_i32;
1537 opc_new2 = INDEX_op_mulsh_i32;
1538 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
1539 goto do_mul2;
1540 case INDEX_op_mulu2_i64:
1541 opc_new = INDEX_op_mul_i64;
1542 opc_new2 = INDEX_op_muluh_i64;
1543 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
1544 goto do_mul2;
1545 case INDEX_op_muls2_i64:
1546 opc_new = INDEX_op_mul_i64;
1547 opc_new2 = INDEX_op_mulsh_i64;
1548 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
1549 goto do_mul2;
1550 do_mul2:
1551 nb_iargs = 2;
1552 nb_oargs = 2;
1553 if (temp_state[args[1]] == TS_DEAD) {
1554 if (temp_state[args[0]] == TS_DEAD) {
1555 /* Both parts of the operation are dead. */
1556 goto do_remove;
1558 /* The high part of the operation is dead; generate the low. */
1559 op->opc = opc = opc_new;
1560 args[1] = args[2];
1561 args[2] = args[3];
1562 } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
1563 /* The low part of the operation is dead; generate the high. */
1564 op->opc = opc = opc_new2;
1565 args[0] = args[1];
1566 args[1] = args[2];
1567 args[2] = args[3];
1568 } else {
1569 goto do_not_remove;
1571 /* Mark the single-word operation live. */
1572 nb_oargs = 1;
1573 goto do_not_remove;
1575 default:
1576 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1577 nb_iargs = def->nb_iargs;
1578 nb_oargs = def->nb_oargs;
1580 /* Test if the operation can be removed because all
1581 its outputs are dead. We assume that nb_oargs == 0
1582 implies side effects */
1583 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
1584 for (i = 0; i < nb_oargs; i++) {
1585 if (temp_state[args[i]] != TS_DEAD) {
1586 goto do_not_remove;
1589 do_remove:
1590 tcg_op_remove(s, op);
1591 } else {
1592 do_not_remove:
1593 /* output args are dead */
1594 for (i = 0; i < nb_oargs; i++) {
1595 arg = args[i];
1596 if (temp_state[arg] & TS_DEAD) {
1597 arg_life |= DEAD_ARG << i;
1599 if (temp_state[arg] & TS_MEM) {
1600 arg_life |= SYNC_ARG << i;
1602 temp_state[arg] = TS_DEAD;
1605 /* if end of basic block, update */
1606 if (def->flags & TCG_OPF_BB_END) {
1607 tcg_la_bb_end(s, temp_state);
1608 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1609 /* globals should be synced to memory */
1610 for (i = 0; i < nb_globals; i++) {
1611 temp_state[i] |= TS_MEM;
1615 /* record arguments that die in this opcode */
1616 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1617 arg = args[i];
1618 if (temp_state[arg] & TS_DEAD) {
1619 arg_life |= DEAD_ARG << i;
1622 /* input arguments are live for preceding opcodes */
1623 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1624 temp_state[args[i]] &= ~TS_DEAD;
1627 break;
1629 op->life = arg_life;
1633 /* Liveness analysis: Convert indirect regs to direct temporaries. */
1634 static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
1636 int nb_globals = s->nb_globals;
1637 int16_t *dir_temps;
1638 int i, oi, oi_next;
1639 bool changes = false;
1641 dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
1642 memset(dir_temps, 0, nb_globals * sizeof(int16_t));
1644 /* Create a temporary for each indirect global. */
1645 for (i = 0; i < nb_globals; ++i) {
1646 TCGTemp *its = &s->temps[i];
1647 if (its->indirect_reg) {
1648 TCGTemp *dts = tcg_temp_alloc(s);
1649 dts->type = its->type;
1650 dts->base_type = its->base_type;
1651 dir_temps[i] = temp_idx(s, dts);
1655 memset(temp_state, TS_DEAD, nb_globals);
1657 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
1658 TCGOp *op = &s->gen_op_buf[oi];
1659 TCGArg *args = &s->gen_opparam_buf[op->args];
1660 TCGOpcode opc = op->opc;
1661 const TCGOpDef *def = &tcg_op_defs[opc];
1662 TCGLifeData arg_life = op->life;
1663 int nb_iargs, nb_oargs, call_flags;
1664 TCGArg arg, dir;
1666 oi_next = op->next;
1668 if (opc == INDEX_op_call) {
1669 nb_oargs = op->callo;
1670 nb_iargs = op->calli;
1671 call_flags = args[nb_oargs + nb_iargs + 1];
1672 } else {
1673 nb_iargs = def->nb_iargs;
1674 nb_oargs = def->nb_oargs;
1676 /* Set flags similar to how calls require. */
1677 if (def->flags & TCG_OPF_BB_END) {
1678 /* Like writing globals: save_globals */
1679 call_flags = 0;
1680 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1681 /* Like reading globals: sync_globals */
1682 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
1683 } else {
1684 /* No effect on globals. */
1685 call_flags = (TCG_CALL_NO_READ_GLOBALS |
1686 TCG_CALL_NO_WRITE_GLOBALS);
1690 /* Make sure that input arguments are available. */
1691 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1692 arg = args[i];
1693 /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */
1694 if (arg < nb_globals) {
1695 dir = dir_temps[arg];
1696 if (dir != 0 && temp_state[arg] == TS_DEAD) {
1697 TCGTemp *its = &s->temps[arg];
1698 TCGOpcode lopc = (its->type == TCG_TYPE_I32
1699 ? INDEX_op_ld_i32
1700 : INDEX_op_ld_i64);
1701 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
1702 TCGArg *largs = &s->gen_opparam_buf[lop->args];
1704 largs[0] = dir;
1705 largs[1] = temp_idx(s, its->mem_base);
1706 largs[2] = its->mem_offset;
1708 /* Loaded, but synced with memory. */
1709 temp_state[arg] = TS_MEM;
1714 /* Perform input replacement, and mark inputs that became dead.
1715 No action is required except keeping temp_state up to date
1716 so that we reload when needed. */
1717 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1718 arg = args[i];
1719 if (arg < nb_globals) {
1720 dir = dir_temps[arg];
1721 if (dir != 0) {
1722 args[i] = dir;
1723 changes = true;
1724 if (IS_DEAD_ARG(i)) {
1725 temp_state[arg] = TS_DEAD;
1731 /* Liveness analysis should ensure that the following are
1732 all correct, for call sites and basic block end points. */
1733 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
1734 /* Nothing to do */
1735 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
1736 for (i = 0; i < nb_globals; ++i) {
1737 /* Liveness should see that globals are synced back,
1738 that is, either TS_DEAD or TS_MEM. */
1739 tcg_debug_assert(dir_temps[i] == 0
1740 || temp_state[i] != 0);
1742 } else {
1743 for (i = 0; i < nb_globals; ++i) {
1744 /* Liveness should see that globals are saved back,
1745 that is, TS_DEAD, waiting to be reloaded. */
1746 tcg_debug_assert(dir_temps[i] == 0
1747 || temp_state[i] == TS_DEAD);
1751 /* Outputs become available. */
1752 for (i = 0; i < nb_oargs; i++) {
1753 arg = args[i];
1754 if (arg >= nb_globals) {
1755 continue;
1757 dir = dir_temps[arg];
1758 if (dir == 0) {
1759 continue;
1761 args[i] = dir;
1762 changes = true;
1764 /* The output is now live and modified. */
1765 temp_state[arg] = 0;
1767 /* Sync outputs upon their last write. */
1768 if (NEED_SYNC_ARG(i)) {
1769 TCGTemp *its = &s->temps[arg];
1770 TCGOpcode sopc = (its->type == TCG_TYPE_I32
1771 ? INDEX_op_st_i32
1772 : INDEX_op_st_i64);
1773 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
1774 TCGArg *sargs = &s->gen_opparam_buf[sop->args];
1776 sargs[0] = dir;
1777 sargs[1] = temp_idx(s, its->mem_base);
1778 sargs[2] = its->mem_offset;
1780 temp_state[arg] = TS_MEM;
1782 /* Drop outputs that are dead. */
1783 if (IS_DEAD_ARG(i)) {
1784 temp_state[arg] = TS_DEAD;
1789 return changes;
1792 #ifdef CONFIG_DEBUG_TCG
1793 static void dump_regs(TCGContext *s)
1795 TCGTemp *ts;
1796 int i;
1797 char buf[64];
1799 for(i = 0; i < s->nb_temps; i++) {
1800 ts = &s->temps[i];
1801 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
1802 switch(ts->val_type) {
1803 case TEMP_VAL_REG:
1804 printf("%s", tcg_target_reg_names[ts->reg]);
1805 break;
1806 case TEMP_VAL_MEM:
1807 printf("%d(%s)", (int)ts->mem_offset,
1808 tcg_target_reg_names[ts->mem_base->reg]);
1809 break;
1810 case TEMP_VAL_CONST:
1811 printf("$0x%" TCG_PRIlx, ts->val);
1812 break;
1813 case TEMP_VAL_DEAD:
1814 printf("D");
1815 break;
1816 default:
1817 printf("???");
1818 break;
1820 printf("\n");
1823 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1824 if (s->reg_to_temp[i] != NULL) {
1825 printf("%s: %s\n",
1826 tcg_target_reg_names[i],
1827 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
1832 static void check_regs(TCGContext *s)
1834 int reg;
1835 int k;
1836 TCGTemp *ts;
1837 char buf[64];
1839 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
1840 ts = s->reg_to_temp[reg];
1841 if (ts != NULL) {
1842 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
1843 printf("Inconsistency for register %s:\n",
1844 tcg_target_reg_names[reg]);
1845 goto fail;
1849 for (k = 0; k < s->nb_temps; k++) {
1850 ts = &s->temps[k];
1851 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
1852 && s->reg_to_temp[ts->reg] != ts) {
1853 printf("Inconsistency for temp %s:\n",
1854 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
1855 fail:
1856 printf("reg state:\n");
1857 dump_regs(s);
1858 tcg_abort();
1862 #endif
1864 static void temp_allocate_frame(TCGContext *s, int temp)
1866 TCGTemp *ts;
1867 ts = &s->temps[temp];
1868 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1869 /* Sparc64 stack is accessed with offset of 2047 */
1870 s->current_frame_offset = (s->current_frame_offset +
1871 (tcg_target_long)sizeof(tcg_target_long) - 1) &
1872 ~(sizeof(tcg_target_long) - 1);
1873 #endif
1874 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
1875 s->frame_end) {
1876 tcg_abort();
1878 ts->mem_offset = s->current_frame_offset;
1879 ts->mem_base = s->frame_temp;
1880 ts->mem_allocated = 1;
1881 s->current_frame_offset += sizeof(tcg_target_long);
1884 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
1886 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
1887 mark it free; otherwise mark it dead. */
1888 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
1890 if (ts->fixed_reg) {
1891 return;
1893 if (ts->val_type == TEMP_VAL_REG) {
1894 s->reg_to_temp[ts->reg] = NULL;
1896 ts->val_type = (free_or_dead < 0
1897 || ts->temp_local
1898 || temp_idx(s, ts) < s->nb_globals
1899 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1902 /* Mark a temporary as dead. */
1903 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
1905 temp_free_or_dead(s, ts, 1);
1908 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
1909 registers needs to be allocated to store a constant. If 'free_or_dead'
1910 is non-zero, subsequently release the temporary; if it is positive, the
1911 temp is dead; if it is negative, the temp is free. */
1912 static void temp_sync(TCGContext *s, TCGTemp *ts,
1913 TCGRegSet allocated_regs, int free_or_dead)
1915 if (ts->fixed_reg) {
1916 return;
1918 if (!ts->mem_coherent) {
1919 if (!ts->mem_allocated) {
1920 temp_allocate_frame(s, temp_idx(s, ts));
1922 switch (ts->val_type) {
1923 case TEMP_VAL_CONST:
1924 /* If we're going to free the temp immediately, then we won't
1925 require it later in a register, so attempt to store the
1926 constant to memory directly. */
1927 if (free_or_dead
1928 && tcg_out_sti(s, ts->type, ts->val,
1929 ts->mem_base->reg, ts->mem_offset)) {
1930 break;
1932 temp_load(s, ts, tcg_target_available_regs[ts->type],
1933 allocated_regs);
1934 /* fallthrough */
1936 case TEMP_VAL_REG:
1937 tcg_out_st(s, ts->type, ts->reg,
1938 ts->mem_base->reg, ts->mem_offset);
1939 break;
1941 case TEMP_VAL_MEM:
1942 break;
1944 case TEMP_VAL_DEAD:
1945 default:
1946 tcg_abort();
1948 ts->mem_coherent = 1;
1950 if (free_or_dead) {
1951 temp_free_or_dead(s, ts, free_or_dead);
1955 /* free register 'reg' by spilling the corresponding temporary if necessary */
1956 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
1958 TCGTemp *ts = s->reg_to_temp[reg];
1959 if (ts != NULL) {
1960 temp_sync(s, ts, allocated_regs, -1);
1964 /* Allocate a register belonging to reg1 & ~reg2 */
1965 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
1966 TCGRegSet allocated_regs, bool rev)
1968 int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
1969 const int *order;
1970 TCGReg reg;
1971 TCGRegSet reg_ct;
1973 tcg_regset_andnot(reg_ct, desired_regs, allocated_regs);
1974 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
1976 /* first try free registers */
1977 for(i = 0; i < n; i++) {
1978 reg = order[i];
1979 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
1980 return reg;
1983 /* XXX: do better spill choice */
1984 for(i = 0; i < n; i++) {
1985 reg = order[i];
1986 if (tcg_regset_test_reg(reg_ct, reg)) {
1987 tcg_reg_free(s, reg, allocated_regs);
1988 return reg;
1992 tcg_abort();
1995 /* Make sure the temporary is in a register. If needed, allocate the register
1996 from DESIRED while avoiding ALLOCATED. */
1997 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
1998 TCGRegSet allocated_regs)
2000 TCGReg reg;
2002 switch (ts->val_type) {
2003 case TEMP_VAL_REG:
2004 return;
2005 case TEMP_VAL_CONST:
2006 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2007 tcg_out_movi(s, ts->type, reg, ts->val);
2008 ts->mem_coherent = 0;
2009 break;
2010 case TEMP_VAL_MEM:
2011 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2012 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2013 ts->mem_coherent = 1;
2014 break;
2015 case TEMP_VAL_DEAD:
2016 default:
2017 tcg_abort();
2019 ts->reg = reg;
2020 ts->val_type = TEMP_VAL_REG;
2021 s->reg_to_temp[reg] = ts;
2024 /* Save a temporary to memory. 'allocated_regs' is used in case a
2025 temporary registers needs to be allocated to store a constant. */
2026 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2028 /* The liveness analysis already ensures that globals are back
2029 in memory. Keep an tcg_debug_assert for safety. */
2030 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2033 /* save globals to their canonical location and assume they can be
2034 modified be the following code. 'allocated_regs' is used in case a
2035 temporary registers needs to be allocated to store a constant. */
2036 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2038 int i;
2040 for (i = 0; i < s->nb_globals; i++) {
2041 temp_save(s, &s->temps[i], allocated_regs);
2045 /* sync globals to their canonical location and assume they can be
2046 read by the following code. 'allocated_regs' is used in case a
2047 temporary registers needs to be allocated to store a constant. */
2048 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2050 int i;
2052 for (i = 0; i < s->nb_globals; i++) {
2053 TCGTemp *ts = &s->temps[i];
2054 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2055 || ts->fixed_reg
2056 || ts->mem_coherent);
2060 /* at the end of a basic block, we assume all temporaries are dead and
2061 all globals are stored at their canonical location. */
2062 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2064 int i;
2066 for (i = s->nb_globals; i < s->nb_temps; i++) {
2067 TCGTemp *ts = &s->temps[i];
2068 if (ts->temp_local) {
2069 temp_save(s, ts, allocated_regs);
2070 } else {
2071 /* The liveness analysis already ensures that temps are dead.
2072 Keep an tcg_debug_assert for safety. */
2073 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2077 save_globals(s, allocated_regs);
2080 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2081 tcg_target_ulong val, TCGLifeData arg_life)
2083 if (ots->fixed_reg) {
2084 /* For fixed registers, we do not do any constant propagation. */
2085 tcg_out_movi(s, ots->type, ots->reg, val);
2086 return;
2089 /* The movi is not explicitly generated here. */
2090 if (ots->val_type == TEMP_VAL_REG) {
2091 s->reg_to_temp[ots->reg] = NULL;
2093 ots->val_type = TEMP_VAL_CONST;
2094 ots->val = val;
2095 ots->mem_coherent = 0;
2096 if (NEED_SYNC_ARG(0)) {
2097 temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2098 } else if (IS_DEAD_ARG(0)) {
2099 temp_dead(s, ots);
2103 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
2104 TCGLifeData arg_life)
2106 TCGTemp *ots = &s->temps[args[0]];
2107 tcg_target_ulong val = args[1];
2109 tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2112 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
2113 const TCGArg *args, TCGLifeData arg_life)
2115 TCGRegSet allocated_regs;
2116 TCGTemp *ts, *ots;
2117 TCGType otype, itype;
2119 tcg_regset_set(allocated_regs, s->reserved_regs);
2120 ots = &s->temps[args[0]];
2121 ts = &s->temps[args[1]];
2123 /* Note that otype != itype for no-op truncation. */
2124 otype = ots->type;
2125 itype = ts->type;
2127 if (ts->val_type == TEMP_VAL_CONST) {
2128 /* propagate constant or generate sti */
2129 tcg_target_ulong val = ts->val;
2130 if (IS_DEAD_ARG(1)) {
2131 temp_dead(s, ts);
2133 tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2134 return;
2137 /* If the source value is in memory we're going to be forced
2138 to have it in a register in order to perform the copy. Copy
2139 the SOURCE value into its own register first, that way we
2140 don't have to reload SOURCE the next time it is used. */
2141 if (ts->val_type == TEMP_VAL_MEM) {
2142 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2145 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2146 if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2147 /* mov to a non-saved dead register makes no sense (even with
2148 liveness analysis disabled). */
2149 tcg_debug_assert(NEED_SYNC_ARG(0));
2150 if (!ots->mem_allocated) {
2151 temp_allocate_frame(s, args[0]);
2153 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2154 if (IS_DEAD_ARG(1)) {
2155 temp_dead(s, ts);
2157 temp_dead(s, ots);
2158 } else {
2159 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
2160 /* the mov can be suppressed */
2161 if (ots->val_type == TEMP_VAL_REG) {
2162 s->reg_to_temp[ots->reg] = NULL;
2164 ots->reg = ts->reg;
2165 temp_dead(s, ts);
2166 } else {
2167 if (ots->val_type != TEMP_VAL_REG) {
2168 /* When allocating a new register, make sure to not spill the
2169 input one. */
2170 tcg_regset_set_reg(allocated_regs, ts->reg);
2171 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
2172 allocated_regs, ots->indirect_base);
2174 tcg_out_mov(s, otype, ots->reg, ts->reg);
2176 ots->val_type = TEMP_VAL_REG;
2177 ots->mem_coherent = 0;
2178 s->reg_to_temp[ots->reg] = ots;
2179 if (NEED_SYNC_ARG(0)) {
2180 temp_sync(s, ots, allocated_regs, 0);
2185 static void tcg_reg_alloc_op(TCGContext *s,
2186 const TCGOpDef *def, TCGOpcode opc,
2187 const TCGArg *args, TCGLifeData arg_life)
2189 TCGRegSet i_allocated_regs;
2190 TCGRegSet o_allocated_regs;
2191 int i, k, nb_iargs, nb_oargs;
2192 TCGReg reg;
2193 TCGArg arg;
2194 const TCGArgConstraint *arg_ct;
2195 TCGTemp *ts;
2196 TCGArg new_args[TCG_MAX_OP_ARGS];
2197 int const_args[TCG_MAX_OP_ARGS];
2199 nb_oargs = def->nb_oargs;
2200 nb_iargs = def->nb_iargs;
2202 /* copy constants */
2203 memcpy(new_args + nb_oargs + nb_iargs,
2204 args + nb_oargs + nb_iargs,
2205 sizeof(TCGArg) * def->nb_cargs);
2207 tcg_regset_set(i_allocated_regs, s->reserved_regs);
2208 tcg_regset_set(o_allocated_regs, s->reserved_regs);
2210 /* satisfy input constraints */
2211 for(k = 0; k < nb_iargs; k++) {
2212 i = def->sorted_args[nb_oargs + k];
2213 arg = args[i];
2214 arg_ct = &def->args_ct[i];
2215 ts = &s->temps[arg];
2217 if (ts->val_type == TEMP_VAL_CONST
2218 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
2219 /* constant is OK for instruction */
2220 const_args[i] = 1;
2221 new_args[i] = ts->val;
2222 goto iarg_end;
2225 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
2227 if (arg_ct->ct & TCG_CT_IALIAS) {
2228 if (ts->fixed_reg) {
2229 /* if fixed register, we must allocate a new register
2230 if the alias is not the same register */
2231 if (arg != args[arg_ct->alias_index])
2232 goto allocate_in_reg;
2233 } else {
2234 /* if the input is aliased to an output and if it is
2235 not dead after the instruction, we must allocate
2236 a new register and move it */
2237 if (!IS_DEAD_ARG(i)) {
2238 goto allocate_in_reg;
2240 /* check if the current register has already been allocated
2241 for another input aliased to an output */
2242 int k2, i2;
2243 for (k2 = 0 ; k2 < k ; k2++) {
2244 i2 = def->sorted_args[nb_oargs + k2];
2245 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2246 (new_args[i2] == ts->reg)) {
2247 goto allocate_in_reg;
2252 reg = ts->reg;
2253 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2254 /* nothing to do : the constraint is satisfied */
2255 } else {
2256 allocate_in_reg:
2257 /* allocate a new register matching the constraint
2258 and move the temporary register into it */
2259 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
2260 ts->indirect_base);
2261 tcg_out_mov(s, ts->type, reg, ts->reg);
2263 new_args[i] = reg;
2264 const_args[i] = 0;
2265 tcg_regset_set_reg(i_allocated_regs, reg);
2266 iarg_end: ;
2269 /* mark dead temporaries and free the associated registers */
2270 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2271 if (IS_DEAD_ARG(i)) {
2272 temp_dead(s, &s->temps[args[i]]);
2276 if (def->flags & TCG_OPF_BB_END) {
2277 tcg_reg_alloc_bb_end(s, i_allocated_regs);
2278 } else {
2279 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2280 /* XXX: permit generic clobber register list ? */
2281 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2282 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2283 tcg_reg_free(s, i, i_allocated_regs);
2287 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2288 /* sync globals if the op has side effects and might trigger
2289 an exception. */
2290 sync_globals(s, i_allocated_regs);
2293 /* satisfy the output constraints */
2294 for(k = 0; k < nb_oargs; k++) {
2295 i = def->sorted_args[k];
2296 arg = args[i];
2297 arg_ct = &def->args_ct[i];
2298 ts = &s->temps[arg];
2299 if ((arg_ct->ct & TCG_CT_ALIAS)
2300 && !const_args[arg_ct->alias_index]) {
2301 reg = new_args[arg_ct->alias_index];
2302 } else if (arg_ct->ct & TCG_CT_NEWREG) {
2303 reg = tcg_reg_alloc(s, arg_ct->u.regs,
2304 i_allocated_regs | o_allocated_regs,
2305 ts->indirect_base);
2306 } else {
2307 /* if fixed register, we try to use it */
2308 reg = ts->reg;
2309 if (ts->fixed_reg &&
2310 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2311 goto oarg_end;
2313 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
2314 ts->indirect_base);
2316 tcg_regset_set_reg(o_allocated_regs, reg);
2317 /* if a fixed register is used, then a move will be done afterwards */
2318 if (!ts->fixed_reg) {
2319 if (ts->val_type == TEMP_VAL_REG) {
2320 s->reg_to_temp[ts->reg] = NULL;
2322 ts->val_type = TEMP_VAL_REG;
2323 ts->reg = reg;
2324 /* temp value is modified, so the value kept in memory is
2325 potentially not the same */
2326 ts->mem_coherent = 0;
2327 s->reg_to_temp[reg] = ts;
2329 oarg_end:
2330 new_args[i] = reg;
2334 /* emit instruction */
2335 tcg_out_op(s, opc, new_args, const_args);
2337 /* move the outputs in the correct register if needed */
2338 for(i = 0; i < nb_oargs; i++) {
2339 ts = &s->temps[args[i]];
2340 reg = new_args[i];
2341 if (ts->fixed_reg && ts->reg != reg) {
2342 tcg_out_mov(s, ts->type, ts->reg, reg);
2344 if (NEED_SYNC_ARG(i)) {
2345 temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
2346 } else if (IS_DEAD_ARG(i)) {
2347 temp_dead(s, ts);
/*
 * STACK_DIR(x) yields x negated when TCG_TARGET_STACK_GROWSUP is
 * defined, and x unchanged otherwise.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
2358 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
2359 const TCGArg * const args, TCGLifeData arg_life)
2361 int flags, nb_regs, i;
2362 TCGReg reg;
2363 TCGArg arg;
2364 TCGTemp *ts;
2365 intptr_t stack_offset;
2366 size_t call_stack_size;
2367 tcg_insn_unit *func_addr;
2368 int allocate_args;
2369 TCGRegSet allocated_regs;
2371 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
2372 flags = args[nb_oargs + nb_iargs + 1];
2374 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2375 if (nb_regs > nb_iargs) {
2376 nb_regs = nb_iargs;
2379 /* assign stack slots first */
2380 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
2381 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2382 ~(TCG_TARGET_STACK_ALIGN - 1);
2383 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2384 if (allocate_args) {
2385 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2386 preallocate call stack */
2387 tcg_abort();
2390 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2391 for(i = nb_regs; i < nb_iargs; i++) {
2392 arg = args[nb_oargs + i];
2393 #ifdef TCG_TARGET_STACK_GROWSUP
2394 stack_offset -= sizeof(tcg_target_long);
2395 #endif
2396 if (arg != TCG_CALL_DUMMY_ARG) {
2397 ts = &s->temps[arg];
2398 temp_load(s, ts, tcg_target_available_regs[ts->type],
2399 s->reserved_regs);
2400 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2402 #ifndef TCG_TARGET_STACK_GROWSUP
2403 stack_offset += sizeof(tcg_target_long);
2404 #endif
2407 /* assign input registers */
2408 tcg_regset_set(allocated_regs, s->reserved_regs);
2409 for(i = 0; i < nb_regs; i++) {
2410 arg = args[nb_oargs + i];
2411 if (arg != TCG_CALL_DUMMY_ARG) {
2412 ts = &s->temps[arg];
2413 reg = tcg_target_call_iarg_regs[i];
2414 tcg_reg_free(s, reg, allocated_regs);
2416 if (ts->val_type == TEMP_VAL_REG) {
2417 if (ts->reg != reg) {
2418 tcg_out_mov(s, ts->type, reg, ts->reg);
2420 } else {
2421 TCGRegSet arg_set;
2423 tcg_regset_clear(arg_set);
2424 tcg_regset_set_reg(arg_set, reg);
2425 temp_load(s, ts, arg_set, allocated_regs);
2428 tcg_regset_set_reg(allocated_regs, reg);
2432 /* mark dead temporaries and free the associated registers */
2433 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2434 if (IS_DEAD_ARG(i)) {
2435 temp_dead(s, &s->temps[args[i]]);
2439 /* clobber call registers */
2440 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2441 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2442 tcg_reg_free(s, i, allocated_regs);
2446 /* Save globals if they might be written by the helper, sync them if
2447 they might be read. */
2448 if (flags & TCG_CALL_NO_READ_GLOBALS) {
2449 /* Nothing to do */
2450 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
2451 sync_globals(s, allocated_regs);
2452 } else {
2453 save_globals(s, allocated_regs);
2456 tcg_out_call(s, func_addr);
2458 /* assign output registers and emit moves if needed */
2459 for(i = 0; i < nb_oargs; i++) {
2460 arg = args[i];
2461 ts = &s->temps[arg];
2462 reg = tcg_target_call_oarg_regs[i];
2463 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
2465 if (ts->fixed_reg) {
2466 if (ts->reg != reg) {
2467 tcg_out_mov(s, ts->type, ts->reg, reg);
2469 } else {
2470 if (ts->val_type == TEMP_VAL_REG) {
2471 s->reg_to_temp[ts->reg] = NULL;
2473 ts->val_type = TEMP_VAL_REG;
2474 ts->reg = reg;
2475 ts->mem_coherent = 0;
2476 s->reg_to_temp[reg] = ts;
2477 if (NEED_SYNC_ARG(i)) {
2478 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
2479 } else if (IS_DEAD_ARG(i)) {
2480 temp_dead(s, ts);
2486 #ifdef CONFIG_PROFILER
2488 static int64_t tcg_table_op_count[NB_OPS];
2490 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2492 int i;
2494 for (i = 0; i < NB_OPS; i++) {
2495 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
2496 tcg_table_op_count[i]);
2499 #else
2500 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2502 cpu_fprintf(f, "[TCG profiler not compiled]\n");
2504 #endif
2507 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
2509 int i, oi, oi_next, num_insns;
2511 #ifdef CONFIG_PROFILER
2513 int n;
2515 n = s->gen_op_buf[0].prev + 1;
2516 s->op_count += n;
2517 if (n > s->op_count_max) {
2518 s->op_count_max = n;
2521 n = s->nb_temps;
2522 s->temp_count += n;
2523 if (n > s->temp_count_max) {
2524 s->temp_count_max = n;
2527 #endif
2529 #ifdef DEBUG_DISAS
2530 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
2531 && qemu_log_in_addr_range(tb->pc))) {
2532 qemu_log_lock();
2533 qemu_log("OP:\n");
2534 tcg_dump_ops(s);
2535 qemu_log("\n");
2536 qemu_log_unlock();
2538 #endif
2540 #ifdef CONFIG_PROFILER
2541 s->opt_time -= profile_getclock();
2542 #endif
2544 #ifdef USE_TCG_OPTIMIZATIONS
2545 tcg_optimize(s);
2546 #endif
2548 #ifdef CONFIG_PROFILER
2549 s->opt_time += profile_getclock();
2550 s->la_time -= profile_getclock();
2551 #endif
2554 uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
2556 liveness_pass_1(s, temp_state);
2558 if (s->nb_indirects > 0) {
2559 #ifdef DEBUG_DISAS
2560 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
2561 && qemu_log_in_addr_range(tb->pc))) {
2562 qemu_log_lock();
2563 qemu_log("OP before indirect lowering:\n");
2564 tcg_dump_ops(s);
2565 qemu_log("\n");
2566 qemu_log_unlock();
2568 #endif
2569 /* Replace indirect temps with direct temps. */
2570 if (liveness_pass_2(s, temp_state)) {
2571 /* If changes were made, re-run liveness. */
2572 liveness_pass_1(s, temp_state);
2577 #ifdef CONFIG_PROFILER
2578 s->la_time += profile_getclock();
2579 #endif
2581 #ifdef DEBUG_DISAS
2582 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
2583 && qemu_log_in_addr_range(tb->pc))) {
2584 qemu_log_lock();
2585 qemu_log("OP after optimization and liveness analysis:\n");
2586 tcg_dump_ops(s);
2587 qemu_log("\n");
2588 qemu_log_unlock();
2590 #endif
2592 tcg_reg_alloc_start(s);
2594 s->code_buf = tb->tc_ptr;
2595 s->code_ptr = tb->tc_ptr;
2597 tcg_out_tb_init(s);
2599 num_insns = -1;
2600 for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
2601 TCGOp * const op = &s->gen_op_buf[oi];
2602 TCGArg * const args = &s->gen_opparam_buf[op->args];
2603 TCGOpcode opc = op->opc;
2604 const TCGOpDef *def = &tcg_op_defs[opc];
2605 TCGLifeData arg_life = op->life;
2607 oi_next = op->next;
2608 #ifdef CONFIG_PROFILER
2609 tcg_table_op_count[opc]++;
2610 #endif
2612 switch (opc) {
2613 case INDEX_op_mov_i32:
2614 case INDEX_op_mov_i64:
2615 tcg_reg_alloc_mov(s, def, args, arg_life);
2616 break;
2617 case INDEX_op_movi_i32:
2618 case INDEX_op_movi_i64:
2619 tcg_reg_alloc_movi(s, args, arg_life);
2620 break;
2621 case INDEX_op_insn_start:
2622 if (num_insns >= 0) {
2623 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2625 num_insns++;
2626 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2627 target_ulong a;
2628 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2629 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
2630 #else
2631 a = args[i];
2632 #endif
2633 s->gen_insn_data[num_insns][i] = a;
2635 break;
2636 case INDEX_op_discard:
2637 temp_dead(s, &s->temps[args[0]]);
2638 break;
2639 case INDEX_op_set_label:
2640 tcg_reg_alloc_bb_end(s, s->reserved_regs);
2641 tcg_out_label(s, arg_label(args[0]), s->code_ptr);
2642 break;
2643 case INDEX_op_call:
2644 tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
2645 break;
2646 default:
2647 /* Sanity check that we've not introduced any unhandled opcodes. */
2648 if (def->flags & TCG_OPF_NOT_PRESENT) {
2649 tcg_abort();
2651 /* Note: in order to speed up the code, it would be much
2652 faster to have specialized register allocator functions for
2653 some common argument patterns */
2654 tcg_reg_alloc_op(s, def, opc, args, arg_life);
2655 break;
2657 #ifdef CONFIG_DEBUG_TCG
2658 check_regs(s);
2659 #endif
2660 /* Test for (pending) buffer overflow. The assumption is that any
2661 one operation beginning below the high water mark cannot overrun
2662 the buffer completely. Thus we can test for overflow after
2663 generating code without having to check during generation. */
2664 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
2665 return -1;
2668 tcg_debug_assert(num_insns >= 0);
2669 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2671 /* Generate TB finalization at the end of block */
2672 if (!tcg_out_tb_finalize(s)) {
2673 return -1;
2676 /* flush instruction cache */
2677 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
2679 return tcg_current_code_size(s);
2682 #ifdef CONFIG_PROFILER
2683 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2685 TCGContext *s = &tcg_ctx;
2686 int64_t tb_count = s->tb_count;
2687 int64_t tb_div_count = tb_count ? tb_count : 1;
2688 int64_t tot = s->interm_time + s->code_time;
2690 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
2691 tot, tot / 2.4e9);
2692 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
2693 tb_count, s->tb_count1 - tb_count,
2694 (double)(s->tb_count1 - s->tb_count)
2695 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
2696 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
2697 (double)s->op_count / tb_div_count, s->op_count_max);
2698 cpu_fprintf(f, "deleted ops/TB %0.2f\n",
2699 (double)s->del_op_count / tb_div_count);
2700 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
2701 (double)s->temp_count / tb_div_count, s->temp_count_max);
2702 cpu_fprintf(f, "avg host code/TB %0.1f\n",
2703 (double)s->code_out_len / tb_div_count);
2704 cpu_fprintf(f, "avg search data/TB %0.1f\n",
2705 (double)s->search_out_len / tb_div_count);
2707 cpu_fprintf(f, "cycles/op %0.1f\n",
2708 s->op_count ? (double)tot / s->op_count : 0);
2709 cpu_fprintf(f, "cycles/in byte %0.1f\n",
2710 s->code_in_len ? (double)tot / s->code_in_len : 0);
2711 cpu_fprintf(f, "cycles/out byte %0.1f\n",
2712 s->code_out_len ? (double)tot / s->code_out_len : 0);
2713 cpu_fprintf(f, "cycles/search byte %0.1f\n",
2714 s->search_out_len ? (double)tot / s->search_out_len : 0);
2715 if (tot == 0) {
2716 tot = 1;
2718 cpu_fprintf(f, " gen_interm time %0.1f%%\n",
2719 (double)s->interm_time / tot * 100.0);
2720 cpu_fprintf(f, " gen_code time %0.1f%%\n",
2721 (double)s->code_time / tot * 100.0);
2722 cpu_fprintf(f, "optim./code time %0.1f%%\n",
2723 (double)s->opt_time / (s->code_time ? s->code_time : 1)
2724 * 100.0);
2725 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
2726 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2727 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
2728 s->restore_count);
2729 cpu_fprintf(f, " avg cycles %0.1f\n",
2730 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
2732 #else
2733 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2735 cpu_fprintf(f, "[TCG profiler not compiled]\n");
2737 #endif
2739 #ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered symbol file, kept on a doubly linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* Deliberately empty and never inlined; it is called after the
   descriptor has been updated. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
    .action_flag = JIT_NOACTION,
    .relevant_entry = NULL,
    .first_entry = NULL,
};

/* End GDB interface.  */
/*
 * Return the byte offset of the name @str inside the string table @strtab.
 *
 * @strtab is a sequence of NUL-terminated names whose first byte (offset 0)
 * is the empty name; the search starts at offset 1.  The table must be
 * followed by at least one extra NUL byte, i.e. end with an empty entry.
 *
 * If @str is not present, the search stops at that terminating empty entry
 * and returns 0 (the offset of the empty name) instead of walking past the
 * end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Terminating empty entry reached: @str is not in the table. */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
2797 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
2798 const void *debug_frame,
2799 size_t debug_frame_size)
2801 struct __attribute__((packed)) DebugInfo {
2802 uint32_t len;
2803 uint16_t version;
2804 uint32_t abbrev;
2805 uint8_t ptr_size;
2806 uint8_t cu_die;
2807 uint16_t cu_lang;
2808 uintptr_t cu_low_pc;
2809 uintptr_t cu_high_pc;
2810 uint8_t fn_die;
2811 char fn_name[16];
2812 uintptr_t fn_low_pc;
2813 uintptr_t fn_high_pc;
2814 uint8_t cu_eoc;
2817 struct ElfImage {
2818 ElfW(Ehdr) ehdr;
2819 ElfW(Phdr) phdr;
2820 ElfW(Shdr) shdr[7];
2821 ElfW(Sym) sym[2];
2822 struct DebugInfo di;
2823 uint8_t da[24];
2824 char str[80];
2827 struct ElfImage *img;
2829 static const struct ElfImage img_template = {
2830 .ehdr = {
2831 .e_ident[EI_MAG0] = ELFMAG0,
2832 .e_ident[EI_MAG1] = ELFMAG1,
2833 .e_ident[EI_MAG2] = ELFMAG2,
2834 .e_ident[EI_MAG3] = ELFMAG3,
2835 .e_ident[EI_CLASS] = ELF_CLASS,
2836 .e_ident[EI_DATA] = ELF_DATA,
2837 .e_ident[EI_VERSION] = EV_CURRENT,
2838 .e_type = ET_EXEC,
2839 .e_machine = ELF_HOST_MACHINE,
2840 .e_version = EV_CURRENT,
2841 .e_phoff = offsetof(struct ElfImage, phdr),
2842 .e_shoff = offsetof(struct ElfImage, shdr),
2843 .e_ehsize = sizeof(ElfW(Shdr)),
2844 .e_phentsize = sizeof(ElfW(Phdr)),
2845 .e_phnum = 1,
2846 .e_shentsize = sizeof(ElfW(Shdr)),
2847 .e_shnum = ARRAY_SIZE(img->shdr),
2848 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
2849 #ifdef ELF_HOST_FLAGS
2850 .e_flags = ELF_HOST_FLAGS,
2851 #endif
2852 #ifdef ELF_OSABI
2853 .e_ident[EI_OSABI] = ELF_OSABI,
2854 #endif
2856 .phdr = {
2857 .p_type = PT_LOAD,
2858 .p_flags = PF_X,
2860 .shdr = {
2861 [0] = { .sh_type = SHT_NULL },
2862 /* Trick: The contents of code_gen_buffer are not present in
2863 this fake ELF file; that got allocated elsewhere. Therefore
2864 we mark .text as SHT_NOBITS (similar to .bss) so that readers
2865 will not look for contents. We can record any address. */
2866 [1] = { /* .text */
2867 .sh_type = SHT_NOBITS,
2868 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
2870 [2] = { /* .debug_info */
2871 .sh_type = SHT_PROGBITS,
2872 .sh_offset = offsetof(struct ElfImage, di),
2873 .sh_size = sizeof(struct DebugInfo),
2875 [3] = { /* .debug_abbrev */
2876 .sh_type = SHT_PROGBITS,
2877 .sh_offset = offsetof(struct ElfImage, da),
2878 .sh_size = sizeof(img->da),
2880 [4] = { /* .debug_frame */
2881 .sh_type = SHT_PROGBITS,
2882 .sh_offset = sizeof(struct ElfImage),
2884 [5] = { /* .symtab */
2885 .sh_type = SHT_SYMTAB,
2886 .sh_offset = offsetof(struct ElfImage, sym),
2887 .sh_size = sizeof(img->sym),
2888 .sh_info = 1,
2889 .sh_link = ARRAY_SIZE(img->shdr) - 1,
2890 .sh_entsize = sizeof(ElfW(Sym)),
2892 [6] = { /* .strtab */
2893 .sh_type = SHT_STRTAB,
2894 .sh_offset = offsetof(struct ElfImage, str),
2895 .sh_size = sizeof(img->str),
2898 .sym = {
2899 [1] = { /* code_gen_buffer */
2900 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
2901 .st_shndx = 1,
2904 .di = {
2905 .len = sizeof(struct DebugInfo) - 4,
2906 .version = 2,
2907 .ptr_size = sizeof(void *),
2908 .cu_die = 1,
2909 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
2910 .fn_die = 2,
2911 .fn_name = "code_gen_buffer"
2913 .da = {
2914 1, /* abbrev number (the cu) */
2915 0x11, 1, /* DW_TAG_compile_unit, has children */
2916 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
2917 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2918 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2919 0, 0, /* end of abbrev */
2920 2, /* abbrev number (the fn) */
2921 0x2e, 0, /* DW_TAG_subprogram, no children */
2922 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
2923 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2924 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2925 0, 0, /* end of abbrev */
2926 0 /* no more abbrev */
2928 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2929 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2932 /* We only need a single jit entry; statically allocate it. */
2933 static struct jit_code_entry one_entry;
2935 uintptr_t buf = (uintptr_t)buf_ptr;
2936 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
2937 DebugFrameHeader *dfh;
2939 img = g_malloc(img_size);
2940 *img = img_template;
2942 img->phdr.p_vaddr = buf;
2943 img->phdr.p_paddr = buf;
2944 img->phdr.p_memsz = buf_size;
2946 img->shdr[1].sh_name = find_string(img->str, ".text");
2947 img->shdr[1].sh_addr = buf;
2948 img->shdr[1].sh_size = buf_size;
2950 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
2951 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
2953 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
2954 img->shdr[4].sh_size = debug_frame_size;
2956 img->shdr[5].sh_name = find_string(img->str, ".symtab");
2957 img->shdr[6].sh_name = find_string(img->str, ".strtab");
2959 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
2960 img->sym[1].st_value = buf;
2961 img->sym[1].st_size = buf_size;
2963 img->di.cu_low_pc = buf;
2964 img->di.cu_high_pc = buf + buf_size;
2965 img->di.fn_low_pc = buf;
2966 img->di.fn_high_pc = buf + buf_size;
2968 dfh = (DebugFrameHeader *)(img + 1);
2969 memcpy(dfh, debug_frame, debug_frame_size);
2970 dfh->fde.func_start = buf;
2971 dfh->fde.func_len = buf_size;
2973 #ifdef DEBUG_JIT
2974 /* Enable this block to be able to debug the ELF image file creation.
2975 One can use readelf, objdump, or other inspection utilities. */
2977 FILE *f = fopen("/tmp/qemu.jit", "w+b");
2978 if (f) {
2979 if (fwrite(img, img_size, 1, f) != img_size) {
2980 /* Avoid stupid unused return value warning for fwrite. */
2982 fclose(f);
2985 #endif
2987 one_entry.symfile_addr = img;
2988 one_entry.symfile_size = img_size;
2990 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
2991 __jit_debug_descriptor.relevant_entry = &one_entry;
2992 __jit_debug_descriptor.first_entry = &one_entry;
2993 __jit_debug_register_code();
2995 #else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

/* No-op counterpart of the ELF-image builder; all arguments are ignored. */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty. */
}

/* No-op entry point; @buf and @buf_size are ignored. */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally empty. */
}
3008 #endif /* ELF_HOST_MACHINE */