2 * Optimizations for Tiny Code Generator for QEMU
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31 #include "qemu-common.h"
34 #if TCG_TARGET_REG_BITS == 64
35 #define CASE_OP_32_64(x) \
36 glue(glue(case INDEX_op_, x), _i32): \
37 glue(glue(case INDEX_op_, x), _i64)
39 #define CASE_OP_32_64(x) \
40 glue(glue(case INDEX_op_, x), _i32)
51 struct tcg_temp_info
{
58 static struct tcg_temp_info temps
[TCG_MAX_TEMPS
];
60 /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
61 class of equivalent temp's, a new representative should be chosen in this
63 static void reset_temp(TCGArg temp
, int nb_temps
, int nb_globals
)
66 TCGArg new_base
= (TCGArg
)-1;
67 if (temps
[temp
].state
== TCG_TEMP_HAS_COPY
) {
68 for (i
= temps
[temp
].next_copy
; i
!= temp
; i
= temps
[i
].next_copy
) {
69 if (i
>= nb_globals
) {
70 temps
[i
].state
= TCG_TEMP_HAS_COPY
;
75 for (i
= temps
[temp
].next_copy
; i
!= temp
; i
= temps
[i
].next_copy
) {
76 if (new_base
== (TCGArg
)-1) {
77 temps
[i
].state
= TCG_TEMP_ANY
;
79 temps
[i
].val
= new_base
;
82 temps
[temps
[temp
].next_copy
].prev_copy
= temps
[temp
].prev_copy
;
83 temps
[temps
[temp
].prev_copy
].next_copy
= temps
[temp
].next_copy
;
84 } else if (temps
[temp
].state
== TCG_TEMP_COPY
) {
85 temps
[temps
[temp
].next_copy
].prev_copy
= temps
[temp
].prev_copy
;
86 temps
[temps
[temp
].prev_copy
].next_copy
= temps
[temp
].next_copy
;
87 new_base
= temps
[temp
].val
;
89 temps
[temp
].state
= TCG_TEMP_ANY
;
90 if (new_base
!= (TCGArg
)-1 && temps
[new_base
].next_copy
== new_base
) {
91 temps
[new_base
].state
= TCG_TEMP_ANY
;
95 static int op_bits(int op
)
98 case INDEX_op_mov_i32
:
99 case INDEX_op_add_i32
:
100 case INDEX_op_sub_i32
:
101 case INDEX_op_mul_i32
:
102 case INDEX_op_and_i32
:
103 case INDEX_op_or_i32
:
104 case INDEX_op_xor_i32
:
105 case INDEX_op_shl_i32
:
106 case INDEX_op_shr_i32
:
107 case INDEX_op_sar_i32
:
108 #ifdef TCG_TARGET_HAS_rot_i32
109 case INDEX_op_rotl_i32
:
110 case INDEX_op_rotr_i32
:
112 #ifdef TCG_TARGET_HAS_not_i32
113 case INDEX_op_not_i32
:
115 #ifdef TCG_TARGET_HAS_ext8s_i32
116 case INDEX_op_ext8s_i32
:
118 #ifdef TCG_TARGET_HAS_ext16s_i32
119 case INDEX_op_ext16s_i32
:
121 #ifdef TCG_TARGET_HAS_ext8u_i32
122 case INDEX_op_ext8u_i32
:
124 #ifdef TCG_TARGET_HAS_ext16u_i32
125 case INDEX_op_ext16u_i32
:
128 #if TCG_TARGET_REG_BITS == 64
129 case INDEX_op_mov_i64
:
130 case INDEX_op_add_i64
:
131 case INDEX_op_sub_i64
:
132 case INDEX_op_mul_i64
:
133 case INDEX_op_and_i64
:
134 case INDEX_op_or_i64
:
135 case INDEX_op_xor_i64
:
136 case INDEX_op_shl_i64
:
137 case INDEX_op_shr_i64
:
138 case INDEX_op_sar_i64
:
139 #ifdef TCG_TARGET_HAS_rot_i64
140 case INDEX_op_rotl_i64
:
141 case INDEX_op_rotr_i64
:
143 #ifdef TCG_TARGET_HAS_not_i64
144 case INDEX_op_not_i64
:
146 #ifdef TCG_TARGET_HAS_ext8s_i64
147 case INDEX_op_ext8s_i64
:
149 #ifdef TCG_TARGET_HAS_ext16s_i64
150 case INDEX_op_ext16s_i64
:
152 #ifdef TCG_TARGET_HAS_ext32s_i64
153 case INDEX_op_ext32s_i64
:
155 #ifdef TCG_TARGET_HAS_ext8u_i64
156 case INDEX_op_ext8u_i64
:
158 #ifdef TCG_TARGET_HAS_ext16u_i64
159 case INDEX_op_ext16u_i64
:
161 #ifdef TCG_TARGET_HAS_ext32u_i64
162 case INDEX_op_ext32u_i64
:
167 fprintf(stderr
, "Unrecognized operation %d in op_bits.\n", op
);
172 static int op_to_movi(int op
)
174 switch (op_bits(op
)) {
176 return INDEX_op_movi_i32
;
177 #if TCG_TARGET_REG_BITS == 64
179 return INDEX_op_movi_i64
;
182 fprintf(stderr
, "op_to_movi: unexpected return value of "
183 "function op_bits.\n");
188 static void tcg_opt_gen_mov(TCGArg
*gen_args
, TCGArg dst
, TCGArg src
,
189 int nb_temps
, int nb_globals
)
191 reset_temp(dst
, nb_temps
, nb_globals
);
192 assert(temps
[src
].state
!= TCG_TEMP_COPY
);
193 if (src
>= nb_globals
) {
194 assert(temps
[src
].state
!= TCG_TEMP_CONST
);
195 if (temps
[src
].state
!= TCG_TEMP_HAS_COPY
) {
196 temps
[src
].state
= TCG_TEMP_HAS_COPY
;
197 temps
[src
].next_copy
= src
;
198 temps
[src
].prev_copy
= src
;
200 temps
[dst
].state
= TCG_TEMP_COPY
;
201 temps
[dst
].val
= src
;
202 temps
[dst
].next_copy
= temps
[src
].next_copy
;
203 temps
[dst
].prev_copy
= src
;
204 temps
[temps
[dst
].next_copy
].prev_copy
= dst
;
205 temps
[src
].next_copy
= dst
;
211 static void tcg_opt_gen_movi(TCGArg
*gen_args
, TCGArg dst
, TCGArg val
,
212 int nb_temps
, int nb_globals
)
214 reset_temp(dst
, nb_temps
, nb_globals
);
215 temps
[dst
].state
= TCG_TEMP_CONST
;
216 temps
[dst
].val
= val
;
221 static int op_to_mov(int op
)
223 switch (op_bits(op
)) {
225 return INDEX_op_mov_i32
;
226 #if TCG_TARGET_REG_BITS == 64
228 return INDEX_op_mov_i64
;
231 fprintf(stderr
, "op_to_mov: unexpected return value of "
232 "function op_bits.\n");
237 static TCGArg
do_constant_folding_2(int op
, TCGArg x
, TCGArg y
)
258 case INDEX_op_shl_i32
:
259 return (uint32_t)x
<< (uint32_t)y
;
261 #if TCG_TARGET_REG_BITS == 64
262 case INDEX_op_shl_i64
:
263 return (uint64_t)x
<< (uint64_t)y
;
266 case INDEX_op_shr_i32
:
267 return (uint32_t)x
>> (uint32_t)y
;
269 #if TCG_TARGET_REG_BITS == 64
270 case INDEX_op_shr_i64
:
271 return (uint64_t)x
>> (uint64_t)y
;
274 case INDEX_op_sar_i32
:
275 return (int32_t)x
>> (int32_t)y
;
277 #if TCG_TARGET_REG_BITS == 64
278 case INDEX_op_sar_i64
:
279 return (int64_t)x
>> (int64_t)y
;
282 #ifdef TCG_TARGET_HAS_rot_i32
283 case INDEX_op_rotr_i32
:
284 #if TCG_TARGET_REG_BITS == 64
288 x
= (x
<< (32 - y
)) | (x
>> y
);
292 #ifdef TCG_TARGET_HAS_rot_i64
293 #if TCG_TARGET_REG_BITS == 64
294 case INDEX_op_rotr_i64
:
295 x
= (x
<< (64 - y
)) | (x
>> y
);
300 #ifdef TCG_TARGET_HAS_rot_i32
301 case INDEX_op_rotl_i32
:
302 #if TCG_TARGET_REG_BITS == 64
306 x
= (x
<< y
) | (x
>> (32 - y
));
310 #ifdef TCG_TARGET_HAS_rot_i64
311 #if TCG_TARGET_REG_BITS == 64
312 case INDEX_op_rotl_i64
:
313 x
= (x
<< y
) | (x
>> (64 - y
));
318 #if defined(TCG_TARGET_HAS_not_i32) || defined(TCG_TARGET_HAS_not_i64)
319 #ifdef TCG_TARGET_HAS_not_i32
320 case INDEX_op_not_i32
:
322 #ifdef TCG_TARGET_HAS_not_i64
323 case INDEX_op_not_i64
:
328 #if defined(TCG_TARGET_HAS_ext8s_i32) || defined(TCG_TARGET_HAS_ext8s_i64)
329 #ifdef TCG_TARGET_HAS_ext8s_i32
330 case INDEX_op_ext8s_i32
:
332 #ifdef TCG_TARGET_HAS_ext8s_i64
333 case INDEX_op_ext8s_i64
:
338 #if defined(TCG_TARGET_HAS_ext16s_i32) || defined(TCG_TARGET_HAS_ext16s_i64)
339 #ifdef TCG_TARGET_HAS_ext16s_i32
340 case INDEX_op_ext16s_i32
:
342 #ifdef TCG_TARGET_HAS_ext16s_i64
343 case INDEX_op_ext16s_i64
:
348 #if defined(TCG_TARGET_HAS_ext8u_i32) || defined(TCG_TARGET_HAS_ext8u_i64)
349 #ifdef TCG_TARGET_HAS_ext8u_i32
350 case INDEX_op_ext8u_i32
:
352 #ifdef TCG_TARGET_HAS_ext8u_i64
353 case INDEX_op_ext8u_i64
:
358 #if defined(TCG_TARGET_HAS_ext16u_i32) || defined(TCG_TARGET_HAS_ext16u_i64)
359 #ifdef TCG_TARGET_HAS_ext16u_i32
360 case INDEX_op_ext16u_i32
:
362 #ifdef TCG_TARGET_HAS_ext16u_i64
363 case INDEX_op_ext16u_i64
:
368 #if TCG_TARGET_REG_BITS == 64
369 #ifdef TCG_TARGET_HAS_ext32s_i64
370 case INDEX_op_ext32s_i64
:
374 #ifdef TCG_TARGET_HAS_ext32u_i64
375 case INDEX_op_ext32u_i64
:
382 "Unrecognized operation %d in do_constant_folding.\n", op
);
387 static TCGArg
do_constant_folding(int op
, TCGArg x
, TCGArg y
)
389 TCGArg res
= do_constant_folding_2(op
, x
, y
);
390 #if TCG_TARGET_REG_BITS == 64
391 if (op_bits(op
) == 32) {
398 /* Propagate constants and copies, fold constant expressions. */
399 static TCGArg
*tcg_constant_folding(TCGContext
*s
, uint16_t *tcg_opc_ptr
,
400 TCGArg
*args
, TCGOpDef
*tcg_op_defs
)
402 int i
, nb_ops
, op_index
, op
, nb_temps
, nb_globals
, nb_call_args
;
406 /* Array VALS has an element for each temp.
407 If this temp holds a constant then its value is kept in VALS' element.
408 If this temp is a copy of other ones then this equivalence class'
409 representative is kept in VALS' element.
410 If this temp is neither copy nor constant then corresponding VALS'
411 element is unused. */
413 nb_temps
= s
->nb_temps
;
414 nb_globals
= s
->nb_globals
;
415 memset(temps
, 0, nb_temps
* sizeof(struct tcg_temp_info
));
417 nb_ops
= tcg_opc_ptr
- gen_opc_buf
;
419 for (op_index
= 0; op_index
< nb_ops
; op_index
++) {
420 op
= gen_opc_buf
[op_index
];
421 def
= &tcg_op_defs
[op
];
422 /* Do copy propagation */
423 if (!(def
->flags
& (TCG_OPF_CALL_CLOBBER
| TCG_OPF_SIDE_EFFECTS
))) {
424 assert(op
!= INDEX_op_call
);
425 for (i
= def
->nb_oargs
; i
< def
->nb_oargs
+ def
->nb_iargs
; i
++) {
426 if (temps
[args
[i
]].state
== TCG_TEMP_COPY
) {
427 args
[i
] = temps
[args
[i
]].val
;
432 /* For commutative operations make constant second argument */
439 if (temps
[args
[1]].state
== TCG_TEMP_CONST
) {
449 /* Simplify expression if possible. */
456 #ifdef TCG_TARGET_HAS_rot_i32
457 case INDEX_op_rotl_i32
:
458 case INDEX_op_rotr_i32
:
460 #ifdef TCG_TARGET_HAS_rot_i64
461 case INDEX_op_rotl_i64
:
462 case INDEX_op_rotr_i64
:
464 if (temps
[args
[1]].state
== TCG_TEMP_CONST
) {
465 /* Proceed with possible constant folding. */
468 if (temps
[args
[2]].state
== TCG_TEMP_CONST
469 && temps
[args
[2]].val
== 0) {
470 if ((temps
[args
[0]].state
== TCG_TEMP_COPY
471 && temps
[args
[0]].val
== args
[1])
472 || args
[0] == args
[1]) {
474 gen_opc_buf
[op_index
] = INDEX_op_nop
;
476 gen_opc_buf
[op_index
] = op_to_mov(op
);
477 tcg_opt_gen_mov(gen_args
, args
[0], args
[1],
478 nb_temps
, nb_globals
);
486 if ((temps
[args
[2]].state
== TCG_TEMP_CONST
487 && temps
[args
[2]].val
== 0)) {
488 gen_opc_buf
[op_index
] = op_to_movi(op
);
489 tcg_opt_gen_movi(gen_args
, args
[0], 0, nb_temps
, nb_globals
);
497 if (args
[1] == args
[2]) {
498 if (args
[1] == args
[0]) {
500 gen_opc_buf
[op_index
] = INDEX_op_nop
;
502 gen_opc_buf
[op_index
] = op_to_mov(op
);
503 tcg_opt_gen_mov(gen_args
, args
[0], args
[1], nb_temps
,
513 /* Propagate constants through copy operations and do constant
514 folding. Constants will be substituted to arguments by register
515 allocator where needed and possible. Also detect copies. */
518 if ((temps
[args
[1]].state
== TCG_TEMP_COPY
519 && temps
[args
[1]].val
== args
[0])
520 || args
[0] == args
[1]) {
522 gen_opc_buf
[op_index
] = INDEX_op_nop
;
525 if (temps
[args
[1]].state
!= TCG_TEMP_CONST
) {
526 tcg_opt_gen_mov(gen_args
, args
[0], args
[1],
527 nb_temps
, nb_globals
);
532 /* Source argument is constant. Rewrite the operation and
533 let movi case handle it. */
535 gen_opc_buf
[op_index
] = op
;
536 args
[1] = temps
[args
[1]].val
;
539 tcg_opt_gen_movi(gen_args
, args
[0], args
[1], nb_temps
, nb_globals
);
544 #ifdef TCG_TARGET_HAS_ext8s_i32
545 case INDEX_op_ext8s_i32
:
547 #ifdef TCG_TARGET_HAS_ext8s_i64
548 case INDEX_op_ext8s_i64
:
550 #ifdef TCG_TARGET_HAS_ext16s_i32
551 case INDEX_op_ext16s_i32
:
553 #ifdef TCG_TARGET_HAS_ext16s_i64
554 case INDEX_op_ext16s_i64
:
556 #ifdef TCG_TARGET_HAS_ext8u_i32
557 case INDEX_op_ext8u_i32
:
559 #ifdef TCG_TARGET_HAS_ext8u_i64
560 case INDEX_op_ext8u_i64
:
562 #ifdef TCG_TARGET_HAS_ext16u_i32
563 case INDEX_op_ext16u_i32
:
565 #ifdef TCG_TARGET_HAS_ext16u_i64
566 case INDEX_op_ext16u_i64
:
568 #if TCG_TARGET_REG_BITS == 64
569 case INDEX_op_ext32s_i64
:
570 case INDEX_op_ext32u_i64
:
572 if (temps
[args
[1]].state
== TCG_TEMP_CONST
) {
573 gen_opc_buf
[op_index
] = op_to_movi(op
);
574 tmp
= do_constant_folding(op
, temps
[args
[1]].val
, 0);
575 tcg_opt_gen_movi(gen_args
, args
[0], tmp
, nb_temps
, nb_globals
);
580 reset_temp(args
[0], nb_temps
, nb_globals
);
581 gen_args
[0] = args
[0];
582 gen_args
[1] = args
[1];
596 #ifdef TCG_TARGET_HAS_rot_i32
597 case INDEX_op_rotl_i32
:
598 case INDEX_op_rotr_i32
:
600 #ifdef TCG_TARGET_HAS_rot_i64
601 case INDEX_op_rotl_i64
:
602 case INDEX_op_rotr_i64
:
604 if (temps
[args
[1]].state
== TCG_TEMP_CONST
605 && temps
[args
[2]].state
== TCG_TEMP_CONST
) {
606 gen_opc_buf
[op_index
] = op_to_movi(op
);
607 tmp
= do_constant_folding(op
, temps
[args
[1]].val
,
609 tcg_opt_gen_movi(gen_args
, args
[0], tmp
, nb_temps
, nb_globals
);
614 reset_temp(args
[0], nb_temps
, nb_globals
);
615 gen_args
[0] = args
[0];
616 gen_args
[1] = args
[1];
617 gen_args
[2] = args
[2];
623 nb_call_args
= (args
[0] >> 16) + (args
[0] & 0xffff);
624 if (!(args
[nb_call_args
+ 1] & (TCG_CALL_CONST
| TCG_CALL_PURE
))) {
625 for (i
= 0; i
< nb_globals
; i
++) {
626 reset_temp(i
, nb_temps
, nb_globals
);
629 for (i
= 0; i
< (args
[0] >> 16); i
++) {
630 reset_temp(args
[i
+ 1], nb_temps
, nb_globals
);
632 i
= nb_call_args
+ 3;
640 case INDEX_op_set_label
:
643 CASE_OP_32_64(brcond
):
644 memset(temps
, 0, nb_temps
* sizeof(struct tcg_temp_info
));
645 for (i
= 0; i
< def
->nb_args
; i
++) {
652 /* Default case: we do know nothing about operation so no
653 propagation is done. We only trash output args. */
654 for (i
= 0; i
< def
->nb_oargs
; i
++) {
655 reset_temp(args
[i
], nb_temps
, nb_globals
);
657 for (i
= 0; i
< def
->nb_args
; i
++) {
658 gen_args
[i
] = args
[i
];
660 args
+= def
->nb_args
;
661 gen_args
+= def
->nb_args
;
669 TCGArg
*tcg_optimize(TCGContext
*s
, uint16_t *tcg_opc_ptr
,
670 TCGArg
*args
, TCGOpDef
*tcg_op_defs
)
673 res
= tcg_constant_folding(s
, tcg_opc_ptr
, args
, tcg_op_defs
);