3 * Local constant, copy and tree propagation.
5 * To make some sense of the tree mover, read mono/docs/tree-mover.txt
8 * Paolo Molaro (lupus@ximian.com)
9 * Dietmar Maurer (dietmar@ximian.com)
10 * Massimiliano Mantione (massi@ximian.com)
12 * (C) 2006 Novell, Inc. http://www.novell.com
13 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
18 #include <mono/utils/mono-compiler.h>
28 #include <mono/metadata/debug-helpers.h>
29 #include <mono/metadata/mempool.h>
30 #include <mono/metadata/opcodes.h>
31 #include <mono/utils/unlocked.h>
35 #ifndef MONO_ARCH_IS_OP_MEMBASE
36 #define MONO_ARCH_IS_OP_MEMBASE(opcode) FALSE
40 mono_bitset_mp_new_noinit (MonoMemPool
*mp
, guint32 max_size
)
42 int size
= mono_bitset_alloc_size (max_size
, 0);
45 mem
= mono_mempool_alloc (mp
, size
);
46 return mono_bitset_mem_new (mem
, max_size
, MONO_BITSET_DONT_FREE
);
49 struct magic_unsigned
{
60 /* http://www.hackersdelight.org/hdcodetxt/magicu.c.txt */
61 static struct magic_unsigned
62 compute_magic_unsigned (guint32 divisor
) {
63 guint32 nc
, delta
, q1
, r1
, q2
, r2
;
64 struct magic_unsigned magu
;
69 nc
= -1 - (-divisor
) % divisor
;
72 r1
= 0x80000000 - q1
* nc
;
73 q2
= 0x7FFFFFFF / divisor
;
74 r2
= 0x7FFFFFFF - q2
* divisor
;
86 if (r2
+ 1 >= divisor
- r2
) {
90 r2
= 2 * r2
+ 1 - divisor
;
97 delta
= divisor
- 1 - r2
;
98 } while (!gt
&& (q1
< delta
|| (q1
== delta
&& r1
== 0)));
100 magu
.magic_number
= q2
+ 1;
105 /* http://www.hackersdelight.org/hdcodetxt/magic.c.txt */
106 static struct magic_signed
107 compute_magic_signed (gint32 divisor
) {
109 guint32 ad
, anc
, delta
, q1
, r1
, q2
, r2
, t
;
110 const guint32 two31
= 0x80000000;
111 struct magic_signed mag
;
114 t
= two31
+ ((unsigned)divisor
>> 31);
115 anc
= t
- 1 - t
% ad
;
118 r1
= two31
- q1
* anc
;
120 r2
= two31
- q2
* ad
;
139 } while (q1
< delta
|| (q1
== delta
&& r1
== 0));
141 mag
.magic_number
= q2
+ 1;
143 mag
.magic_number
= -mag
.magic_number
;
149 mono_strength_reduction_division (MonoCompile
*cfg
, MonoInst
*ins
)
151 gboolean allocated_vregs
= FALSE
;
153 * We don't use it on 32bit systems because on those
154 * platforms we emulate long multiplication, driving the
155 * performance back down.
157 switch (ins
->opcode
) {
158 case OP_IDIV_UN_IMM
: {
160 #if SIZEOF_REGISTER == 8
161 guint32 dividend_reg
;
165 struct magic_unsigned mag
;
166 int power2
= mono_is_power_of_two (ins
->inst_imm
);
168 /* The decomposition doesn't handle exception throwing */
169 if (ins
->inst_imm
== 0)
173 ins
->opcode
= OP_ISHR_UN_IMM
;
175 ins
->inst_imm
= power2
;
178 if (cfg
->backend
->disable_div_with_mul
)
180 allocated_vregs
= TRUE
;
182 * Replacement of unsigned division with multiplication,
183 * shifts and additions Hacker's Delight, chapter 10-10.
185 mag
= compute_magic_unsigned (ins
->inst_imm
);
186 tmp_regl
= alloc_lreg (cfg
);
187 #if SIZEOF_REGISTER == 8
188 dividend_reg
= alloc_lreg (cfg
);
189 MONO_EMIT_NEW_I8CONST (cfg
, tmp_regl
, mag
.magic_number
);
190 MONO_EMIT_NEW_UNALU (cfg
, OP_ZEXT_I4
, dividend_reg
, ins
->sreg1
);
191 MONO_EMIT_NEW_BIALU (cfg
, OP_LMUL
, tmp_regl
, dividend_reg
, tmp_regl
);
193 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, tmp_regl
, tmp_regl
, 32);
194 MONO_EMIT_NEW_BIALU (cfg
, OP_LADD
, tmp_regl
, tmp_regl
, dividend_reg
);
195 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, ins
->dreg
, tmp_regl
, mag
.shift
);
197 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, ins
->dreg
, tmp_regl
, 32 + mag
.shift
);
200 tmp_regi
= alloc_ireg (cfg
);
201 MONO_EMIT_NEW_ICONST (cfg
, tmp_regi
, mag
.magic_number
);
202 MONO_EMIT_NEW_BIALU (cfg
, OP_BIGMUL_UN
, tmp_regl
, ins
->sreg1
, tmp_regi
);
203 /* Long shifts below will be decomposed during cprop */
205 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, tmp_regl
, tmp_regl
, 32);
206 MONO_EMIT_NEW_BIALU (cfg
, OP_IADDCC
, MONO_LVREG_LS (tmp_regl
), MONO_LVREG_LS (tmp_regl
), ins
->sreg1
);
207 /* MONO_LVREG_MS (tmp_reg) is 0, save in it the carry */
208 MONO_EMIT_NEW_BIALU (cfg
, OP_IADC
, MONO_LVREG_MS (tmp_regl
), MONO_LVREG_MS (tmp_regl
), MONO_LVREG_MS (tmp_regl
));
209 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, tmp_regl
, tmp_regl
, mag
.shift
);
211 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, tmp_regl
, tmp_regl
, 32 + mag
.shift
);
213 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, ins
->dreg
, MONO_LVREG_LS (tmp_regl
));
215 UnlockedIncrement (&mono_jit_stats
.optimized_divisions
);
220 #if SIZEOF_REGISTER == 8
221 guint32 dividend_reg
;
225 struct magic_signed mag
;
226 int power2
= mono_is_power_of_two (ins
->inst_imm
);
227 /* The decomposition doesn't handle exception throwing */
228 /* Optimization with MUL does not apply for -1, 0 and 1 divisors */
229 if (ins
->inst_imm
== 0 || ins
->inst_imm
== -1) {
231 } else if (ins
->inst_imm
== 1) {
232 ins
->opcode
= OP_MOVE
;
236 allocated_vregs
= TRUE
;
238 guint32 r1
= alloc_ireg (cfg
);
239 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, r1
, ins
->sreg1
, 31);
240 MONO_EMIT_NEW_BIALU (cfg
, OP_IADD
, r1
, r1
, ins
->sreg1
);
241 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, ins
->dreg
, r1
, 1);
243 } else if (power2
> 0 && power2
< 31) {
244 guint32 r1
= alloc_ireg (cfg
);
245 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, r1
, ins
->sreg1
, 31);
246 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, r1
, r1
, (32 - power2
));
247 MONO_EMIT_NEW_BIALU (cfg
, OP_IADD
, r1
, r1
, ins
->sreg1
);
248 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, ins
->dreg
, r1
, power2
);
252 if (cfg
->backend
->disable_div_with_mul
)
255 * Replacement of signed division with multiplication,
256 * shifts and additions Hacker's Delight, chapter 10-6.
258 mag
= compute_magic_signed (ins
->inst_imm
);
259 tmp_regl
= alloc_lreg (cfg
);
260 #if SIZEOF_REGISTER == 8
261 dividend_reg
= alloc_lreg (cfg
);
262 MONO_EMIT_NEW_I8CONST (cfg
, tmp_regl
, mag
.magic_number
);
263 MONO_EMIT_NEW_UNALU (cfg
, OP_SEXT_I4
, dividend_reg
, ins
->sreg1
);
264 MONO_EMIT_NEW_BIALU (cfg
, OP_LMUL
, tmp_regl
, dividend_reg
, tmp_regl
);
265 if ((ins
->inst_imm
> 0 && mag
.magic_number
< 0) || (ins
->inst_imm
< 0 && mag
.magic_number
> 0)) {
266 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_IMM
, tmp_regl
, tmp_regl
, 32);
267 if (ins
->inst_imm
> 0 && mag
.magic_number
< 0) {
268 MONO_EMIT_NEW_BIALU (cfg
, OP_LADD
, tmp_regl
, tmp_regl
, dividend_reg
);
269 } else if (ins
->inst_imm
< 0 && mag
.magic_number
> 0) {
270 MONO_EMIT_NEW_BIALU (cfg
, OP_LSUB
, tmp_regl
, tmp_regl
, dividend_reg
);
272 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_IMM
, tmp_regl
, tmp_regl
, mag
.shift
);
274 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_IMM
, tmp_regl
, tmp_regl
, 32 + mag
.shift
);
276 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_LSHR_UN_IMM
, ins
->dreg
, tmp_regl
, SIZEOF_REGISTER
* 8 - 1);
277 MONO_EMIT_NEW_BIALU (cfg
, OP_LADD
, ins
->dreg
, ins
->dreg
, tmp_regl
);
279 tmp_regi
= alloc_ireg (cfg
);
280 MONO_EMIT_NEW_ICONST (cfg
, tmp_regi
, mag
.magic_number
);
281 MONO_EMIT_NEW_BIALU (cfg
, OP_BIGMUL
, tmp_regl
, ins
->sreg1
, tmp_regi
);
282 if ((ins
->inst_imm
> 0 && mag
.magic_number
< 0) || (ins
->inst_imm
< 0 && mag
.magic_number
> 0)) {
283 if (ins
->inst_imm
> 0 && mag
.magic_number
< 0) {
284 /* Opposite sign, cannot overflow */
285 MONO_EMIT_NEW_BIALU (cfg
, OP_IADD
, tmp_regi
, MONO_LVREG_MS (tmp_regl
), ins
->sreg1
);
286 } else if (ins
->inst_imm
< 0 && mag
.magic_number
> 0) {
287 /* Same sign, cannot overflow */
288 MONO_EMIT_NEW_BIALU (cfg
, OP_ISUB
, tmp_regi
, MONO_LVREG_MS (tmp_regl
), ins
->sreg1
);
290 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, tmp_regi
, tmp_regi
, mag
.shift
);
292 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, tmp_regi
, MONO_LVREG_MS (tmp_regl
), mag
.shift
);
294 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, ins
->dreg
, tmp_regi
, SIZEOF_REGISTER
* 8 - 1);
295 MONO_EMIT_NEW_BIALU (cfg
, OP_IADD
, ins
->dreg
, ins
->dreg
, tmp_regi
);
297 UnlockedIncrement (&mono_jit_stats
.optimized_divisions
);
301 return allocated_vregs
;
305 * Replaces ins with optimized opcodes.
307 * We can emit to cbb the equivalent instructions which will be used as
308 * replacement for ins, or simply change the fields of ins. Spec needs to
309 * be updated if we silently change the opcode of ins.
311 * Returns TRUE if additional vregs were allocated.
314 mono_strength_reduction_ins (MonoCompile
*cfg
, MonoInst
*ins
, const char **spec
)
316 gboolean allocated_vregs
= FALSE
;
318 /* FIXME: Add long/float */
319 switch (ins
->opcode
) {
322 if (ins
->dreg
== ins
->sreg1
) {
330 #if SIZEOF_REGISTER == 8
334 if (ins
->inst_imm
== 0) {
335 ins
->opcode
= OP_MOVE
;
340 #if SIZEOF_REGISTER == 8
343 if (ins
->inst_imm
== 0) {
344 ins
->opcode
= (ins
->opcode
== OP_LMUL_IMM
) ? OP_I8CONST
: OP_ICONST
;
347 } else if (ins
->inst_imm
== 1) {
348 ins
->opcode
= OP_MOVE
;
349 } else if ((ins
->opcode
== OP_IMUL_IMM
) && (ins
->inst_imm
== -1)) {
350 ins
->opcode
= OP_INEG
;
351 } else if ((ins
->opcode
== OP_LMUL_IMM
) && (ins
->inst_imm
== -1)) {
352 ins
->opcode
= OP_LNEG
;
354 int power2
= mono_is_power_of_two (ins
->inst_imm
);
356 ins
->opcode
= (ins
->opcode
== OP_MUL_IMM
) ? OP_SHL_IMM
: ((ins
->opcode
== OP_LMUL_IMM
) ? OP_LSHL_IMM
: OP_ISHL_IMM
);
357 ins
->inst_imm
= power2
;
361 case OP_IREM_UN_IMM
: {
362 int power2
= mono_is_power_of_two (ins
->inst_imm
);
365 ins
->opcode
= OP_IAND_IMM
;
367 ins
->inst_imm
= (1 << power2
) - 1;
373 if ((!COMPILE_LLVM (cfg
)) && (!cfg
->backend
->optimized_div
))
374 allocated_vregs
= mono_strength_reduction_division (cfg
, ins
);
377 #if SIZEOF_REGISTER == 8
381 int power
= mono_is_power_of_two (ins
->inst_imm
);
382 if (ins
->inst_imm
== 1) {
383 ins
->opcode
= OP_ICONST
;
384 MONO_INST_NULLIFY_SREGS (ins
);
386 } else if ((ins
->inst_imm
> 0) && (ins
->inst_imm
< (1LL << 32)) &&
387 (power
!= -1) && (!cfg
->backend
->optimized_div
)) {
388 gboolean is_long
= ins
->opcode
== OP_LREM_IMM
;
389 int compensator_reg
= alloc_ireg (cfg
);
390 int intermediate_reg
;
392 /* Based on gcc code */
394 /* Add compensation for negative numerators */
397 intermediate_reg
= compensator_reg
;
398 MONO_EMIT_NEW_BIALU_IMM (cfg
, is_long
? OP_LSHR_IMM
: OP_ISHR_IMM
, intermediate_reg
, ins
->sreg1
, is_long
? 63 : 31);
400 intermediate_reg
= ins
->sreg1
;
403 MONO_EMIT_NEW_BIALU_IMM (cfg
, is_long
? OP_LSHR_UN_IMM
: OP_ISHR_UN_IMM
, compensator_reg
, intermediate_reg
, (is_long
? 64 : 32) - power
);
404 MONO_EMIT_NEW_BIALU (cfg
, is_long
? OP_LADD
: OP_IADD
, ins
->dreg
, ins
->sreg1
, compensator_reg
);
405 /* Compute remainder */
406 MONO_EMIT_NEW_BIALU_IMM (cfg
, is_long
? OP_LAND_IMM
: OP_AND_IMM
, ins
->dreg
, ins
->dreg
, (1 << power
) - 1);
407 /* Remove compensation */
408 MONO_EMIT_NEW_BIALU (cfg
, is_long
? OP_LSUB
: OP_ISUB
, ins
->dreg
, ins
->dreg
, compensator_reg
);
410 allocated_vregs
= TRUE
;
414 #if SIZEOF_REGISTER == 4
416 if (COMPILE_LLVM (cfg
))
418 if (ins
->inst_c1
== 32) {
419 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
));
420 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), 31);
421 } else if (ins
->inst_c1
== 0) {
422 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
));
423 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
));
424 } else if (ins
->inst_c1
> 32) {
425 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), ins
->inst_c1
- 32);
426 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), 31);
428 guint32 tmpreg
= alloc_ireg (cfg
);
429 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHL_IMM
, tmpreg
, MONO_LVREG_MS (ins
->sreg1
), 32 - ins
->inst_c1
);
430 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_IMM
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), ins
->inst_c1
);
431 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
), ins
->inst_c1
);
432 MONO_EMIT_NEW_BIALU (cfg
, OP_IOR
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->dreg
), tmpreg
);
433 allocated_vregs
= TRUE
;
437 case OP_LSHR_UN_IMM
: {
438 if (COMPILE_LLVM (cfg
))
440 if (ins
->inst_c1
== 32) {
441 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
));
442 MONO_EMIT_NEW_ICONST (cfg
, MONO_LVREG_MS (ins
->dreg
), 0);
443 } else if (ins
->inst_c1
== 0) {
444 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
));
445 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
));
446 } else if (ins
->inst_c1
> 32) {
447 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), ins
->inst_c1
- 32);
448 MONO_EMIT_NEW_ICONST (cfg
, MONO_LVREG_MS (ins
->dreg
), 0);
450 guint32 tmpreg
= alloc_ireg (cfg
);
451 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHL_IMM
, tmpreg
, MONO_LVREG_MS (ins
->sreg1
), 32 - ins
->inst_c1
);
452 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), ins
->inst_c1
);
453 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
), ins
->inst_c1
);
454 MONO_EMIT_NEW_BIALU (cfg
, OP_IOR
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->dreg
), tmpreg
);
455 allocated_vregs
= TRUE
;
460 if (COMPILE_LLVM (cfg
))
462 if (ins
->inst_c1
== 32) {
463 /* just move the lower half to the upper and zero the lower word */
464 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
));
465 MONO_EMIT_NEW_ICONST (cfg
, MONO_LVREG_LS (ins
->dreg
), 0);
466 } else if (ins
->inst_c1
== 0) {
467 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
));
468 MONO_EMIT_NEW_UNALU (cfg
, OP_MOVE
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
));
469 } else if (ins
->inst_c1
> 32) {
470 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHL_IMM
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
), ins
->inst_c1
- 32);
471 MONO_EMIT_NEW_ICONST (cfg
, MONO_LVREG_LS (ins
->dreg
), 0);
473 guint32 tmpreg
= alloc_ireg (cfg
);
474 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHR_UN_IMM
, tmpreg
, MONO_LVREG_LS (ins
->sreg1
), 32 - ins
->inst_c1
);
475 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHL_IMM
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->sreg1
), ins
->inst_c1
);
476 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_ISHL_IMM
, MONO_LVREG_LS (ins
->dreg
), MONO_LVREG_LS (ins
->sreg1
), ins
->inst_c1
);
477 MONO_EMIT_NEW_BIALU (cfg
, OP_IOR
, MONO_LVREG_MS (ins
->dreg
), MONO_LVREG_MS (ins
->dreg
), tmpreg
);
478 allocated_vregs
= TRUE
;
488 *spec
= INS_INFO (ins
->opcode
);
489 return allocated_vregs
;
495 * A combined local copy and constant propagation pass.
498 mono_local_cprop (MonoCompile
*cfg
)
500 MonoBasicBlock
*bb
, *bb_opt
;
504 int filter
= FILTER_IL_SEQ_POINT
;
505 int initial_max_vregs
= cfg
->next_vreg
;
507 max
= cfg
->next_vreg
;
508 defs
= (MonoInst
**)mono_mempool_alloc (cfg
->mempool
, sizeof (MonoInst
*) * cfg
->next_vreg
);
509 def_index
= (gint32
*)mono_mempool_alloc (cfg
->mempool
, sizeof (guint32
) * cfg
->next_vreg
);
510 cfg
->cbb
= bb_opt
= mono_mempool_alloc0 ((cfg
)->mempool
, sizeof (MonoBasicBlock
));
512 for (bb
= cfg
->bb_entry
; bb
; bb
= bb
->next_bb
) {
517 /* Manually init the defs entries used by the bblock */
518 MONO_BB_FOR_EACH_INS (bb
, ins
) {
519 int sregs
[MONO_MAX_SRC_REGS
];
522 if (ins
->dreg
!= -1) {
523 #if SIZEOF_REGISTER == 4
524 const char *spec
= INS_INFO (ins
->opcode
);
525 if (spec
[MONO_INST_DEST
] == 'l') {
526 defs
[ins
->dreg
+ 1] = NULL
;
527 defs
[ins
->dreg
+ 2] = NULL
;
530 defs
[ins
->dreg
] = NULL
;
533 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
534 for (i
= 0; i
< num_sregs
; ++i
) {
535 int sreg
= sregs
[i
];
536 #if SIZEOF_REGISTER == 4
537 const char *spec
= INS_INFO (ins
->opcode
);
538 if (spec
[MONO_INST_SRC1
+ i
] == 'l') {
539 defs
[sreg
+ 1] = NULL
;
540 defs
[sreg
+ 2] = NULL
;
548 last_call_index
= -1;
549 MONO_BB_FOR_EACH_INS (bb
, ins
) {
550 const char *spec
= INS_INFO (ins
->opcode
);
551 int regtype
, srcindex
, sreg
;
553 int sregs
[MONO_MAX_SRC_REGS
];
555 if (ins
->opcode
== OP_NOP
) {
556 MONO_DELETE_INS (bb
, ins
);
560 g_assert (ins
->opcode
> MONO_CEE_LAST
);
562 /* FIXME: Optimize this */
563 if (ins
->opcode
== OP_LDADDR
) {
564 MonoInst
*var
= (MonoInst
*)ins
->inst_p0
;
566 defs
[var
->dreg
] = NULL
;
568 if (!MONO_TYPE_ISSTRUCT (var->inst_vtype))
573 if (MONO_IS_STORE_MEMBASE (ins
)) {
577 if ((regtype
== 'i') && (sreg
!= -1) && defs
[sreg
]) {
578 MonoInst
*def
= defs
[sreg
];
580 if ((def
->opcode
== OP_MOVE
) && (!defs
[def
->sreg1
] || (def_index
[def
->sreg1
] < def_index
[sreg
])) && !vreg_is_volatile (cfg
, def
->sreg1
)) {
581 int vreg
= def
->sreg1
;
582 if (cfg
->verbose_level
> 2) printf ("CCOPY: R%d -> R%d\n", sreg
, vreg
);
588 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
589 for (srcindex
= 0; srcindex
< num_sregs
; ++srcindex
) {
592 mono_inst_get_src_registers (ins
, sregs
);
594 regtype
= spec
[MONO_INST_SRC1
+ srcindex
];
595 sreg
= sregs
[srcindex
];
597 if ((regtype
== ' ') || (sreg
== -1) || (!defs
[sreg
]))
602 /* Copy propagation */
604 * The first check makes sure the source of the copy did not change since
606 * The second check avoids volatile variables.
607 * The third check avoids copy propagating local vregs through a call,
608 * since the lvreg will be spilled
609 * The fourth check avoids copy propagating a vreg in cases where
610 * it would be eliminated anyway by reverse copy propagation later,
611 * because propagating it would create another use for it, thus making
612 * it impossible to use reverse copy propagation.
614 /* Enabling this for floats trips up the fp stack */
616 * Enabling this for floats on amd64 seems to cause a failure in
617 * basic-math.cs, most likely because it gets rid of some r8->r4
620 if (MONO_IS_MOVE (def
) &&
621 (!defs
[def
->sreg1
] || (def_index
[def
->sreg1
] < def_index
[sreg
])) &&
622 !vreg_is_volatile (cfg
, def
->sreg1
) &&
623 /* This avoids propagating local vregs across calls */
624 ((get_vreg_to_inst (cfg
, def
->sreg1
) || !defs
[def
->sreg1
] || (def_index
[def
->sreg1
] >= last_call_index
) || (def
->opcode
== OP_VMOVE
))) &&
625 !(defs
[def
->sreg1
] && mono_inst_next (defs
[def
->sreg1
], filter
) == def
) &&
626 (!MONO_ARCH_USE_FPSTACK
|| (def
->opcode
!= OP_FMOVE
)) &&
627 (def
->opcode
!= OP_FMOVE
)) {
628 int vreg
= def
->sreg1
;
630 if (cfg
->verbose_level
> 2) printf ("CCOPY/2: R%d -> R%d\n", sreg
, vreg
);
631 sregs
[srcindex
] = vreg
;
632 mono_inst_set_src_registers (ins
, sregs
);
634 /* Allow further iterations */
639 /* Constant propagation */
640 /* is_inst_imm is only needed for binops */
641 if ((((def
->opcode
== OP_ICONST
) || ((sizeof (gpointer
) == 8) && (def
->opcode
== OP_I8CONST
)) || (def
->opcode
== OP_PCONST
)))
643 (!MONO_ARCH_USE_FPSTACK
&& (def
->opcode
== OP_R8CONST
))) {
646 /* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
647 if ((srcindex
== 1) && (ins
->sreg1
!= -1) && defs
[ins
->sreg1
] &&
648 ((defs
[ins
->sreg1
]->opcode
== OP_ICONST
) || defs
[ins
->sreg1
]->opcode
== OP_PCONST
) &&
650 /* Both arguments are constants, perform cfold */
651 mono_constant_fold_ins (cfg
, ins
, defs
[ins
->sreg1
], defs
[ins
->sreg2
], TRUE
);
652 } else if ((srcindex
== 0) && (ins
->sreg2
!= -1) && defs
[ins
->sreg2
]) {
653 /* Arg 1 is constant, swap arguments if possible */
654 int opcode
= ins
->opcode
;
655 mono_constant_fold_ins (cfg
, ins
, defs
[ins
->sreg1
], defs
[ins
->sreg2
], TRUE
);
656 if (ins
->opcode
!= opcode
) {
657 /* Allow further iterations */
661 } else if ((srcindex
== 0) && (ins
->sreg2
== -1)) {
662 /* Constant unop, perform cfold */
663 mono_constant_fold_ins (cfg
, ins
, defs
[ins
->sreg1
], NULL
, TRUE
);
666 opcode2
= mono_op_to_op_imm (ins
->opcode
);
667 if ((opcode2
!= -1) && mono_arch_is_inst_imm (ins
->opcode
, opcode2
, def
->inst_c0
) && ((srcindex
== 1) || (ins
->sreg2
== -1))) {
668 ins
->opcode
= opcode2
;
669 if ((def
->opcode
== OP_I8CONST
) && TARGET_SIZEOF_VOID_P
== 4)
670 ins
->inst_l
= def
->inst_l
;
671 else if (regtype
== 'l' && TARGET_SIZEOF_VOID_P
== 4)
672 /* This can happen if the def was a result of an iconst+conv.i8, which is transformed into just an iconst */
673 ins
->inst_l
= def
->inst_c0
;
675 ins
->inst_imm
= def
->inst_c0
;
676 sregs
[srcindex
] = -1;
677 mono_inst_set_src_registers (ins
, sregs
);
679 if ((opcode2
== OP_VOIDCALL
) || (opcode2
== OP_CALL
) || (opcode2
== OP_LCALL
) || (opcode2
== OP_FCALL
))
680 ((MonoCallInst
*)ins
)->fptr
= (gpointer
)(uintptr_t)ins
->inst_imm
;
682 /* Allow further iterations */
688 #if defined(TARGET_X86) || defined(TARGET_AMD64)
689 if ((ins
->opcode
== OP_X86_LEA
) && (srcindex
== 1)) {
690 #if SIZEOF_REGISTER == 8
691 /* FIXME: Use OP_PADD_IMM when the new JIT is done */
692 ins
->opcode
= OP_LADD_IMM
;
694 ins
->opcode
= OP_ADD_IMM
;
696 ins
->inst_imm
+= def
->inst_c0
<< ins
->backend
.shift_amount
;
700 opcode2
= mono_load_membase_to_load_mem (ins
->opcode
);
701 if ((srcindex
== 0) && (opcode2
!= -1) && mono_arch_is_inst_imm (ins
->opcode
, opcode2
, def
->inst_c0
)) {
702 ins
->opcode
= opcode2
;
703 ins
->inst_imm
= def
->inst_c0
+ ins
->inst_offset
;
708 else if (((def
->opcode
== OP_ADD_IMM
) || (def
->opcode
== OP_LADD_IMM
)) && (MONO_IS_LOAD_MEMBASE (ins
) || MONO_ARCH_IS_OP_MEMBASE (ins
->opcode
))) {
709 /* ADD_IMM is created by spill_global_vars */
711 * We have to guarantee that def->sreg1 haven't changed since def->dreg
712 * was defined. cfg->frame_reg is assumed to remain constant.
714 if ((def
->sreg1
== cfg
->frame_reg
) || ((mono_inst_next (def
, filter
) == ins
) && (def
->dreg
!= def
->sreg1
))) {
715 ins
->inst_basereg
= def
->sreg1
;
716 ins
->inst_offset
+= def
->inst_imm
;
718 } else if ((ins
->opcode
== OP_ISUB_IMM
) && (def
->opcode
== OP_IADD_IMM
) && (mono_inst_next (def
, filter
) == ins
) && (def
->dreg
!= def
->sreg1
)) {
719 ins
->sreg1
= def
->sreg1
;
720 ins
->inst_imm
-= def
->inst_imm
;
721 } else if ((ins
->opcode
== OP_IADD_IMM
) && (def
->opcode
== OP_ISUB_IMM
) && (mono_inst_next (def
, filter
) == ins
) && (def
->dreg
!= def
->sreg1
)) {
722 ins
->sreg1
= def
->sreg1
;
723 ins
->inst_imm
-= def
->inst_imm
;
724 } else if (ins
->opcode
== OP_STOREI1_MEMBASE_REG
&&
725 (def
->opcode
== OP_ICONV_TO_U1
|| def
->opcode
== OP_ICONV_TO_I1
|| def
->opcode
== OP_SEXT_I4
|| (SIZEOF_REGISTER
== 8 && def
->opcode
== OP_LCONV_TO_U1
)) &&
726 (!defs
[def
->sreg1
] || (def_index
[def
->sreg1
] < def_index
[sreg
]))) {
727 /* Avoid needless sign extension */
728 ins
->sreg1
= def
->sreg1
;
729 } else if (ins
->opcode
== OP_STOREI2_MEMBASE_REG
&&
730 (def
->opcode
== OP_ICONV_TO_U2
|| def
->opcode
== OP_ICONV_TO_I2
|| def
->opcode
== OP_SEXT_I4
|| (SIZEOF_REGISTER
== 8 && def
->opcode
== OP_LCONV_TO_I2
)) &&
731 (!defs
[def
->sreg1
] || (def_index
[def
->sreg1
] < def_index
[sreg
]))) {
732 /* Avoid needless sign extension */
733 ins
->sreg1
= def
->sreg1
;
734 } else if (ins
->opcode
== OP_COMPARE_IMM
&& def
->opcode
== OP_LDADDR
&& ins
->inst_imm
== 0) {
737 memset (&dummy_arg1
, 0, sizeof (MonoInst
));
738 dummy_arg1
.opcode
= OP_ICONST
;
739 dummy_arg1
.inst_c0
= 1;
741 mono_constant_fold_ins (cfg
, ins
, &dummy_arg1
, NULL
, TRUE
);
742 } else if (srcindex
== 0 && ins
->opcode
== OP_COMPARE
&& defs
[ins
->sreg1
]->opcode
== OP_PCONST
&& defs
[ins
->sreg2
] && defs
[ins
->sreg2
]->opcode
== OP_PCONST
) {
743 /* typeof(T) == typeof(..) */
744 mono_constant_fold_ins (cfg
, ins
, defs
[ins
->sreg1
], defs
[ins
->sreg2
], TRUE
);
745 } else if (ins
->opcode
== OP_MOVE
&& def
->opcode
== OP_LDADDR
) {
746 ins
->opcode
= OP_LDADDR
;
748 ins
->inst_p0
= def
->inst_p0
;
749 ins
->klass
= def
->klass
;
753 g_assert (cfg
->cbb
== bb_opt
);
754 g_assert (!bb_opt
->code
);
755 /* Do strength reduction here */
756 if (mono_strength_reduction_ins (cfg
, ins
, &spec
) && max
< cfg
->next_vreg
) {
757 MonoInst
**defs_prev
= defs
;
758 gint32
*def_index_prev
= def_index
;
759 guint32 prev_max
= max
;
760 guint32 additional_vregs
= cfg
->next_vreg
- initial_max_vregs
;
762 /* We have more vregs so we need to reallocate defs and def_index arrays */
763 max
= initial_max_vregs
+ additional_vregs
* 2;
764 defs
= (MonoInst
**)mono_mempool_alloc (cfg
->mempool
, sizeof (MonoInst
*) * max
);
765 def_index
= (gint32
*)mono_mempool_alloc (cfg
->mempool
, sizeof (guint32
) * max
);
767 /* Keep the entries for the previous vregs, zero the rest */
768 memcpy (defs
, defs_prev
, sizeof (MonoInst
*) * prev_max
);
769 memset (defs
+ prev_max
, 0, sizeof (MonoInst
*) * (max
- prev_max
));
770 memcpy (def_index
, def_index_prev
, sizeof (guint32
) * prev_max
);
771 memset (def_index
+ prev_max
, 0, sizeof (guint32
) * (max
- prev_max
));
774 if (cfg
->cbb
->code
|| (cfg
->cbb
!= bb_opt
)) {
775 MonoInst
*saved_prev
= ins
->prev
;
777 /* If we have code in cbb, we need to replace ins with the decomposition */
778 mono_replace_ins (cfg
, bb
, ins
, &ins
->prev
, bb_opt
, cfg
->cbb
);
779 bb_opt
->code
= bb_opt
->last_ins
= NULL
;
780 bb_opt
->in_count
= bb_opt
->out_count
= 0;
784 /* first instruction of basic block got replaced, so create
785 * dummy inst that points to start of basic block */
786 MONO_INST_NEW (cfg
, saved_prev
, OP_NOP
);
787 saved_prev
= bb
->code
;
789 /* ins is hanging, continue scanning the emitted code */
794 if (spec
[MONO_INST_DEST
] != ' ') {
795 MonoInst
*def
= defs
[ins
->dreg
];
797 if (def
&& (def
->opcode
== OP_ADD_IMM
) && (def
->sreg1
== cfg
->frame_reg
) && (MONO_IS_STORE_MEMBASE (ins
))) {
798 /* ADD_IMM is created by spill_global_vars */
799 /* cfg->frame_reg is assumed to remain constant */
800 ins
->inst_destbasereg
= def
->sreg1
;
801 ins
->inst_offset
+= def
->inst_imm
;
804 if (!MONO_IS_STORE_MEMBASE (ins
) && !vreg_is_volatile (cfg
, ins
->dreg
)) {
805 defs
[ins
->dreg
] = ins
;
806 def_index
[ins
->dreg
] = ins_index
;
810 if (MONO_IS_CALL (ins
))
811 last_call_index
= ins_index
;
819 reg_is_softreg_no_fpstack (int reg
, const char spec
)
821 return (spec
== 'i' && reg
>= MONO_MAX_IREGS
)
822 || ((spec
== 'f' && reg
>= MONO_MAX_FREGS
) && !MONO_ARCH_USE_FPSTACK
)
823 #ifdef MONO_ARCH_SIMD_INTRINSICS
824 || (spec
== 'x' && reg
>= MONO_MAX_XREGS
)
830 reg_is_softreg (int reg
, const char spec
)
832 return (spec
== 'i' && reg
>= MONO_MAX_IREGS
)
833 || (spec
== 'f' && reg
>= MONO_MAX_FREGS
)
834 #ifdef MONO_ARCH_SIMD_INTRINSICS
835 || (spec
== 'x' && reg
>= MONO_MAX_XREGS
)
841 mono_is_simd_accessor (MonoInst
*ins
)
843 switch (ins
->opcode
) {
844 #ifdef MONO_ARCH_SIMD_INTRINSICS
852 case OP_INSERTX_U1_SLOW
:
853 case OP_INSERTX_I4_SLOW
:
854 case OP_INSERTX_R4_SLOW
:
855 case OP_INSERTX_R8_SLOW
:
856 case OP_INSERTX_I8_SLOW
:
867 * Get rid of the dead assignments to local vregs like the ones created by the
871 mono_local_deadce (MonoCompile
*cfg
)
874 MonoInst
*ins
, *prev
;
875 MonoBitSet
*used
, *defined
;
877 //mono_print_code (cfg, "BEFORE LOCAL-DEADCE");
880 * Assignments to global vregs can't be eliminated so this pass must come
881 * after the handle_global_vregs () pass.
884 used
= mono_bitset_mp_new_noinit (cfg
->mempool
, cfg
->next_vreg
+ 1);
885 defined
= mono_bitset_mp_new_noinit (cfg
->mempool
, cfg
->next_vreg
+ 1);
887 /* First pass: collect liveness info */
888 for (bb
= cfg
->bb_entry
; bb
; bb
= bb
->next_bb
) {
889 /* Manually init the defs entries used by the bblock */
890 MONO_BB_FOR_EACH_INS (bb
, ins
) {
891 const char *spec
= INS_INFO (ins
->opcode
);
892 int sregs
[MONO_MAX_SRC_REGS
];
895 if (spec
[MONO_INST_DEST
] != ' ') {
896 mono_bitset_clear_fast (used
, ins
->dreg
);
897 mono_bitset_clear_fast (defined
, ins
->dreg
);
898 #if SIZEOF_REGISTER == 4
900 mono_bitset_clear_fast (used
, ins
->dreg
+ 1);
901 mono_bitset_clear_fast (defined
, ins
->dreg
+ 1);
904 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
905 for (i
= 0; i
< num_sregs
; ++i
) {
906 mono_bitset_clear_fast (used
, sregs
[i
]);
907 #if SIZEOF_REGISTER == 4
908 mono_bitset_clear_fast (used
, sregs
[i
] + 1);
914 * Make a reverse pass over the instruction list
916 MONO_BB_FOR_EACH_INS_REVERSE_SAFE (bb
, prev
, ins
) {
917 const char *spec
= INS_INFO (ins
->opcode
);
918 int sregs
[MONO_MAX_SRC_REGS
];
920 MonoInst
*prev_f
= mono_inst_prev (ins
, FILTER_NOP
| FILTER_IL_SEQ_POINT
);
922 if (ins
->opcode
== OP_NOP
) {
923 MONO_DELETE_INS (bb
, ins
);
927 g_assert (ins
->opcode
> MONO_CEE_LAST
);
929 if (MONO_IS_NON_FP_MOVE (ins
) && prev_f
) {
934 spec2
= INS_INFO (def
->opcode
);
937 * Perform a limited kind of reverse copy propagation, i.e.
938 * transform B <- FOO; A <- B into A <- FOO
939 * This isn't copyprop, not deadce, but it can only be performed
940 * after handle_global_vregs () has run.
942 if (!get_vreg_to_inst (cfg
, ins
->sreg1
) && (spec2
[MONO_INST_DEST
] != ' ') && (def
->dreg
== ins
->sreg1
) && !mono_bitset_test_fast (used
, ins
->sreg1
) && !MONO_IS_STORE_MEMBASE (def
) && reg_is_softreg (ins
->sreg1
, spec
[MONO_INST_DEST
]) && !mono_is_simd_accessor (def
)) {
943 if (cfg
->verbose_level
> 2) {
944 printf ("\tReverse copyprop in BB%d on ", bb
->block_num
);
945 mono_print_ins (ins
);
948 def
->dreg
= ins
->dreg
;
949 MONO_DELETE_INS (bb
, ins
);
950 spec
= INS_INFO (ins
->opcode
);
954 /* Enabling this on x86 could screw up the fp stack */
955 if (reg_is_softreg_no_fpstack (ins
->dreg
, spec
[MONO_INST_DEST
])) {
957 * Assignments to global vregs can only be eliminated if there is another
958 * assignment to the same vreg later in the same bblock.
960 if (!mono_bitset_test_fast (used
, ins
->dreg
) &&
961 (!get_vreg_to_inst (cfg
, ins
->dreg
) || (!bb
->extended
&& !vreg_is_volatile (cfg
, ins
->dreg
) && mono_bitset_test_fast (defined
, ins
->dreg
))) &&
962 MONO_INS_HAS_NO_SIDE_EFFECT (ins
)) {
963 /* Happens with CMOV instructions */
964 if (prev_f
&& prev_f
->opcode
== OP_ICOMPARE_IMM
) {
965 MonoInst
*prev
= prev_f
;
967 * Can't use DELETE_INS since that would interfere with the
972 //printf ("DEADCE: "); mono_print_ins (ins);
973 MONO_DELETE_INS (bb
, ins
);
974 spec
= INS_INFO (ins
->opcode
);
977 if (spec
[MONO_INST_DEST
] != ' ')
978 mono_bitset_clear_fast (used
, ins
->dreg
);
981 if (spec
[MONO_INST_DEST
] != ' ')
982 mono_bitset_set_fast (defined
, ins
->dreg
);
983 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
984 for (i
= 0; i
< num_sregs
; ++i
)
985 mono_bitset_set_fast (used
, sregs
[i
]);
986 if (MONO_IS_STORE_MEMBASE (ins
))
987 mono_bitset_set_fast (used
, ins
->dreg
);
989 if (MONO_IS_CALL (ins
)) {
990 MonoCallInst
*call
= (MonoCallInst
*)ins
;
993 if (call
->out_ireg_args
) {
994 for (l
= call
->out_ireg_args
; l
; l
= l
->next
) {
995 guint32 regpair
, reg
;
997 regpair
= (guint32
)(gssize
)(l
->data
);
998 reg
= regpair
& 0xffffff;
1000 mono_bitset_set_fast (used
, reg
);
1004 if (call
->out_freg_args
) {
1005 for (l
= call
->out_freg_args
; l
; l
= l
->next
) {
1006 guint32 regpair
, reg
;
1008 regpair
= (guint32
)(gssize
)(l
->data
);
1009 reg
= regpair
& 0xffffff;
1011 mono_bitset_set_fast (used
, reg
);
1018 //mono_print_code (cfg, "AFTER LOCAL-DEADCE");
1021 #else /* !DISABLE_JIT */
1023 MONO_EMPTY_SOURCE_FILE (local_propagation
);
1025 #endif /* !DISABLE_JIT */