[ci] Bump timeout in ms-test-suite
[mono-project.git] / mono / mini / local-propagation.c
blobd8f62355b8ef1ecc69e4195fb804ef1f305fa687
/*
 * local-propagation.c: Local constant, copy and tree propagation.
 *
 * To make some sense of the tree mover, read mono/docs/tree-mover.txt
 *
 * Author:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Massimiliano Mantione (massi@ximian.com)
 *
 * (C) 2006 Novell, Inc.  http://www.novell.com
 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
16 #include <config.h>
17 #include <mono/utils/mono-compiler.h>
19 #ifndef DISABLE_JIT
21 #include <string.h>
22 #include <stdio.h>
23 #ifdef HAVE_ALLOCA_H
24 #include <alloca.h>
25 #endif
27 #include <mono/metadata/debug-helpers.h>
28 #include <mono/metadata/mempool.h>
29 #include <mono/metadata/opcodes.h>
30 #include "mini.h"
31 #include "ir-emit.h"
/*
 * Fallback used when no header included above provides
 * MONO_ARCH_IS_OP_MEMBASE: no opcode is then treated as a membase opcode.
 */
#if !defined(MONO_ARCH_IS_OP_MEMBASE)
#define MONO_ARCH_IS_OP_MEMBASE(opcode) FALSE
#endif
37 static inline MonoBitSet*
38 mono_bitset_mp_new_noinit (MonoMemPool *mp, guint32 max_size)
40 int size = mono_bitset_alloc_size (max_size, 0);
41 gpointer mem;
43 mem = mono_mempool_alloc (mp, size);
44 return mono_bitset_mem_new (mem, max_size, MONO_BITSET_DONT_FREE);
47 struct magic_unsigned {
48 guint32 magic_number;
49 gboolean addition;
50 int shift;
53 struct magic_signed {
54 gint32 magic_number;
55 int shift;
58 /* http://www.hackersdelight.org/hdcodetxt/magicu.c.txt */
59 static struct magic_unsigned
60 compute_magic_unsigned (guint32 divisor) {
61 guint32 nc, delta, q1, r1, q2, r2;
62 struct magic_unsigned magu;
63 gboolean gt = FALSE;
64 int p;
66 magu.addition = 0;
67 nc = -1 - (-divisor) % divisor;
68 p = 31;
69 q1 = 0x80000000 / nc;
70 r1 = 0x80000000 - q1 * nc;
71 q2 = 0x7FFFFFFF / divisor;
72 r2 = 0x7FFFFFFF - q2 * divisor;
73 do {
74 p = p + 1;
75 if (q1 >= 0x80000000)
76 gt = TRUE;
77 if (r1 >= nc - r1) {
78 q1 = 2 * q1 + 1;
79 r1 = 2 * r1 - nc;
80 } else {
81 q1 = 2 * q1;
82 r1 = 2 * r1;
84 if (r2 + 1 >= divisor - r2) {
85 if (q2 >= 0x7FFFFFFF)
86 magu.addition = 1;
87 q2 = 2 * q2 + 1;
88 r2 = 2 * r2 + 1 - divisor;
89 } else {
90 if (q2 >= 0x80000000)
91 magu.addition = 1;
92 q2 = 2 * q2;
93 r2 = 2 * r2 + 1;
95 delta = divisor - 1 - r2;
96 } while (!gt && (q1 < delta || (q1 == delta && r1 == 0)));
98 magu.magic_number = q2 + 1;
99 magu.shift = p - 32;
100 return magu;
103 /* http://www.hackersdelight.org/hdcodetxt/magic.c.txt */
104 static struct magic_signed
105 compute_magic_signed (gint32 divisor) {
106 int p;
107 guint32 ad, anc, delta, q1, r1, q2, r2, t;
108 const guint32 two31 = 0x80000000;
109 struct magic_signed mag;
111 ad = abs (divisor);
112 t = two31 + ((unsigned)divisor >> 31);
113 anc = t - 1 - t % ad;
114 p = 31;
115 q1 = two31 / anc;
116 r1 = two31 - q1 * anc;
117 q2 = two31 / ad;
118 r2 = two31 - q2 * ad;
119 do {
120 p++;
121 q1 *= 2;
122 r1 *= 2;
123 if (r1 >= anc) {
124 q1++;
125 r1 -= anc;
128 q2 *= 2;
129 r2 *= 2;
131 if (r2 >= ad) {
132 q2++;
133 r2 -= ad;
136 delta = ad - r2;
137 } while (q1 < delta || (q1 == delta && r1 == 0));
139 mag.magic_number = q2 + 1;
140 if (divisor < 0)
141 mag.magic_number = -mag.magic_number;
142 mag.shift = p - 32;
143 return mag;
146 static gboolean
147 mono_strength_reduction_division (MonoCompile *cfg, MonoInst *ins)
149 gboolean allocated_vregs = FALSE;
151 * We don't use it on 32bit systems because on those
152 * platforms we emulate long multiplication, driving the
153 * performance back down.
155 switch (ins->opcode) {
156 case OP_IDIV_UN_IMM: {
157 guint32 tmp_regl;
158 #if SIZEOF_REGISTER == 8
159 guint32 dividend_reg;
160 #else
161 guint32 tmp_regi;
162 #endif
163 struct magic_unsigned mag;
164 int power2 = mono_is_power_of_two (ins->inst_imm);
166 /* The decomposition doesn't handle exception throwing */
167 if (ins->inst_imm == 0)
168 break;
170 if (power2 >= 0) {
171 ins->opcode = OP_ISHR_UN_IMM;
172 ins->sreg2 = -1;
173 ins->inst_imm = power2;
174 break;
176 if (cfg->backend->disable_div_with_mul)
177 break;
178 allocated_vregs = TRUE;
180 * Replacement of unsigned division with multiplication,
181 * shifts and additions Hacker's Delight, chapter 10-10.
183 mag = compute_magic_unsigned (ins->inst_imm);
184 tmp_regl = alloc_lreg (cfg);
185 #if SIZEOF_REGISTER == 8
186 dividend_reg = alloc_lreg (cfg);
187 MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
188 MONO_EMIT_NEW_UNALU (cfg, OP_ZEXT_I4, dividend_reg, ins->sreg1);
189 MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
190 if (mag.addition) {
191 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
192 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
193 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, mag.shift);
194 } else {
195 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, 32 + mag.shift);
197 #else
198 tmp_regi = alloc_ireg (cfg);
199 MONO_EMIT_NEW_ICONST (cfg, tmp_regi, mag.magic_number);
200 MONO_EMIT_NEW_BIALU (cfg, OP_BIGMUL_UN, tmp_regl, ins->sreg1, tmp_regi);
201 /* Long shifts below will be decomposed during cprop */
202 if (mag.addition) {
203 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
204 MONO_EMIT_NEW_BIALU (cfg, OP_IADDCC, MONO_LVREG_LS (tmp_regl), MONO_LVREG_LS (tmp_regl), ins->sreg1);
205 /* MONO_LVREG_MS (tmp_reg) is 0, save in it the carry */
206 MONO_EMIT_NEW_BIALU (cfg, OP_IADC, MONO_LVREG_MS (tmp_regl), MONO_LVREG_MS (tmp_regl), MONO_LVREG_MS (tmp_regl));
207 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, mag.shift);
208 } else {
209 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
211 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ins->dreg, MONO_LVREG_LS (tmp_regl));
212 #endif
213 mono_jit_stats.optimized_divisions++;
214 break;
216 case OP_IDIV_IMM: {
217 guint32 tmp_regl;
218 #if SIZEOF_REGISTER == 8
219 guint32 dividend_reg;
220 #else
221 guint32 tmp_regi;
222 #endif
223 struct magic_signed mag;
224 int power2 = mono_is_power_of_two (ins->inst_imm);
225 /* The decomposition doesn't handle exception throwing */
226 /* Optimization with MUL does not apply for -1, 0 and 1 divisors */
227 if (ins->inst_imm == 0 || ins->inst_imm == -1) {
228 break;
229 } else if (ins->inst_imm == 1) {
230 ins->opcode = OP_MOVE;
231 ins->inst_imm = 0;
232 break;
234 allocated_vregs = TRUE;
235 if (power2 == 1) {
236 guint32 r1 = alloc_ireg (cfg);
237 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, ins->sreg1, 31);
238 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
239 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, 1);
240 break;
241 } else if (power2 > 0 && power2 < 31) {
242 guint32 r1 = alloc_ireg (cfg);
243 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, r1, ins->sreg1, 31);
244 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, r1, (32 - power2));
245 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
246 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, power2);
247 break;
250 if (cfg->backend->disable_div_with_mul)
251 break;
253 * Replacement of signed division with multiplication,
254 * shifts and additions Hacker's Delight, chapter 10-6.
256 mag = compute_magic_signed (ins->inst_imm);
257 tmp_regl = alloc_lreg (cfg);
258 #if SIZEOF_REGISTER == 8
259 dividend_reg = alloc_lreg (cfg);
260 MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
261 MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, dividend_reg, ins->sreg1);
262 MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
263 if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
264 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32);
265 if (ins->inst_imm > 0 && mag.magic_number < 0) {
266 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
267 } else if (ins->inst_imm < 0 && mag.magic_number > 0) {
268 MONO_EMIT_NEW_BIALU (cfg, OP_LSUB, tmp_regl, tmp_regl, dividend_reg);
270 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, mag.shift);
271 } else {
272 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
274 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, SIZEOF_REGISTER * 8 - 1);
275 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, ins->dreg, ins->dreg, tmp_regl);
276 #else
277 tmp_regi = alloc_ireg (cfg);
278 MONO_EMIT_NEW_ICONST (cfg, tmp_regi, mag.magic_number);
279 MONO_EMIT_NEW_BIALU (cfg, OP_BIGMUL, tmp_regl, ins->sreg1, tmp_regi);
280 if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
281 if (ins->inst_imm > 0 && mag.magic_number < 0) {
282 /* Opposite sign, cannot overflow */
283 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, tmp_regi, MONO_LVREG_MS (tmp_regl), ins->sreg1);
284 } else if (ins->inst_imm < 0 && mag.magic_number > 0) {
285 /* Same sign, cannot overflow */
286 MONO_EMIT_NEW_BIALU (cfg, OP_ISUB, tmp_regi, MONO_LVREG_MS (tmp_regl), ins->sreg1);
288 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, tmp_regi, tmp_regi, mag.shift);
289 } else {
290 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, tmp_regi, MONO_LVREG_MS (tmp_regl), mag.shift);
292 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, ins->dreg, tmp_regi, SIZEOF_REGISTER * 8 - 1);
293 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, ins->dreg, ins->dreg, tmp_regi);
294 #endif
295 mono_jit_stats.optimized_divisions++;
296 break;
299 return allocated_vregs;
303 * Replaces ins with optimized opcodes.
305 * We can emit to cbb the equivalent instructions which will be used as
306 * replacement for ins, or simply change the fields of ins. Spec needs to
307 * be updated if we silently change the opcode of ins.
309 * Returns TRUE if additional vregs were allocated.
311 static gboolean
312 mono_strength_reduction_ins (MonoCompile *cfg, MonoInst *ins, const char **spec)
314 gboolean allocated_vregs = FALSE;
316 /* FIXME: Add long/float */
317 switch (ins->opcode) {
318 case OP_MOVE:
319 case OP_XMOVE:
320 if (ins->dreg == ins->sreg1) {
321 NULLIFY_INS (ins);
323 break;
324 case OP_ADD_IMM:
325 case OP_IADD_IMM:
326 case OP_SUB_IMM:
327 case OP_ISUB_IMM:
328 #if SIZEOF_REGISTER == 8
329 case OP_LADD_IMM:
330 case OP_LSUB_IMM:
331 #endif
332 if (ins->inst_imm == 0) {
333 ins->opcode = OP_MOVE;
335 break;
336 case OP_MUL_IMM:
337 case OP_IMUL_IMM:
338 #if SIZEOF_REGISTER == 8
339 case OP_LMUL_IMM:
340 #endif
341 if (ins->inst_imm == 0) {
342 ins->opcode = (ins->opcode == OP_LMUL_IMM) ? OP_I8CONST : OP_ICONST;
343 ins->inst_c0 = 0;
344 ins->sreg1 = -1;
345 } else if (ins->inst_imm == 1) {
346 ins->opcode = OP_MOVE;
347 } else if ((ins->opcode == OP_IMUL_IMM) && (ins->inst_imm == -1)) {
348 ins->opcode = OP_INEG;
349 } else if ((ins->opcode == OP_LMUL_IMM) && (ins->inst_imm == -1)) {
350 ins->opcode = OP_LNEG;
351 } else {
352 int power2 = mono_is_power_of_two (ins->inst_imm);
353 if (power2 >= 0) {
354 ins->opcode = (ins->opcode == OP_MUL_IMM) ? OP_SHL_IMM : ((ins->opcode == OP_LMUL_IMM) ? OP_LSHL_IMM : OP_ISHL_IMM);
355 ins->inst_imm = power2;
358 break;
359 case OP_IREM_UN_IMM: {
360 int power2 = mono_is_power_of_two (ins->inst_imm);
362 if (power2 >= 0) {
363 ins->opcode = OP_IAND_IMM;
364 ins->sreg2 = -1;
365 ins->inst_imm = (1 << power2) - 1;
367 break;
369 case OP_IDIV_UN_IMM:
370 case OP_IDIV_IMM: {
371 if (!COMPILE_LLVM (cfg))
372 allocated_vregs = mono_strength_reduction_division (cfg, ins);
373 break;
375 #if SIZEOF_REGISTER == 8
376 case OP_LREM_IMM:
377 #endif
378 case OP_IREM_IMM: {
379 int power = mono_is_power_of_two (ins->inst_imm);
380 if (ins->inst_imm == 1) {
381 ins->opcode = OP_ICONST;
382 MONO_INST_NULLIFY_SREGS (ins);
383 ins->inst_c0 = 0;
384 #if __s390__
386 #else
387 } else if ((ins->inst_imm > 0) && (ins->inst_imm < (1LL << 32)) && (power != -1)) {
388 gboolean is_long = ins->opcode == OP_LREM_IMM;
389 int compensator_reg = alloc_ireg (cfg);
390 int intermediate_reg;
392 /* Based on gcc code */
394 /* Add compensation for negative numerators */
396 if (power > 1) {
397 intermediate_reg = compensator_reg;
398 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_IMM : OP_ISHR_IMM, intermediate_reg, ins->sreg1, is_long ? 63 : 31);
399 } else {
400 intermediate_reg = ins->sreg1;
403 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_UN_IMM : OP_ISHR_UN_IMM, compensator_reg, intermediate_reg, (is_long ? 64 : 32) - power);
404 MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LADD : OP_IADD, ins->dreg, ins->sreg1, compensator_reg);
405 /* Compute remainder */
406 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LAND_IMM : OP_AND_IMM, ins->dreg, ins->dreg, (1 << power) - 1);
407 /* Remove compensation */
408 MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LSUB : OP_ISUB, ins->dreg, ins->dreg, compensator_reg);
410 allocated_vregs = TRUE;
412 #endif
413 break;
415 #if SIZEOF_REGISTER == 4
416 case OP_LSHR_IMM: {
417 if (COMPILE_LLVM (cfg))
418 break;
419 if (ins->inst_c1 == 32) {
420 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
421 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
422 } else if (ins->inst_c1 == 0) {
423 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
424 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
425 } else if (ins->inst_c1 > 32) {
426 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
427 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
428 } else {
429 guint32 tmpreg = alloc_ireg (cfg);
430 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
431 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
432 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
433 MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
434 allocated_vregs = TRUE;
436 break;
438 case OP_LSHR_UN_IMM: {
439 if (COMPILE_LLVM (cfg))
440 break;
441 if (ins->inst_c1 == 32) {
442 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
443 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
444 } else if (ins->inst_c1 == 0) {
445 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
446 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
447 } else if (ins->inst_c1 > 32) {
448 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
449 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
450 } else {
451 guint32 tmpreg = alloc_ireg (cfg);
452 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
453 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
454 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
455 MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
456 allocated_vregs = TRUE;
458 break;
460 case OP_LSHL_IMM: {
461 if (COMPILE_LLVM (cfg))
462 break;
463 if (ins->inst_c1 == 32) {
464 /* just move the lower half to the upper and zero the lower word */
465 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
466 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
467 } else if (ins->inst_c1 == 0) {
468 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
469 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
470 } else if (ins->inst_c1 > 32) {
471 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1 - 32);
472 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
473 } else {
474 guint32 tmpreg = alloc_ireg (cfg);
475 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, tmpreg, MONO_LVREG_LS (ins->sreg1), 32 - ins->inst_c1);
476 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
477 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
478 MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->dreg), tmpreg);
479 allocated_vregs = TRUE;
481 break;
483 #endif
485 default:
486 break;
489 *spec = INS_INFO (ins->opcode);
490 return allocated_vregs;
494 * mono_local_cprop:
496 * A combined local copy and constant propagation pass.
498 void
499 mono_local_cprop (MonoCompile *cfg)
501 MonoBasicBlock *bb, *bb_opt;
502 MonoInst **defs;
503 gint32 *def_index;
504 int max;
505 int filter = FILTER_IL_SEQ_POINT;
506 int initial_max_vregs = cfg->next_vreg;
508 max = cfg->next_vreg;
509 defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * cfg->next_vreg);
510 def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * cfg->next_vreg);
511 cfg->cbb = bb_opt = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoBasicBlock));
513 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
514 MonoInst *ins;
515 int ins_index;
516 int last_call_index;
518 /* Manually init the defs entries used by the bblock */
519 MONO_BB_FOR_EACH_INS (bb, ins) {
520 int sregs [MONO_MAX_SRC_REGS];
521 int num_sregs, i;
523 if (ins->dreg != -1) {
524 #if SIZEOF_REGISTER == 4
525 const char *spec = INS_INFO (ins->opcode);
526 if (spec [MONO_INST_DEST] == 'l') {
527 defs [ins->dreg + 1] = NULL;
528 defs [ins->dreg + 2] = NULL;
530 #endif
531 defs [ins->dreg] = NULL;
534 num_sregs = mono_inst_get_src_registers (ins, sregs);
535 for (i = 0; i < num_sregs; ++i) {
536 int sreg = sregs [i];
537 #if SIZEOF_REGISTER == 4
538 const char *spec = INS_INFO (ins->opcode);
539 if (spec [MONO_INST_SRC1 + i] == 'l') {
540 defs [sreg + 1] = NULL;
541 defs [sreg + 2] = NULL;
543 #endif
544 defs [sreg] = NULL;
548 ins_index = 0;
549 last_call_index = -1;
550 MONO_BB_FOR_EACH_INS (bb, ins) {
551 const char *spec = INS_INFO (ins->opcode);
552 int regtype, srcindex, sreg;
553 int num_sregs;
554 int sregs [MONO_MAX_SRC_REGS];
556 if (ins->opcode == OP_NOP) {
557 MONO_DELETE_INS (bb, ins);
558 continue;
561 g_assert (ins->opcode > MONO_CEE_LAST);
563 /* FIXME: Optimize this */
564 if (ins->opcode == OP_LDADDR) {
565 MonoInst *var = (MonoInst *)ins->inst_p0;
567 defs [var->dreg] = NULL;
569 if (!MONO_TYPE_ISSTRUCT (var->inst_vtype))
570 break;
574 if (MONO_IS_STORE_MEMBASE (ins)) {
575 sreg = ins->dreg;
576 regtype = 'i';
578 if ((regtype == 'i') && (sreg != -1) && defs [sreg]) {
579 MonoInst *def = defs [sreg];
581 if ((def->opcode == OP_MOVE) && (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) && !vreg_is_volatile (cfg, def->sreg1)) {
582 int vreg = def->sreg1;
583 if (cfg->verbose_level > 2) printf ("CCOPY: R%d -> R%d\n", sreg, vreg);
584 ins->dreg = vreg;
589 num_sregs = mono_inst_get_src_registers (ins, sregs);
590 for (srcindex = 0; srcindex < num_sregs; ++srcindex) {
591 MonoInst *def;
593 mono_inst_get_src_registers (ins, sregs);
595 regtype = spec [MONO_INST_SRC1 + srcindex];
596 sreg = sregs [srcindex];
598 if ((regtype == ' ') || (sreg == -1) || (!defs [sreg]))
599 continue;
601 def = defs [sreg];
603 /* Copy propagation */
605 * The first check makes sure the source of the copy did not change since
606 * the copy was made.
607 * The second check avoids volatile variables.
608 * The third check avoids copy propagating local vregs through a call,
609 * since the lvreg will be spilled
610 * The fourth check avoids copy propagating a vreg in cases where
611 * it would be eliminated anyway by reverse copy propagation later,
612 * because propagating it would create another use for it, thus making
613 * it impossible to use reverse copy propagation.
615 /* Enabling this for floats trips up the fp stack */
617 * Enabling this for floats on amd64 seems to cause a failure in
618 * basic-math.cs, most likely because it gets rid of some r8->r4
619 * conversions.
621 if (MONO_IS_MOVE (def) &&
622 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) &&
623 !vreg_is_volatile (cfg, def->sreg1) &&
624 /* This avoids propagating local vregs across calls */
625 ((get_vreg_to_inst (cfg, def->sreg1) || !defs [def->sreg1] || (def_index [def->sreg1] >= last_call_index) || (def->opcode == OP_VMOVE))) &&
626 !(defs [def->sreg1] && mono_inst_next (defs [def->sreg1], filter) == def) &&
627 (!MONO_ARCH_USE_FPSTACK || (def->opcode != OP_FMOVE)) &&
628 (def->opcode != OP_FMOVE)) {
629 int vreg = def->sreg1;
631 if (cfg->verbose_level > 2) printf ("CCOPY/2: R%d -> R%d\n", sreg, vreg);
632 sregs [srcindex] = vreg;
633 mono_inst_set_src_registers (ins, sregs);
635 /* Allow further iterations */
636 srcindex = -1;
637 continue;
640 /* Constant propagation */
641 /* FIXME: Make is_inst_imm a macro */
642 /* FIXME: Make is_inst_imm take an opcode argument */
643 /* is_inst_imm is only needed for binops */
644 if ((((def->opcode == OP_ICONST) || ((sizeof (gpointer) == 8) && (def->opcode == OP_I8CONST)) || (def->opcode == OP_PCONST)) &&
645 (((srcindex == 0) && (ins->sreg2 == -1)) || mono_arch_is_inst_imm (def->inst_c0))) ||
646 (!MONO_ARCH_USE_FPSTACK && (def->opcode == OP_R8CONST))) {
647 guint32 opcode2;
649 /* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
650 if ((srcindex == 1) && (ins->sreg1 != -1) && defs [ins->sreg1] &&
651 ((defs [ins->sreg1]->opcode == OP_ICONST) || defs [ins->sreg1]->opcode == OP_PCONST) &&
652 defs [ins->sreg2]) {
653 /* Both arguments are constants, perform cfold */
654 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
655 } else if ((srcindex == 0) && (ins->sreg2 != -1) && defs [ins->sreg2]) {
656 /* Arg 1 is constant, swap arguments if possible */
657 int opcode = ins->opcode;
658 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
659 if (ins->opcode != opcode) {
660 /* Allow further iterations */
661 srcindex = -1;
662 continue;
664 } else if ((srcindex == 0) && (ins->sreg2 == -1)) {
665 /* Constant unop, perform cfold */
666 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], NULL, TRUE);
669 opcode2 = mono_op_to_op_imm (ins->opcode);
670 if ((opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0) && ((srcindex == 1) || (ins->sreg2 == -1))) {
671 ins->opcode = opcode2;
672 if ((def->opcode == OP_I8CONST) && (sizeof (gpointer) == 4)) {
673 ins->inst_ls_word = def->inst_ls_word;
674 ins->inst_ms_word = def->inst_ms_word;
675 } else {
676 ins->inst_imm = def->inst_c0;
678 sregs [srcindex] = -1;
679 mono_inst_set_src_registers (ins, sregs);
681 if ((opcode2 == OP_VOIDCALL) || (opcode2 == OP_CALL) || (opcode2 == OP_LCALL) || (opcode2 == OP_FCALL))
682 ((MonoCallInst*)ins)->fptr = (gpointer)ins->inst_imm;
684 /* Allow further iterations */
685 srcindex = -1;
686 continue;
688 else {
689 /* Special cases */
690 #if defined(TARGET_X86) || defined(TARGET_AMD64)
691 if ((ins->opcode == OP_X86_LEA) && (srcindex == 1)) {
692 #if SIZEOF_REGISTER == 8
693 /* FIXME: Use OP_PADD_IMM when the new JIT is done */
694 ins->opcode = OP_LADD_IMM;
695 #else
696 ins->opcode = OP_ADD_IMM;
697 #endif
698 ins->inst_imm += def->inst_c0 << ins->backend.shift_amount;
699 ins->sreg2 = -1;
701 #endif
702 opcode2 = mono_load_membase_to_load_mem (ins->opcode);
703 if ((srcindex == 0) && (opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0)) {
704 ins->opcode = opcode2;
705 ins->inst_imm = def->inst_c0 + ins->inst_offset;
706 ins->sreg1 = -1;
710 else if (((def->opcode == OP_ADD_IMM) || (def->opcode == OP_LADD_IMM)) && (MONO_IS_LOAD_MEMBASE (ins) || MONO_ARCH_IS_OP_MEMBASE (ins->opcode))) {
711 /* ADD_IMM is created by spill_global_vars */
713 * We have to guarantee that def->sreg1 haven't changed since def->dreg
714 * was defined. cfg->frame_reg is assumed to remain constant.
716 if ((def->sreg1 == cfg->frame_reg) || ((mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1))) {
717 ins->inst_basereg = def->sreg1;
718 ins->inst_offset += def->inst_imm;
720 } else if ((ins->opcode == OP_ISUB_IMM) && (def->opcode == OP_IADD_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
721 ins->sreg1 = def->sreg1;
722 ins->inst_imm -= def->inst_imm;
723 } else if ((ins->opcode == OP_IADD_IMM) && (def->opcode == OP_ISUB_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
724 ins->sreg1 = def->sreg1;
725 ins->inst_imm -= def->inst_imm;
726 } else if (ins->opcode == OP_STOREI1_MEMBASE_REG &&
727 (def->opcode == OP_ICONV_TO_U1 || def->opcode == OP_ICONV_TO_I1 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_U1)) &&
728 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
729 /* Avoid needless sign extension */
730 ins->sreg1 = def->sreg1;
731 } else if (ins->opcode == OP_STOREI2_MEMBASE_REG &&
732 (def->opcode == OP_ICONV_TO_U2 || def->opcode == OP_ICONV_TO_I2 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_I2)) &&
733 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
734 /* Avoid needless sign extension */
735 ins->sreg1 = def->sreg1;
736 } else if (ins->opcode == OP_COMPARE_IMM && def->opcode == OP_LDADDR && ins->inst_imm == 0) {
737 MonoInst dummy_arg1;
739 memset (&dummy_arg1, 0, sizeof (MonoInst));
740 dummy_arg1.opcode = OP_ICONST;
741 dummy_arg1.inst_c0 = 1;
743 mono_constant_fold_ins (cfg, ins, &dummy_arg1, NULL, TRUE);
744 } else if (srcindex == 0 && ins->opcode == OP_COMPARE && defs [ins->sreg1]->opcode == OP_PCONST && defs [ins->sreg2] && defs [ins->sreg2]->opcode == OP_PCONST) {
745 /* typeof(T) == typeof(..) */
746 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
750 g_assert (cfg->cbb == bb_opt);
751 g_assert (!bb_opt->code);
752 /* Do strength reduction here */
753 if (mono_strength_reduction_ins (cfg, ins, &spec) && max < cfg->next_vreg) {
754 MonoInst **defs_prev = defs;
755 gint32 *def_index_prev = def_index;
756 guint32 prev_max = max;
757 guint32 additional_vregs = cfg->next_vreg - initial_max_vregs;
759 /* We have more vregs so we need to reallocate defs and def_index arrays */
760 max = initial_max_vregs + additional_vregs * 2;
761 defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * max);
762 def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * max);
764 /* Keep the entries for the previous vregs, zero the rest */
765 memcpy (defs, defs_prev, sizeof (MonoInst*) * prev_max);
766 memset (defs + prev_max, 0, sizeof (MonoInst*) * (max - prev_max));
767 memcpy (def_index, def_index_prev, sizeof (guint32) * prev_max);
768 memset (def_index + prev_max, 0, sizeof (guint32) * (max - prev_max));
771 if (cfg->cbb->code || (cfg->cbb != bb_opt)) {
772 MonoInst *saved_prev = ins->prev;
774 /* If we have code in cbb, we need to replace ins with the decomposition */
775 mono_replace_ins (cfg, bb, ins, &ins->prev, bb_opt, cfg->cbb);
776 bb_opt->code = bb_opt->last_ins = NULL;
777 bb_opt->in_count = bb_opt->out_count = 0;
778 cfg->cbb = bb_opt;
780 /* ins is hanging, continue scanning the emitted code */
781 ins = saved_prev;
782 continue;
785 if (spec [MONO_INST_DEST] != ' ') {
786 MonoInst *def = defs [ins->dreg];
788 if (def && (def->opcode == OP_ADD_IMM) && (def->sreg1 == cfg->frame_reg) && (MONO_IS_STORE_MEMBASE (ins))) {
789 /* ADD_IMM is created by spill_global_vars */
790 /* cfg->frame_reg is assumed to remain constant */
791 ins->inst_destbasereg = def->sreg1;
792 ins->inst_offset += def->inst_imm;
795 if (!MONO_IS_STORE_MEMBASE (ins) && !vreg_is_volatile (cfg, ins->dreg)) {
796 defs [ins->dreg] = ins;
797 def_index [ins->dreg] = ins_index;
801 if (MONO_IS_CALL (ins))
802 last_call_index = ins_index;
804 ins_index ++;
809 static inline gboolean
810 reg_is_softreg_no_fpstack (int reg, const char spec)
812 return (spec == 'i' && reg >= MONO_MAX_IREGS)
813 || ((spec == 'f' && reg >= MONO_MAX_FREGS) && !MONO_ARCH_USE_FPSTACK)
814 #ifdef MONO_ARCH_SIMD_INTRINSICS
815 || (spec == 'x' && reg >= MONO_MAX_XREGS)
816 #endif
817 || (spec == 'v');
820 static inline gboolean
821 reg_is_softreg (int reg, const char spec)
823 return (spec == 'i' && reg >= MONO_MAX_IREGS)
824 || (spec == 'f' && reg >= MONO_MAX_FREGS)
825 #ifdef MONO_ARCH_SIMD_INTRINSICS
826 || (spec == 'x' && reg >= MONO_MAX_XREGS)
827 #endif
828 || (spec == 'v');
831 static inline gboolean
832 mono_is_simd_accessor (MonoInst *ins)
834 switch (ins->opcode) {
835 #ifdef MONO_ARCH_SIMD_INTRINSICS
836 case OP_INSERT_I1:
837 case OP_INSERT_I2:
838 case OP_INSERT_I4:
839 case OP_INSERT_I8:
840 case OP_INSERT_R4:
841 case OP_INSERT_R8:
843 case OP_INSERTX_U1_SLOW:
844 case OP_INSERTX_I4_SLOW:
845 case OP_INSERTX_R4_SLOW:
846 case OP_INSERTX_R8_SLOW:
847 case OP_INSERTX_I8_SLOW:
848 return TRUE;
849 #endif
850 default:
851 return FALSE;
856 * mono_local_deadce:
858 * Get rid of the dead assignments to local vregs like the ones created by the
859 * copyprop pass.
861 void
862 mono_local_deadce (MonoCompile *cfg)
864 MonoBasicBlock *bb;
865 MonoInst *ins, *prev;
866 MonoBitSet *used, *defined;
868 //mono_print_code (cfg, "BEFORE LOCAL-DEADCE");
871 * Assignments to global vregs can't be eliminated so this pass must come
872 * after the handle_global_vregs () pass.
875 used = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
876 defined = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
878 /* First pass: collect liveness info */
879 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
880 /* Manually init the defs entries used by the bblock */
881 MONO_BB_FOR_EACH_INS (bb, ins) {
882 const char *spec = INS_INFO (ins->opcode);
883 int sregs [MONO_MAX_SRC_REGS];
884 int num_sregs, i;
886 if (spec [MONO_INST_DEST] != ' ') {
887 mono_bitset_clear_fast (used, ins->dreg);
888 mono_bitset_clear_fast (defined, ins->dreg);
889 #if SIZEOF_REGISTER == 4
890 /* Regpairs */
891 mono_bitset_clear_fast (used, ins->dreg + 1);
892 mono_bitset_clear_fast (defined, ins->dreg + 1);
893 #endif
895 num_sregs = mono_inst_get_src_registers (ins, sregs);
896 for (i = 0; i < num_sregs; ++i) {
897 mono_bitset_clear_fast (used, sregs [i]);
898 #if SIZEOF_REGISTER == 4
899 mono_bitset_clear_fast (used, sregs [i] + 1);
900 #endif
905 * Make a reverse pass over the instruction list
907 MONO_BB_FOR_EACH_INS_REVERSE_SAFE (bb, prev, ins) {
908 const char *spec = INS_INFO (ins->opcode);
909 int sregs [MONO_MAX_SRC_REGS];
910 int num_sregs, i;
911 MonoInst *prev_f = mono_inst_prev (ins, FILTER_NOP | FILTER_IL_SEQ_POINT);
913 if (ins->opcode == OP_NOP) {
914 MONO_DELETE_INS (bb, ins);
915 continue;
918 g_assert (ins->opcode > MONO_CEE_LAST);
920 if (MONO_IS_NON_FP_MOVE (ins) && prev_f) {
921 MonoInst *def;
922 const char *spec2;
924 def = prev_f;
925 spec2 = INS_INFO (def->opcode);
928 * Perform a limited kind of reverse copy propagation, i.e.
929 * transform B <- FOO; A <- B into A <- FOO
930 * This isn't copyprop, not deadce, but it can only be performed
931 * after handle_global_vregs () has run.
933 if (!get_vreg_to_inst (cfg, ins->sreg1) && (spec2 [MONO_INST_DEST] != ' ') && (def->dreg == ins->sreg1) && !mono_bitset_test_fast (used, ins->sreg1) && !MONO_IS_STORE_MEMBASE (def) && reg_is_softreg (ins->sreg1, spec [MONO_INST_DEST]) && !mono_is_simd_accessor (def)) {
934 if (cfg->verbose_level > 2) {
935 printf ("\tReverse copyprop in BB%d on ", bb->block_num);
936 mono_print_ins (ins);
939 def->dreg = ins->dreg;
940 MONO_DELETE_INS (bb, ins);
941 spec = INS_INFO (ins->opcode);
945 /* Enabling this on x86 could screw up the fp stack */
946 if (reg_is_softreg_no_fpstack (ins->dreg, spec [MONO_INST_DEST])) {
948 * Assignments to global vregs can only be eliminated if there is another
949 * assignment to the same vreg later in the same bblock.
951 if (!mono_bitset_test_fast (used, ins->dreg) &&
952 (!get_vreg_to_inst (cfg, ins->dreg) || (!bb->extended && !vreg_is_volatile (cfg, ins->dreg) && mono_bitset_test_fast (defined, ins->dreg))) &&
953 MONO_INS_HAS_NO_SIDE_EFFECT (ins)) {
954 /* Happens with CMOV instructions */
955 if (prev_f && prev_f->opcode == OP_ICOMPARE_IMM) {
956 MonoInst *prev = prev_f;
958 * Can't use DELETE_INS since that would interfere with the
959 * FOR_EACH_INS loop.
961 NULLIFY_INS (prev);
963 //printf ("DEADCE: "); mono_print_ins (ins);
964 MONO_DELETE_INS (bb, ins);
965 spec = INS_INFO (ins->opcode);
968 if (spec [MONO_INST_DEST] != ' ')
969 mono_bitset_clear_fast (used, ins->dreg);
972 if (spec [MONO_INST_DEST] != ' ')
973 mono_bitset_set_fast (defined, ins->dreg);
974 num_sregs = mono_inst_get_src_registers (ins, sregs);
975 for (i = 0; i < num_sregs; ++i)
976 mono_bitset_set_fast (used, sregs [i]);
977 if (MONO_IS_STORE_MEMBASE (ins))
978 mono_bitset_set_fast (used, ins->dreg);
980 if (MONO_IS_CALL (ins)) {
981 MonoCallInst *call = (MonoCallInst*)ins;
982 GSList *l;
984 if (call->out_ireg_args) {
985 for (l = call->out_ireg_args; l; l = l->next) {
986 guint32 regpair, reg;
988 regpair = (guint32)(gssize)(l->data);
989 reg = regpair & 0xffffff;
991 mono_bitset_set_fast (used, reg);
995 if (call->out_freg_args) {
996 for (l = call->out_freg_args; l; l = l->next) {
997 guint32 regpair, reg;
999 regpair = (guint32)(gssize)(l->data);
1000 reg = regpair & 0xffffff;
1002 mono_bitset_set_fast (used, reg);
1009 //mono_print_code (cfg, "AFTER LOCAL-DEADCE");
1012 #else /* !DISABLE_JIT */
1014 MONO_EMPTY_SOURCE_FILE (local_propagation);
1016 #endif /* !DISABLE_JIT */