mono/mini/local-propagation.c
/**
 * \file
 * Local constant, copy and tree propagation.
 *
 * To make some sense of the tree mover, read mono/docs/tree-mover.txt
 *
 * Author:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Massimiliano Mantione (massi@ximian.com)
 *
 * (C) 2006 Novell, Inc. http://www.novell.com
 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */

#include <config.h>
#include <mono/utils/mono-compiler.h>

#ifndef DISABLE_JIT

#include <string.h>
#include <stdio.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif

#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/mempool.h>
#include <mono/metadata/opcodes.h>
#include <mono/utils/unlocked.h>
#include "mini.h"
#include "ir-emit.h"

#ifndef MONO_ARCH_IS_OP_MEMBASE
#define MONO_ARCH_IS_OP_MEMBASE(opcode) FALSE
#endif

static MonoBitSet*
mono_bitset_mp_new_noinit (MonoMemPool *mp, guint32 max_size)
{
	int size = mono_bitset_alloc_size (max_size, 0);
	gpointer mem;

	mem = mono_mempool_alloc (mp, size);
	return mono_bitset_mem_new (mem, max_size, MONO_BITSET_DONT_FREE);
}

struct magic_unsigned {
	guint32 magic_number;
	gboolean addition;
	int shift;
};

struct magic_signed {
	gint32 magic_number;
	int shift;
};

/* http://www.hackersdelight.org/hdcodetxt/magicu.c.txt */
static struct magic_unsigned
compute_magic_unsigned (guint32 divisor) {
	guint32 nc, delta, q1, r1, q2, r2;
	struct magic_unsigned magu;
	gboolean gt = FALSE;
	int p;

	magu.addition = 0;
	nc = -1 - (-divisor) % divisor;
	p = 31;
	q1 = 0x80000000 / nc;
	r1 = 0x80000000 - q1 * nc;
	q2 = 0x7FFFFFFF / divisor;
	r2 = 0x7FFFFFFF - q2 * divisor;
	do {
		p = p + 1;
		if (q1 >= 0x80000000)
			gt = TRUE;
		if (r1 >= nc - r1) {
			q1 = 2 * q1 + 1;
			r1 = 2 * r1 - nc;
		} else {
			q1 = 2 * q1;
			r1 = 2 * r1;
		}
		if (r2 + 1 >= divisor - r2) {
			if (q2 >= 0x7FFFFFFF)
				magu.addition = 1;
			q2 = 2 * q2 + 1;
			r2 = 2 * r2 + 1 - divisor;
		} else {
			if (q2 >= 0x80000000)
				magu.addition = 1;
			q2 = 2 * q2;
			r2 = 2 * r2 + 1;
		}
		delta = divisor - 1 - r2;
	} while (!gt && (q1 < delta || (q1 == delta && r1 == 0)));

	magu.magic_number = q2 + 1;
	magu.shift = p - 32;
	return magu;
}

/* http://www.hackersdelight.org/hdcodetxt/magic.c.txt */
static struct magic_signed
compute_magic_signed (gint32 divisor) {
	int p;
	guint32 ad, anc, delta, q1, r1, q2, r2, t;
	const guint32 two31 = 0x80000000;
	struct magic_signed mag;

	ad = abs (divisor);
	t = two31 + ((unsigned)divisor >> 31);
	anc = t - 1 - t % ad;
	p = 31;
	q1 = two31 / anc;
	r1 = two31 - q1 * anc;
	q2 = two31 / ad;
	r2 = two31 - q2 * ad;
	do {
		p++;
		q1 *= 2;
		r1 *= 2;
		if (r1 >= anc) {
			q1++;
			r1 -= anc;
		}

		q2 *= 2;
		r2 *= 2;

		if (r2 >= ad) {
			q2++;
			r2 -= ad;
		}

		delta = ad - r2;
	} while (q1 < delta || (q1 == delta && r1 == 0));

	mag.magic_number = q2 + 1;
	if (divisor < 0)
		mag.magic_number = -mag.magic_number;
	mag.shift = p - 32;
	return mag;
}

static gboolean
mono_strength_reduction_division (MonoCompile *cfg, MonoInst *ins)
{
	gboolean allocated_vregs = FALSE;
	/*
	 * We don't use it on 32bit systems because on those
	 * platforms we emulate long multiplication, driving the
	 * performance back down.
	 */
	switch (ins->opcode) {
	case OP_IDIV_UN_IMM: {
		guint32 tmp_regl;
#if SIZEOF_REGISTER == 8
		guint32 dividend_reg;
#else
		guint32 tmp_regi;
#endif
		struct magic_unsigned mag;
		int power2 = mono_is_power_of_two (ins->inst_imm);

		/* The decomposition doesn't handle exception throwing */
		if (ins->inst_imm == 0)
			break;

		if (power2 >= 0) {
			ins->opcode = OP_ISHR_UN_IMM;
			ins->sreg2 = -1;
			ins->inst_imm = power2;
			break;
		}
		if (cfg->backend->disable_div_with_mul)
			break;
		allocated_vregs = TRUE;
		/*
		 * Replacement of unsigned division with multiplication,
		 * shifts and additions Hacker's Delight, chapter 10-10.
		 */
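		/*
		 * Worked example (illustrative only, not emitted code): for an
		 * unsigned divide by 7, compute_magic_unsigned () yields
		 * magic_number = 0x24924925, addition = 1, shift = 3, so the
		 * 64-bit path below computes q = (hi32 (n * 0x24924925) + n) >> 3.
		 * For n = 100: hi32 (100 * 0x24924925) = 14, and (14 + 100) >> 3
		 * = 14 = 100 / 7.
		 */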
		mag = compute_magic_unsigned (ins->inst_imm);
		tmp_regl = alloc_lreg (cfg);
#if SIZEOF_REGISTER == 8
		dividend_reg = alloc_lreg (cfg);
		MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
		MONO_EMIT_NEW_UNALU (cfg, OP_ZEXT_I4, dividend_reg, ins->sreg1);
		MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
		if (mag.addition) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
			MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, mag.shift);
		} else {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, 32 + mag.shift);
		}
#else
		tmp_regi = alloc_ireg (cfg);
		MONO_EMIT_NEW_ICONST (cfg, tmp_regi, mag.magic_number);
		MONO_EMIT_NEW_BIALU (cfg, OP_BIGMUL_UN, tmp_regl, ins->sreg1, tmp_regi);
		/* Long shifts below will be decomposed during cprop */
		if (mag.addition) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
			MONO_EMIT_NEW_BIALU (cfg, OP_IADDCC, MONO_LVREG_LS (tmp_regl), MONO_LVREG_LS (tmp_regl), ins->sreg1);
			/* MONO_LVREG_MS (tmp_reg) is 0, save in it the carry */
			MONO_EMIT_NEW_BIALU (cfg, OP_IADC, MONO_LVREG_MS (tmp_regl), MONO_LVREG_MS (tmp_regl), MONO_LVREG_MS (tmp_regl));
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, mag.shift);
		} else {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
		}
		MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ins->dreg, MONO_LVREG_LS (tmp_regl));
#endif
		UnlockedIncrement (&mono_jit_stats.optimized_divisions);
		break;
	}
	case OP_IDIV_IMM: {
		guint32 tmp_regl;
#if SIZEOF_REGISTER == 8
		guint32 dividend_reg;
#else
		guint32 tmp_regi;
#endif
		struct magic_signed mag;
		int power2 = mono_is_power_of_two (ins->inst_imm);
		/* The decomposition doesn't handle exception throwing */
		/* Optimization with MUL does not apply for -1, 0 and 1 divisors */
		if (ins->inst_imm == 0 || ins->inst_imm == -1) {
			break;
		} else if (ins->inst_imm == 1) {
			ins->opcode = OP_MOVE;
			ins->inst_imm = 0;
			break;
		}
		allocated_vregs = TRUE;
		if (power2 == 1) {
			guint32 r1 = alloc_ireg (cfg);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, ins->sreg1, 31);
			MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, 1);
			break;
		} else if (power2 > 0 && power2 < 31) {
			guint32 r1 = alloc_ireg (cfg);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, r1, ins->sreg1, 31);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, r1, (32 - power2));
			MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, power2);
			break;
		}

		if (cfg->backend->disable_div_with_mul)
			break;
		/*
		 * Replacement of signed division with multiplication,
		 * shifts and additions Hacker's Delight, chapter 10-6.
		 */
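		/*
		 * Worked example (illustrative only): for a divisor of 7,
		 * compute_magic_signed () yields magic_number = 0x92492493
		 * (-1840700269) and shift = 2, so the 64-bit path below computes
		 * q = (hi32 (n * M) + n) >> 2, plus 1 if that value is negative.
		 * For n = -50: hi32 = 21, 21 - 50 = -29, -29 >> 2 = -8, and
		 * -8 + 1 = -7, which matches C's truncating -50 / 7.
		 */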
		mag = compute_magic_signed (ins->inst_imm);
		tmp_regl = alloc_lreg (cfg);
#if SIZEOF_REGISTER == 8
		dividend_reg = alloc_lreg (cfg);
		MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
		MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, dividend_reg, ins->sreg1);
		MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
		if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32);
			if (ins->inst_imm > 0 && mag.magic_number < 0) {
				MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
			} else if (ins->inst_imm < 0 && mag.magic_number > 0) {
				MONO_EMIT_NEW_BIALU (cfg, OP_LSUB, tmp_regl, tmp_regl, dividend_reg);
			}
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, mag.shift);
		} else {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
		}
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, SIZEOF_REGISTER * 8 - 1);
		MONO_EMIT_NEW_BIALU (cfg, OP_LADD, ins->dreg, ins->dreg, tmp_regl);
#else
		tmp_regi = alloc_ireg (cfg);
		MONO_EMIT_NEW_ICONST (cfg, tmp_regi, mag.magic_number);
		MONO_EMIT_NEW_BIALU (cfg, OP_BIGMUL, tmp_regl, ins->sreg1, tmp_regi);
		if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
			if (ins->inst_imm > 0 && mag.magic_number < 0) {
				/* Opposite sign, cannot overflow */
				MONO_EMIT_NEW_BIALU (cfg, OP_IADD, tmp_regi, MONO_LVREG_MS (tmp_regl), ins->sreg1);
			} else if (ins->inst_imm < 0 && mag.magic_number > 0) {
				/* Same sign, cannot overflow */
				MONO_EMIT_NEW_BIALU (cfg, OP_ISUB, tmp_regi, MONO_LVREG_MS (tmp_regl), ins->sreg1);
			}
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, tmp_regi, tmp_regi, mag.shift);
		} else {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, tmp_regi, MONO_LVREG_MS (tmp_regl), mag.shift);
		}
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, ins->dreg, tmp_regi, SIZEOF_REGISTER * 8 - 1);
		MONO_EMIT_NEW_BIALU (cfg, OP_IADD, ins->dreg, ins->dreg, tmp_regi);
#endif
		UnlockedIncrement (&mono_jit_stats.optimized_divisions);
		break;
	}
	}
	return allocated_vregs;
}

/*
 * Replaces ins with optimized opcodes.
 *
 * We can emit to cbb the equivalent instructions which will be used as
 * replacement for ins, or simply change the fields of ins. Spec needs to
 * be updated if we silently change the opcode of ins.
 *
 * Returns TRUE if additional vregs were allocated.
 */
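/*
 * For example (illustrative only): OP_IMUL_IMM with inst_imm == 8 is rewritten
 * in place into OP_ISHL_IMM with inst_imm == 3 below; since the opcode changes
 * silently, *spec is refreshed at the end of this function.
 */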
static gboolean
mono_strength_reduction_ins (MonoCompile *cfg, MonoInst *ins, const char **spec)
{
	gboolean allocated_vregs = FALSE;

	/* FIXME: Add long/float */
	switch (ins->opcode) {
	case OP_MOVE:
	case OP_XMOVE:
		if (ins->dreg == ins->sreg1) {
			NULLIFY_INS (ins);
		}
		break;
	case OP_ADD_IMM:
	case OP_IADD_IMM:
	case OP_SUB_IMM:
	case OP_ISUB_IMM:
#if SIZEOF_REGISTER == 8
	case OP_LADD_IMM:
	case OP_LSUB_IMM:
#endif
		if (ins->inst_imm == 0) {
			ins->opcode = OP_MOVE;
		}
		break;
	case OP_MUL_IMM:
	case OP_IMUL_IMM:
#if SIZEOF_REGISTER == 8
	case OP_LMUL_IMM:
#endif
		if (ins->inst_imm == 0) {
			ins->opcode = (ins->opcode == OP_LMUL_IMM) ? OP_I8CONST : OP_ICONST;
			ins->inst_c0 = 0;
			ins->sreg1 = -1;
		} else if (ins->inst_imm == 1) {
			ins->opcode = OP_MOVE;
		} else if ((ins->opcode == OP_IMUL_IMM) && (ins->inst_imm == -1)) {
			ins->opcode = OP_INEG;
		} else if ((ins->opcode == OP_LMUL_IMM) && (ins->inst_imm == -1)) {
			ins->opcode = OP_LNEG;
		} else {
			int power2 = mono_is_power_of_two (ins->inst_imm);
			if (power2 >= 0) {
				ins->opcode = (ins->opcode == OP_MUL_IMM) ? OP_SHL_IMM : ((ins->opcode == OP_LMUL_IMM) ? OP_LSHL_IMM : OP_ISHL_IMM);
				ins->inst_imm = power2;
			}
		}
		break;
	case OP_IREM_UN_IMM: {
		int power2 = mono_is_power_of_two (ins->inst_imm);

		if (power2 >= 0) {
			ins->opcode = OP_IAND_IMM;
			ins->sreg2 = -1;
			ins->inst_imm = (1 << power2) - 1;
		}
		break;
	}
	case OP_IDIV_UN_IMM:
	case OP_IDIV_IMM: {
		if ((!COMPILE_LLVM (cfg)) && (!cfg->backend->optimized_div))
			allocated_vregs = mono_strength_reduction_division (cfg, ins);
		break;
	}
#if SIZEOF_REGISTER == 8
	case OP_LREM_IMM:
#endif
	case OP_IREM_IMM: {
		int power = mono_is_power_of_two (ins->inst_imm);
		if (ins->inst_imm == 1) {
			ins->opcode = OP_ICONST;
			MONO_INST_NULLIFY_SREGS (ins);
			ins->inst_c0 = 0;
		} else if ((ins->inst_imm > 0) && (ins->inst_imm < (1LL << 32)) &&
				(power != -1) && (!cfg->backend->optimized_div)) {
			gboolean is_long = ins->opcode == OP_LREM_IMM;
			int compensator_reg = alloc_ireg (cfg);
			int intermediate_reg;

			/* Based on gcc code */

			/* Add compensation for negative numerators */
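			/*
			 * Illustrative check (32-bit, power == 3, i.e. x % 8): for
			 * x = -11 the arithmetic shift by 31 gives -1, the unsigned
			 * shift of that by 29 gives a compensator of 7, so
			 * (x + 7) & 7 = 4 and 4 - 7 = -3, matching C's -11 % 8;
			 * for x >= 0 the compensator is 0.
			 */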
			if (power > 1) {
				intermediate_reg = compensator_reg;
				MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_IMM : OP_ISHR_IMM, intermediate_reg, ins->sreg1, is_long ? 63 : 31);
			} else {
				intermediate_reg = ins->sreg1;
			}

			MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_UN_IMM : OP_ISHR_UN_IMM, compensator_reg, intermediate_reg, (is_long ? 64 : 32) - power);
			MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LADD : OP_IADD, ins->dreg, ins->sreg1, compensator_reg);
			/* Compute remainder */
			MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LAND_IMM : OP_AND_IMM, ins->dreg, ins->dreg, (1 << power) - 1);
			/* Remove compensation */
			MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LSUB : OP_ISUB, ins->dreg, ins->dreg, compensator_reg);

			allocated_vregs = TRUE;
		}
		break;
	}
#if SIZEOF_REGISTER == 4
	case OP_LSHR_IMM: {
		if (COMPILE_LLVM (cfg))
			break;
		if (ins->inst_c1 == 32) {
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
		} else if (ins->inst_c1 == 0) {
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
		} else if (ins->inst_c1 > 32) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
		} else {
			guint32 tmpreg = alloc_ireg (cfg);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
			MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
			allocated_vregs = TRUE;
		}
		break;
	}
	case OP_LSHR_UN_IMM: {
		if (COMPILE_LLVM (cfg))
			break;
		if (ins->inst_c1 == 32) {
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
			MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
		} else if (ins->inst_c1 == 0) {
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
		} else if (ins->inst_c1 > 32) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
			MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
		} else {
			guint32 tmpreg = alloc_ireg (cfg);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
			MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
			allocated_vregs = TRUE;
		}
		break;
	}
	case OP_LSHL_IMM: {
		if (COMPILE_LLVM (cfg))
			break;
		if (ins->inst_c1 == 32) {
			/* just move the lower half to the upper and zero the lower word */
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
			MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
		} else if (ins->inst_c1 == 0) {
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
		} else if (ins->inst_c1 > 32) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1 - 32);
			MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
		} else {
			guint32 tmpreg = alloc_ireg (cfg);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, tmpreg, MONO_LVREG_LS (ins->sreg1), 32 - ins->inst_c1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
			MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->dreg), tmpreg);
			allocated_vregs = TRUE;
		}
		break;
	}
#endif

	default:
		break;
	}

	*spec = INS_INFO (ins->opcode);
	return allocated_vregs;
}

/**
 * mono_local_cprop:
 *
 * A combined local copy and constant propagation pass.
 */
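/*
 * For example (illustrative vreg numbers and IR spelling):
 * "move R9 <- R7; int_add R11 <- R9 R8" becomes "int_add R11 <- R7 R8"
 * (copy propagation), and "iconst R9 <- 3; int_add R11 <- R8 R9" becomes
 * an int_add_imm of R8 with immediate 3 (constant propagation), provided
 * R7/R9 are not redefined in between.
 */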
void
mono_local_cprop (MonoCompile *cfg)
{
	MonoBasicBlock *bb, *bb_opt;
	MonoInst **defs;
	gint32 *def_index;
	int max;
	int filter = FILTER_IL_SEQ_POINT;
	int initial_max_vregs = cfg->next_vreg;

	max = cfg->next_vreg;
	defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * cfg->next_vreg);
	def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * cfg->next_vreg);
	cfg->cbb = bb_opt = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoBasicBlock));

	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		MonoInst *ins;
		int ins_index;
		int last_call_index;

		/* Manually init the defs entries used by the bblock */
		MONO_BB_FOR_EACH_INS (bb, ins) {
			int sregs [MONO_MAX_SRC_REGS];
			int num_sregs, i;

			if (ins->dreg != -1) {
#if SIZEOF_REGISTER == 4
				const char *spec = INS_INFO (ins->opcode);
				if (spec [MONO_INST_DEST] == 'l') {
					defs [ins->dreg + 1] = NULL;
					defs [ins->dreg + 2] = NULL;
				}
#endif
				defs [ins->dreg] = NULL;
			}

			num_sregs = mono_inst_get_src_registers (ins, sregs);
			for (i = 0; i < num_sregs; ++i) {
				int sreg = sregs [i];
#if SIZEOF_REGISTER == 4
				const char *spec = INS_INFO (ins->opcode);
				if (spec [MONO_INST_SRC1 + i] == 'l') {
					defs [sreg + 1] = NULL;
					defs [sreg + 2] = NULL;
				}
#endif
				defs [sreg] = NULL;
			}
		}

		ins_index = 0;
		last_call_index = -1;
		MONO_BB_FOR_EACH_INS (bb, ins) {
			const char *spec = INS_INFO (ins->opcode);
			int regtype, srcindex, sreg;
			int num_sregs;
			int sregs [MONO_MAX_SRC_REGS];

			if (ins->opcode == OP_NOP) {
				MONO_DELETE_INS (bb, ins);
				continue;
			}

			g_assert (ins->opcode > MONO_CEE_LAST);

			/* FIXME: Optimize this */
			if (ins->opcode == OP_LDADDR) {
				MonoInst *var = (MonoInst *)ins->inst_p0;

				defs [var->dreg] = NULL;
				/*
				if (!MONO_TYPE_ISSTRUCT (var->inst_vtype))
					break;
				*/
			}

			if (MONO_IS_STORE_MEMBASE (ins)) {
				sreg = ins->dreg;
				regtype = 'i';

				if ((regtype == 'i') && (sreg != -1) && defs [sreg]) {
					MonoInst *def = defs [sreg];

					if ((def->opcode == OP_MOVE) && (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) && !vreg_is_volatile (cfg, def->sreg1)) {
						int vreg = def->sreg1;
						if (cfg->verbose_level > 2) printf ("CCOPY: R%d -> R%d\n", sreg, vreg);
						ins->dreg = vreg;
					}
				}
			}

			num_sregs = mono_inst_get_src_registers (ins, sregs);
			for (srcindex = 0; srcindex < num_sregs; ++srcindex) {
				MonoInst *def;

				mono_inst_get_src_registers (ins, sregs);

				regtype = spec [MONO_INST_SRC1 + srcindex];
				sreg = sregs [srcindex];

				if ((regtype == ' ') || (sreg == -1) || (!defs [sreg]))
					continue;

				def = defs [sreg];

				/* Copy propagation */
				/*
				 * The first check makes sure the source of the copy did not change since
				 * the copy was made.
				 * The second check avoids volatile variables.
				 * The third check avoids copy propagating local vregs through a call,
				 * since the lvreg will be spilled
				 * The fourth check avoids copy propagating a vreg in cases where
				 * it would be eliminated anyway by reverse copy propagation later,
				 * because propagating it would create another use for it, thus making
				 * it impossible to use reverse copy propagation.
				 */
				/* Enabling this for floats trips up the fp stack */
				/*
				 * Enabling this for floats on amd64 seems to cause a failure in
				 * basic-math.cs, most likely because it gets rid of some r8->r4
				 * conversions.
				 */
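				/*
				 * Example (illustrative only): with "move R10 <- R5" recorded in
				 * defs [R10], a later use of R10 in this bblock is rewritten to
				 * use R5 directly, as long as R5 was not redefined since the move
				 * (first check), is not volatile (second check) and, if it is a
				 * local vreg, was not defined before the last call (third check).
				 */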
				if (MONO_IS_MOVE (def) &&
					(!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) &&
					!vreg_is_volatile (cfg, def->sreg1) &&
					/* This avoids propagating local vregs across calls */
					((get_vreg_to_inst (cfg, def->sreg1) || !defs [def->sreg1] || (def_index [def->sreg1] >= last_call_index) || (def->opcode == OP_VMOVE))) &&
					!(defs [def->sreg1] && mono_inst_next (defs [def->sreg1], filter) == def) &&
					(!MONO_ARCH_USE_FPSTACK || (def->opcode != OP_FMOVE)) &&
					(def->opcode != OP_FMOVE)) {
					int vreg = def->sreg1;

					if (cfg->verbose_level > 2) printf ("CCOPY/2: R%d -> R%d\n", sreg, vreg);
					sregs [srcindex] = vreg;
					mono_inst_set_src_registers (ins, sregs);

					/* Allow further iterations */
					srcindex = -1;
					continue;
				}

				/* Constant propagation */
				/* is_inst_imm is only needed for binops */
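				/*
				 * Example (illustrative only): if defs [sreg] is "iconst R9 <- 3"
				 * and ins is "int_add R11 <- R8 R9", mono_op_to_op_imm () maps
				 * OP_IADD to OP_IADD_IMM, so ins is rewritten to add the constant
				 * 3 to R8 directly and its second source register is dropped.
				 */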
				if ((((def->opcode == OP_ICONST) || ((sizeof (gpointer) == 8) && (def->opcode == OP_I8CONST)) || (def->opcode == OP_PCONST))) ||
					(!MONO_ARCH_USE_FPSTACK && (def->opcode == OP_R8CONST))) {
					guint32 opcode2;

					/* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
					if ((srcindex == 1) && (ins->sreg1 != -1) && defs [ins->sreg1] &&
						((defs [ins->sreg1]->opcode == OP_ICONST) || defs [ins->sreg1]->opcode == OP_PCONST) &&
						defs [ins->sreg2]) {
						/* Both arguments are constants, perform cfold */
						mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
					} else if ((srcindex == 0) && (ins->sreg2 != -1) && defs [ins->sreg2]) {
						/* Arg 1 is constant, swap arguments if possible */
						int opcode = ins->opcode;
						mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
						if (ins->opcode != opcode) {
							/* Allow further iterations */
							srcindex = -1;
							continue;
						}
					} else if ((srcindex == 0) && (ins->sreg2 == -1)) {
						/* Constant unop, perform cfold */
						mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], NULL, TRUE);
					}

					opcode2 = mono_op_to_op_imm (ins->opcode);
					if ((opcode2 != -1) && mono_arch_is_inst_imm (ins->opcode, opcode2, def->inst_c0) && ((srcindex == 1) || (ins->sreg2 == -1))) {
						ins->opcode = opcode2;
						if ((def->opcode == OP_I8CONST) && TARGET_SIZEOF_VOID_P == 4)
							ins->inst_l = def->inst_l;
						else if (regtype == 'l' && TARGET_SIZEOF_VOID_P == 4)
							/* This can happen if the def was a result of an iconst+conv.i8, which is transformed into just an iconst */
							ins->inst_l = def->inst_c0;
						else
							ins->inst_imm = def->inst_c0;
						sregs [srcindex] = -1;
						mono_inst_set_src_registers (ins, sregs);

						if ((opcode2 == OP_VOIDCALL) || (opcode2 == OP_CALL) || (opcode2 == OP_LCALL) || (opcode2 == OP_FCALL))
							((MonoCallInst*)ins)->fptr = (gpointer)(uintptr_t)ins->inst_imm;

						/* Allow further iterations */
						srcindex = -1;
						continue;
					}
					else {
						/* Special cases */
#if defined(TARGET_X86) || defined(TARGET_AMD64)
						if ((ins->opcode == OP_X86_LEA) && (srcindex == 1)) {
#if SIZEOF_REGISTER == 8
							/* FIXME: Use OP_PADD_IMM when the new JIT is done */
							ins->opcode = OP_LADD_IMM;
#else
							ins->opcode = OP_ADD_IMM;
#endif
							ins->inst_imm += def->inst_c0 << ins->backend.shift_amount;
							ins->sreg2 = -1;
						}
#endif
						opcode2 = mono_load_membase_to_load_mem (ins->opcode);
						if ((srcindex == 0) && (opcode2 != -1) && mono_arch_is_inst_imm (ins->opcode, opcode2, def->inst_c0)) {
							ins->opcode = opcode2;
							ins->inst_imm = def->inst_c0 + ins->inst_offset;
							ins->sreg1 = -1;
						}
					}
				}
				else if (((def->opcode == OP_ADD_IMM) || (def->opcode == OP_LADD_IMM)) && (MONO_IS_LOAD_MEMBASE (ins) || MONO_ARCH_IS_OP_MEMBASE (ins->opcode))) {
					/* ADD_IMM is created by spill_global_vars */
					/*
					 * We have to guarantee that def->sreg1 hasn't changed since def->dreg
					 * was defined. cfg->frame_reg is assumed to remain constant.
					 */
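					/*
					 * Example (illustrative offsets): for an add_imm that computes
					 * R20 = frame_reg + 16 followed by a load from [R20 + 8], the
					 * load is rebased to [frame_reg + 24] and R20 is no longer
					 * needed here.
					 */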
					if ((def->sreg1 == cfg->frame_reg) || ((mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1))) {
						ins->inst_basereg = def->sreg1;
						ins->inst_offset += def->inst_imm;
					}
				} else if ((ins->opcode == OP_ISUB_IMM) && (def->opcode == OP_IADD_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
					ins->sreg1 = def->sreg1;
					ins->inst_imm -= def->inst_imm;
				} else if ((ins->opcode == OP_IADD_IMM) && (def->opcode == OP_ISUB_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
					ins->sreg1 = def->sreg1;
					ins->inst_imm -= def->inst_imm;
				} else if (ins->opcode == OP_STOREI1_MEMBASE_REG &&
					(def->opcode == OP_ICONV_TO_U1 || def->opcode == OP_ICONV_TO_I1 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_U1)) &&
					(!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
					/* Avoid needless sign extension */
					ins->sreg1 = def->sreg1;
				} else if (ins->opcode == OP_STOREI2_MEMBASE_REG &&
					(def->opcode == OP_ICONV_TO_U2 || def->opcode == OP_ICONV_TO_I2 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_I2)) &&
					(!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
					/* Avoid needless sign extension */
					ins->sreg1 = def->sreg1;
				} else if (ins->opcode == OP_COMPARE_IMM && def->opcode == OP_LDADDR && ins->inst_imm == 0) {
					MonoInst dummy_arg1;

					memset (&dummy_arg1, 0, sizeof (MonoInst));
					dummy_arg1.opcode = OP_ICONST;
					dummy_arg1.inst_c0 = 1;

					mono_constant_fold_ins (cfg, ins, &dummy_arg1, NULL, TRUE);
				} else if (srcindex == 0 && ins->opcode == OP_COMPARE && defs [ins->sreg1]->opcode == OP_PCONST && defs [ins->sreg2] && defs [ins->sreg2]->opcode == OP_PCONST) {
					/* typeof(T) == typeof(..) */
					mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
				} else if (ins->opcode == OP_MOVE && def->opcode == OP_LDADDR) {
					ins->opcode = OP_LDADDR;
					ins->sreg1 = -1;
					ins->inst_p0 = def->inst_p0;
					ins->klass = def->klass;
				}
			}

			g_assert (cfg->cbb == bb_opt);
			g_assert (!bb_opt->code);
			/* Do strength reduction here */
			if (mono_strength_reduction_ins (cfg, ins, &spec) && max < cfg->next_vreg) {
				MonoInst **defs_prev = defs;
				gint32 *def_index_prev = def_index;
				guint32 prev_max = max;
				guint32 additional_vregs = cfg->next_vreg - initial_max_vregs;

				/* We have more vregs so we need to reallocate defs and def_index arrays */
				max = initial_max_vregs + additional_vregs * 2;
				defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * max);
				def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * max);

				/* Keep the entries for the previous vregs, zero the rest */
				memcpy (defs, defs_prev, sizeof (MonoInst*) * prev_max);
				memset (defs + prev_max, 0, sizeof (MonoInst*) * (max - prev_max));
				memcpy (def_index, def_index_prev, sizeof (guint32) * prev_max);
				memset (def_index + prev_max, 0, sizeof (guint32) * (max - prev_max));
			}

			if (cfg->cbb->code || (cfg->cbb != bb_opt)) {
				MonoInst *saved_prev = ins->prev;

				/* If we have code in cbb, we need to replace ins with the decomposition */
				mono_replace_ins (cfg, bb, ins, &ins->prev, bb_opt, cfg->cbb);
				bb_opt->code = bb_opt->last_ins = NULL;
				bb_opt->in_count = bb_opt->out_count = 0;
				cfg->cbb = bb_opt;

				if (!saved_prev) {
					/* first instruction of basic block got replaced, so create
					 * dummy inst that points to start of basic block */
					MONO_INST_NEW (cfg, saved_prev, OP_NOP);
					saved_prev = bb->code;
				}
				/* ins is hanging, continue scanning the emitted code */
				ins = saved_prev;
				continue;
			}

			if (spec [MONO_INST_DEST] != ' ') {
				MonoInst *def = defs [ins->dreg];

				if (def && (def->opcode == OP_ADD_IMM) && (def->sreg1 == cfg->frame_reg) && (MONO_IS_STORE_MEMBASE (ins))) {
					/* ADD_IMM is created by spill_global_vars */
					/* cfg->frame_reg is assumed to remain constant */
					ins->inst_destbasereg = def->sreg1;
					ins->inst_offset += def->inst_imm;
				}

				if (!MONO_IS_STORE_MEMBASE (ins) && !vreg_is_volatile (cfg, ins->dreg)) {
					defs [ins->dreg] = ins;
					def_index [ins->dreg] = ins_index;
				}
			}

			if (MONO_IS_CALL (ins))
				last_call_index = ins_index;

			ins_index ++;
		}
	}
}

static gboolean
reg_is_softreg_no_fpstack (int reg, const char spec)
{
	return (spec == 'i' && reg >= MONO_MAX_IREGS)
		|| ((spec == 'f' && reg >= MONO_MAX_FREGS) && !MONO_ARCH_USE_FPSTACK)
#ifdef MONO_ARCH_SIMD_INTRINSICS
		|| (spec == 'x' && reg >= MONO_MAX_XREGS)
#endif
		|| (spec == 'v');
}

static gboolean
reg_is_softreg (int reg, const char spec)
{
	return (spec == 'i' && reg >= MONO_MAX_IREGS)
		|| (spec == 'f' && reg >= MONO_MAX_FREGS)
#ifdef MONO_ARCH_SIMD_INTRINSICS
		|| (spec == 'x' && reg >= MONO_MAX_XREGS)
#endif
		|| (spec == 'v');
}

static gboolean
mono_is_simd_accessor (MonoInst *ins)
{
	switch (ins->opcode) {
#ifdef MONO_ARCH_SIMD_INTRINSICS
	case OP_INSERT_I1:
	case OP_INSERT_I2:
	case OP_INSERT_I4:
	case OP_INSERT_I8:
	case OP_INSERT_R4:
	case OP_INSERT_R8:

	case OP_INSERTX_U1_SLOW:
	case OP_INSERTX_I4_SLOW:
	case OP_INSERTX_R4_SLOW:
	case OP_INSERTX_R8_SLOW:
	case OP_INSERTX_I8_SLOW:
		return TRUE;
#endif
	default:
		return FALSE;
	}
}

/**
 * mono_local_deadce:
 *
 * Get rid of the dead assignments to local vregs like the ones created by the
 * copyprop pass.
 */
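/*
 * For example (illustrative only): once mono_local_cprop has rewritten the
 * uses of a "move R9 <- R7" to use R7 directly, the move no longer has any
 * readers in the bblock and is deleted by the reverse pass below.
 */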
void
mono_local_deadce (MonoCompile *cfg)
{
	MonoBasicBlock *bb;
	MonoInst *ins, *prev;
	MonoBitSet *used, *defined;

	//mono_print_code (cfg, "BEFORE LOCAL-DEADCE");

	/*
	 * Assignments to global vregs can't be eliminated so this pass must come
	 * after the handle_global_vregs () pass.
	 */

	used = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
	defined = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);

	/* First pass: collect liveness info */
	for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
		/* Manually init the defs entries used by the bblock */
		MONO_BB_FOR_EACH_INS (bb, ins) {
			const char *spec = INS_INFO (ins->opcode);
			int sregs [MONO_MAX_SRC_REGS];
			int num_sregs, i;

			if (spec [MONO_INST_DEST] != ' ') {
				mono_bitset_clear_fast (used, ins->dreg);
				mono_bitset_clear_fast (defined, ins->dreg);
#if SIZEOF_REGISTER == 4
				/* Regpairs */
				mono_bitset_clear_fast (used, ins->dreg + 1);
				mono_bitset_clear_fast (defined, ins->dreg + 1);
#endif
			}
			num_sregs = mono_inst_get_src_registers (ins, sregs);
			for (i = 0; i < num_sregs; ++i) {
				mono_bitset_clear_fast (used, sregs [i]);
#if SIZEOF_REGISTER == 4
				mono_bitset_clear_fast (used, sregs [i] + 1);
#endif
			}
		}

		/*
		 * Make a reverse pass over the instruction list
		 */
		MONO_BB_FOR_EACH_INS_REVERSE_SAFE (bb, prev, ins) {
			const char *spec = INS_INFO (ins->opcode);
			int sregs [MONO_MAX_SRC_REGS];
			int num_sregs, i;
			MonoInst *prev_f = mono_inst_prev (ins, FILTER_NOP | FILTER_IL_SEQ_POINT);

			if (ins->opcode == OP_NOP) {
				MONO_DELETE_INS (bb, ins);
				continue;
			}

			g_assert (ins->opcode > MONO_CEE_LAST);

			if (MONO_IS_NON_FP_MOVE (ins) && prev_f) {
				MonoInst *def;
				const char *spec2;

				def = prev_f;
				spec2 = INS_INFO (def->opcode);

				/*
				 * Perform a limited kind of reverse copy propagation, i.e.
				 * transform B <- FOO; A <- B into A <- FOO
				 * This isn't copyprop, nor deadce, but it can only be performed
				 * after handle_global_vregs () has run.
				 */
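				/*
				 * Example (illustrative only): "iconst R12 <- 5; move R8 <- R12"
				 * becomes "iconst R8 <- 5" and the move is deleted, provided R12
				 * is a soft reg with no further uses after the move.
				 */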
				if (!get_vreg_to_inst (cfg, ins->sreg1) && (spec2 [MONO_INST_DEST] != ' ') && (def->dreg == ins->sreg1) && !mono_bitset_test_fast (used, ins->sreg1) && !MONO_IS_STORE_MEMBASE (def) && reg_is_softreg (ins->sreg1, spec [MONO_INST_DEST]) && !mono_is_simd_accessor (def)) {
					if (cfg->verbose_level > 2) {
						printf ("\tReverse copyprop in BB%d on ", bb->block_num);
						mono_print_ins (ins);
					}

					def->dreg = ins->dreg;
					MONO_DELETE_INS (bb, ins);
					spec = INS_INFO (ins->opcode);
				}
			}

			/* Enabling this on x86 could screw up the fp stack */
			if (reg_is_softreg_no_fpstack (ins->dreg, spec [MONO_INST_DEST])) {
				/*
				 * Assignments to global vregs can only be eliminated if there is another
				 * assignment to the same vreg later in the same bblock.
				 */
				if (!mono_bitset_test_fast (used, ins->dreg) &&
					(!get_vreg_to_inst (cfg, ins->dreg) || (!bb->extended && !vreg_is_volatile (cfg, ins->dreg) && mono_bitset_test_fast (defined, ins->dreg))) &&
					MONO_INS_HAS_NO_SIDE_EFFECT (ins)) {
					/* Happens with CMOV instructions */
					if (prev_f && prev_f->opcode == OP_ICOMPARE_IMM) {
						MonoInst *prev = prev_f;
						/*
						 * Can't use DELETE_INS since that would interfere with the
						 * FOR_EACH_INS loop.
						 */
						NULLIFY_INS (prev);
					}
					//printf ("DEADCE: "); mono_print_ins (ins);
					MONO_DELETE_INS (bb, ins);
					spec = INS_INFO (ins->opcode);
				}

				if (spec [MONO_INST_DEST] != ' ')
					mono_bitset_clear_fast (used, ins->dreg);
			}

			if (spec [MONO_INST_DEST] != ' ')
				mono_bitset_set_fast (defined, ins->dreg);
			num_sregs = mono_inst_get_src_registers (ins, sregs);
			for (i = 0; i < num_sregs; ++i)
				mono_bitset_set_fast (used, sregs [i]);
			if (MONO_IS_STORE_MEMBASE (ins))
				mono_bitset_set_fast (used, ins->dreg);

			if (MONO_IS_CALL (ins)) {
				MonoCallInst *call = (MonoCallInst*)ins;
				GSList *l;

				if (call->out_ireg_args) {
					for (l = call->out_ireg_args; l; l = l->next) {
						guint32 regpair, reg;

						regpair = (guint32)(gssize)(l->data);
						reg = regpair & 0xffffff;

						mono_bitset_set_fast (used, reg);
					}
				}

				if (call->out_freg_args) {
					for (l = call->out_freg_args; l; l = l->next) {
						guint32 regpair, reg;

						regpair = (guint32)(gssize)(l->data);
						reg = regpair & 0xffffff;

						mono_bitset_set_fast (used, reg);
					}
				}
			}
		}
	}

	//mono_print_code (cfg, "AFTER LOCAL-DEADCE");
}

#else /* !DISABLE_JIT */

MONO_EMPTY_SOURCE_FILE (local_propagation);

#endif /* !DISABLE_JIT */