Relax constraints on Machine_Attribute argument types:
[official-gcc.git] / gcc / fwprop.c
blob669d03cc066e94ad4ccbcfb0e4452d71eeef6dbb
1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "toplev.h"
27 #include "timevar.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "emit-rtl.h"
31 #include "insn-config.h"
32 #include "recog.h"
33 #include "flags.h"
34 #include "obstack.h"
35 #include "basic-block.h"
36 #include "output.h"
37 #include "df.h"
38 #include "target.h"
39 #include "cfgloop.h"
40 #include "tree-pass.h"
43 /* This pass does simple forward propagation and simplification when an
44 operand of an insn can only come from a single def. This pass uses
45 df.c, so it is global. However, we only do limited analysis of
46 available expressions.
48 1) The pass tries to propagate the source of the def into the use,
49 and checks if the result is independent of the substituted value.
50 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
51 zero, independent of the source register.
53 In particular, we propagate constants into the use site. Sometimes
54 RTL expansion did not put the constant in the same insn on purpose,
55 to satisfy a predicate, and the result will fail to be recognized;
56 but this happens rarely and in this case we can still create a
57 REG_EQUAL note. For multi-word operations, this
59 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
60 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
61 (set (subreg:SI (reg:DI 122) 0)
62 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
63 (set (subreg:SI (reg:DI 122) 4)
64 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
66 can be simplified to the much simpler
68 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
69 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
71 This particular propagation is also effective at putting together
72 complex addressing modes. We are more aggressive inside MEMs, in
73 that all definitions are propagated if the use is in a MEM; if the
74 result is a valid memory address we check address_cost to decide
75 whether the substitution is worthwhile.
77 2) The pass propagates register copies. Although this is not as effective
78 as the copy propagation done by CSE's canon_reg, which works by walking
79 the instruction chain, it can help the other transformations.
81 We should consider removing this optimization, and instead reorder the
82 RTL passes, because GCSE does this transformation too. With some luck,
83 the CSE pass at the end of rest_of_handle_gcse could also go away.
85 3) The pass looks for paradoxical subregs that are actually unnecessary.
86 Things like this:
88 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
89 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
90 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
91 (subreg:SI (reg:QI 121) 0)))
93 are very common on machines that can only do word-sized operations.
94 For each use of a paradoxical subreg (subreg:WIDE (reg:NARROW N) 0),
95 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
96 we can replace the paradoxical subreg with simply (reg:WIDE M). The
97 above will simplify this to
99 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
100 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
101 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
103 where the first two insns are now dead. */
/* Count of substitutions that were committed; the visible increments
   follow confirm_change_group in try_fwprop_subst and a successful
   apply_change_group in forward_propagate_asm.  */
106 static int num_changes;
/* Instantiate the vector-of-df_ref type and its heap allocator.  */
108 DEF_VEC_P(df_ref);
109 DEF_VEC_ALLOC_P(df_ref,heap);
/* Vector indexed by DF_REF_ID of a use.  Each element is the single
   definition recorded for that use by process_uses, or NULL when no
   unique definition was found.
   NOTE(review): this has external linkage although only this file is
   seen to use it -- confirm whether it can be made static.  */
110 VEC(df_ref,heap) *use_def_ref;
113 /* Return the only def in USE's use-def chain, or NULL if there is
114 more than one def in the chain. */
116 static inline df_ref
117 get_def_for_use (df_ref use)
119 return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
123 /* Return the only bit between FIRST and LAST that is set in B,
124 or -1 if there are zero or more than one such bits. */
126 static inline int
127 bitmap_only_bit_between (const_bitmap b, unsigned first, unsigned last)
129 bitmap_iterator bi;
130 unsigned bit, bit2;
132 if (last < first)
133 return -1;
135 bmp_iter_set_init (&bi, b, first, &bit);
136 if (bmp_iter_set (&bi, &bit) && bit <= last)
138 bit2 = bit;
139 bmp_iter_next (&bi, &bit2);
140 if (!bmp_iter_set (&bi, &bit2) || bit2 > last)
141 return bit;
143 return -1;
147 /* Fill the use_def_ref vector with values for the uses in USE_REC,
148 taking reaching definitions info from LOCAL_RD. TOP_FLAG says
149 which artificials uses should be used, when USE_REC is an
150 artificial use vector. */
152 static void
153 process_uses (bitmap local_rd, df_ref *use_rec, int top_flag)
155 df_ref use;
156 while ((use = *use_rec++) != NULL)
157 if (top_flag == (DF_REF_FLAGS (use) & DF_REF_AT_TOP))
159 unsigned int uregno = DF_REF_REGNO (use);
160 unsigned int first = DF_DEFS_BEGIN (uregno);
161 unsigned int last = first + DF_DEFS_COUNT (uregno) - 1;
162 int defno = bitmap_only_bit_between (local_rd, first, last);
163 df_ref def = (defno == -1) ? NULL : DF_DEFS_GET (defno);
165 VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), def);
170 /* Do dataflow analysis and use reaching definitions to build
171 a vector holding the reaching definitions of uses that have a
172 single RD. */
174 static void
175 build_single_def_use_links (void)
177 basic_block bb;
178 bitmap local_rd = BITMAP_ALLOC (NULL);
180 /* We use reaching definitions to compute our restricted use-def chains. */
181 df_set_flags (DF_EQ_NOTES);
182 df_rd_add_problem ();
183 df_analyze ();
184 df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
186 use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
187 VEC_safe_grow (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
189 FOR_EACH_BB (bb)
191 int bb_index = bb->index;
192 struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
193 rtx insn;
195 bitmap_copy (local_rd, bb_info->in);
196 process_uses (local_rd, df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
198 df_rd_simulate_artificial_defs_at_top (bb, local_rd);
199 FOR_BB_INSNS (bb, insn)
200 if (INSN_P (insn))
202 unsigned int uid = INSN_UID (insn);
203 process_uses (local_rd, DF_INSN_UID_USES (uid), 0);
204 process_uses (local_rd, DF_INSN_UID_EQ_USES (uid), 0);
205 df_rd_simulate_one_insn (bb, insn, local_rd);
208 process_uses (local_rd, df_get_artificial_uses (bb_index), 0);
211 BITMAP_FREE (local_rd);
214 /* Do not try to replace constant addresses or addresses of local and
215 argument slots. These MEM expressions are made only once and inserted
216 in many instructions, as well as being used to control symbol table
217 output. It is not safe to clobber them.
219 There are some uncommon cases where the address is already in a register
220 for some reason, but we cannot take advantage of that because we have
221 no easy way to unshare the MEM. In addition, looking up all stack
222 addresses is costly. */
224 static bool
225 can_simplify_addr (rtx addr)
227 rtx reg;
229 if (CONSTANT_ADDRESS_P (addr))
230 return false;
232 if (GET_CODE (addr) == PLUS)
233 reg = XEXP (addr, 0);
234 else
235 reg = addr;
237 return (!REG_P (reg)
238 || (REGNO (reg) != FRAME_POINTER_REGNUM
239 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
240 && REGNO (reg) != ARG_POINTER_REGNUM));
243 /* Returns a canonical version of X for the address, from the point of view,
244 that all multiplications are represented as MULT instead of the multiply
245 by a power of 2 being represented as ASHIFT.
247 Every ASHIFT we find has been made by simplify_gen_binary and was not
248 there before, so it is not shared. So we can do this in place. */
250 static void
251 canonicalize_address (rtx x)
253 for (;;)
254 switch (GET_CODE (x))
256 case ASHIFT:
257 if (GET_CODE (XEXP (x, 1)) == CONST_INT
258 && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
259 && INTVAL (XEXP (x, 1)) >= 0)
261 HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
262 PUT_CODE (x, MULT);
263 XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
264 GET_MODE (x));
267 x = XEXP (x, 0);
268 break;
270 case PLUS:
271 if (GET_CODE (XEXP (x, 0)) == PLUS
272 || GET_CODE (XEXP (x, 0)) == ASHIFT
273 || GET_CODE (XEXP (x, 0)) == CONST)
274 canonicalize_address (XEXP (x, 0));
276 x = XEXP (x, 1);
277 break;
279 case CONST:
280 x = XEXP (x, 0);
281 break;
283 default:
284 return;
288 /* OLD is a memory address. Return whether it is good to use NEW instead,
289 for a memory access in the given MODE. */
291 static bool
292 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
293 bool speed)
295 int gain;
297 if (rtx_equal_p (old_rtx, new_rtx) || !memory_address_p (mode, new_rtx))
298 return false;
300 /* Copy propagation is always ok. */
301 if (REG_P (old_rtx) && REG_P (new_rtx))
302 return true;
304 /* Prefer the new address if it is less expensive. */
305 gain = address_cost (old_rtx, mode, speed) - address_cost (new_rtx, mode, speed);
307 /* If the addresses have equivalent cost, prefer the new address
308 if it has the highest `rtx_cost'. That has the potential of
309 eliminating the most insns without additional costs, and it
310 is the same that cse.c used to do. */
311 if (gain == 0)
312 gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
314 return (gain > 0);
/* Bit flags for the last parameter (FLAGS) of propagate_rtx_1.  */

enum {
  /* When set, propagate_rtx_1 always returns true.  When clear, it
     returns false if, for at least one occurrence of OLD, it failed to
     collapse the result to a constant.  For example,
     (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may collapse to
     zero if replacing (reg:M B) with (reg:M A).

     The flag is disregarded inside MEMs: there propagate_rtx_1 just
     tries to make cheaper and valid memory addresses.  */
  PR_CAN_APPEAR = 1 << 0,

  /* When clear, propagate_rtx_1 treats every non-read-only MEM it meets
     as opaque and fails on it, because it does no analysis on memory
     and must be very conservative.

     The flag is set when the source of the propagation does not itself
     contain a non-read-only MEM; then such MEMs need not be treated as
     opaque objects.  */
  PR_HANDLE_MEM = 1 << 1,

  /* Set when costs should be optimized for speed.  */
  PR_OPTIMIZE_FOR_SPEED = 1 << 2
};
347 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
348 resulting expression. Replace *PX with a new RTL expression if an
349 occurrence of OLD was found.
351 This is only a wrapper around simplify-rtx.c: do not add any pattern
352 matching code here. (The sole exception is the handling of LO_SUM, but
353 that is because there is no simplify_gen_* function for LO_SUM). */
/* Parameters, as used by the code below:
   PX       points at the expression to rewrite; it is overwritten in place.
   OLD_RTX  is the rtx to replace.  It is matched by pointer identity and,
            for REGs, also with rtx_equal_p.
   NEW_RTX  is the replacement.
   FLAGS    is a mask of the PR_* bits.

   Return value: when *PX itself is OLD_RTX the result is whether
   PR_CAN_APPEAR is set; when nothing was changed the result is true;
   after a rebuild the result is true if all recursive replacements were
   valid, PR_CAN_APPEAR is set, or the rebuilt rtx is a constant.  */
355 static bool
356 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
358 rtx x = *px, tem = NULL_RTX, op0, op1, op2;
359 enum rtx_code code = GET_CODE (x);
360 enum machine_mode mode = GET_MODE (x);
361 enum machine_mode op_mode;
362 bool can_appear = (flags & PR_CAN_APPEAR) != 0;
/* Accumulates the results of the recursive calls on the operands.  */
363 bool valid_ops = true;
365 if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
367 /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
368 they have side effects or not). */
369 *px = (side_effects_p (x)
370 ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
371 : gen_rtx_SCRATCH (GET_MODE (x)));
372 return false;
375 /* If X is OLD_RTX, return NEW_RTX. But not if replacing only within an
376 address, and we are *not* inside one. */
377 if (x == old_rtx)
379 *px = new_rtx;
380 return can_appear;
383 /* If this is an expression, try recursive substitution. */
/* In every case below the operands are first rewritten in local copies;
   if none of them changed the function returns true at once, otherwise
   the expression is rebuilt through the matching simplify_gen_* helper
   and the result is left in TEM.  */
384 switch (GET_RTX_CLASS (code))
386 case RTX_UNARY:
387 op0 = XEXP (x, 0);
388 op_mode = GET_MODE (op0);
389 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
390 if (op0 == XEXP (x, 0))
391 return true;
392 tem = simplify_gen_unary (code, mode, op0, op_mode);
393 break;
395 case RTX_BIN_ARITH:
396 case RTX_COMM_ARITH:
397 op0 = XEXP (x, 0);
398 op1 = XEXP (x, 1);
399 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
400 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
401 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
402 return true;
403 tem = simplify_gen_binary (code, mode, op0, op1);
404 break;
406 case RTX_COMPARE:
407 case RTX_COMM_COMPARE:
408 op0 = XEXP (x, 0);
409 op1 = XEXP (x, 1);
/* The operand mode is taken before the substitution, from whichever
   operand is not VOIDmode.  */
410 op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
411 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
412 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
413 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
414 return true;
415 tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
416 break;
418 case RTX_TERNARY:
419 case RTX_BITFIELD_OPS:
420 op0 = XEXP (x, 0);
421 op1 = XEXP (x, 1);
422 op2 = XEXP (x, 2);
423 op_mode = GET_MODE (op0);
424 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
425 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
426 valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
427 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
428 return true;
/* If the original first operand had no mode, retry with the mode of the
   substituted one.  */
429 if (op_mode == VOIDmode)
430 op_mode = GET_MODE (op0);
431 tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
432 break;
434 case RTX_EXTRA:
435 /* The only case we try to handle is a SUBREG. */
436 if (code == SUBREG)
438 op0 = XEXP (x, 0);
439 valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
440 if (op0 == XEXP (x, 0))
441 return true;
442 tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
443 SUBREG_BYTE (x));
445 break;
447 case RTX_OBJ:
448 if (code == MEM && x != new_rtx)
450 rtx new_op0;
451 op0 = XEXP (x, 0);
453 /* There are some addresses that we cannot work on. */
454 if (!can_simplify_addr (op0))
455 return true;
/* Substitute inside a delegitimized copy of the address; PR_CAN_APPEAR
   is forced on for the recursion into the address.  */
457 op0 = new_op0 = targetm.delegitimize_address (op0);
458 valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
459 flags | PR_CAN_APPEAR);
461 /* Dismiss transformation that we do not want to carry on. */
462 if (!valid_ops
463 || new_op0 == op0
464 || !(GET_MODE (new_op0) == GET_MODE (op0)
465 || GET_MODE (new_op0) == VOIDmode))
466 return true;
468 canonicalize_address (new_op0);
470 /* Copy propagations are always ok. Otherwise check the costs. */
471 if (!(REG_P (old_rtx) && REG_P (new_rtx))
472 && !should_replace_address (op0, new_op0, GET_MODE (x),
473 flags & PR_OPTIMIZE_FOR_SPEED))
474 return true;
476 tem = replace_equiv_address_nv (x, new_op0);
479 else if (code == LO_SUM)
481 op0 = XEXP (x, 0);
482 op1 = XEXP (x, 1);
484 /* The only simplification we do attempts to remove references to op0
485 or make it constant -- in both cases, op0's invalidity will not
486 make the result invalid. */
487 propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
488 valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
489 if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
490 return true;
492 /* (lo_sum (high x) x) -> x */
493 if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
494 tem = op1;
495 else
496 tem = gen_rtx_LO_SUM (mode, op0, op1);
498 /* OP1 is likely not a legitimate address, otherwise there would have
499 been no LO_SUM. We want it to disappear if it is invalid, return
500 false in that case. */
/* NOTE(review): this returns without storing TEM into *PX, so the caller
   never sees the rebuilt LO_SUM -- confirm that this is intended.  */
501 return memory_address_p (mode, tem);
504 else if (code == REG)
/* A REG that is a distinct object from OLD_RTX but equal to it is
   replaced as well.  */
506 if (rtx_equal_p (x, old_rtx))
508 *px = new_rtx;
509 return can_appear;
512 break;
514 default:
515 break;
518 /* No change, no trouble. */
519 if (tem == NULL_RTX)
520 return true;
522 *px = tem;
524 /* The replacement we made so far is valid, if all of the recursive
525 replacements were valid, or we could simplify everything to
526 a constant. */
527 return valid_ops || can_appear || CONSTANT_P (tem);
531 /* for_each_rtx traversal function that returns 1 if BODY points to
532 a non-constant mem. */
534 static int
535 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
537 rtx x = *body;
538 return MEM_P (x) && !MEM_READONLY_P (x);
542 /* Replace all occurrences of OLD in X with NEW and try to simplify the
543 resulting expression (in mode MODE). Return a new expression if it is
544 a constant, otherwise X.
546 Simplifications where occurrences of NEW collapse to a constant are always
547 accepted. All simplifications are accepted if NEW is a pseudo too.
548 Otherwise, we accept simplifications that have a lower or equal cost. */
550 static rtx
551 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
552 bool speed)
554 rtx tem;
555 bool collapsed;
556 int flags;
558 if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
559 return NULL_RTX;
561 flags = 0;
562 if (REG_P (new_rtx) || CONSTANT_P (new_rtx))
563 flags |= PR_CAN_APPEAR;
564 if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
565 flags |= PR_HANDLE_MEM;
567 if (speed)
568 flags |= PR_OPTIMIZE_FOR_SPEED;
570 tem = x;
571 collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
572 if (tem == x || !collapsed)
573 return NULL_RTX;
575 /* gen_lowpart_common will not be able to process VOIDmode entities other
576 than CONST_INTs. */
577 if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT)
578 return NULL_RTX;
580 if (GET_MODE (tem) == VOIDmode)
581 tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
582 else
583 gcc_assert (GET_MODE (tem) == mode);
585 return tem;
591 /* Return true if the register from reference REF is killed
592 between FROM to (but not including) TO. */
594 static bool
595 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
597 rtx insn;
599 for (insn = from; insn != to; insn = NEXT_INSN (insn))
601 df_ref *def_rec;
602 if (!INSN_P (insn))
603 continue;
605 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
607 df_ref def = *def_rec;
608 if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
609 return true;
612 return false;
616 /* Check if the given DEF is available in INSN. This would require full
617 computation of available expressions; we check only restricted conditions:
618 - if DEF is the sole definition of its register, go ahead;
619 - in the same basic block, we check for no definitions killing the
620 definition of DEF_INSN;
621 - if USE's basic block has DEF's basic block as the sole predecessor,
622 we check if the definition is killed after DEF_INSN or before
623 TARGET_INSN insn, in their respective basic blocks. */
624 static bool
625 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
627 basic_block def_bb = BLOCK_FOR_INSN (def_insn);
628 basic_block target_bb = BLOCK_FOR_INSN (target_insn);
629 int regno;
630 df_ref def;
632 /* In some obscure situations we can have a def reaching a use
633 that is _before_ the def. In other words the def does not
634 dominate the use even though the use and def are in the same
635 basic block. This can happen when a register may be used
636 uninitialized in a loop. In such cases, we must assume that
637 DEF is not available. */
638 if (def_bb == target_bb
639 ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
640 : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
641 return true;
643 /* Check if the reg in USE has only one definition. We already
644 know that this definition reaches use, or we wouldn't be here.
645 However, this is invalid for hard registers because if they are
646 live at the beginning of the function it does not mean that we
647 have an uninitialized access. */
648 regno = DF_REF_REGNO (use);
649 def = DF_REG_DEF_CHAIN (regno);
650 if (def
651 && DF_REF_NEXT_REG (def) == NULL
652 && regno >= FIRST_PSEUDO_REGISTER)
653 return false;
655 /* Check locally if we are in the same basic block. */
656 if (def_bb == target_bb)
657 return local_ref_killed_between_p (use, def_insn, target_insn);
659 /* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */
660 if (single_pred_p (target_bb)
661 && single_pred (target_bb) == def_bb)
663 df_ref x;
665 /* See if USE is killed between DEF_INSN and the last insn in the
666 basic block containing DEF_INSN. */
667 x = df_bb_regno_last_def_find (def_bb, regno);
668 if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
669 return true;
671 /* See if USE is killed between TARGET_INSN and the first insn in the
672 basic block containing TARGET_INSN. */
673 x = df_bb_regno_first_def_find (target_bb, regno);
674 if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
675 return true;
677 return false;
680 /* Otherwise assume the worst case. */
681 return true;
685 /* Check if all uses in DEF_INSN can be used in TARGET_INSN. This
686 would require full computation of available expressions;
687 we check only restricted conditions, see use_killed_between. */
688 static bool
689 all_uses_available_at (rtx def_insn, rtx target_insn)
691 df_ref *use_rec;
692 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
693 rtx def_set = single_set (def_insn);
695 gcc_assert (def_set);
697 /* If target_insn comes right after def_insn, which is very common
698 for addresses, we can use a quicker test. */
699 if (NEXT_INSN (def_insn) == target_insn
700 && REG_P (SET_DEST (def_set)))
702 rtx def_reg = SET_DEST (def_set);
704 /* If the insn uses the reg that it defines, the substitution is
705 invalid. */
706 for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
708 df_ref use = *use_rec;
709 if (rtx_equal_p (DF_REF_REG (use), def_reg))
710 return false;
712 for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
714 df_ref use = *use_rec;
715 if (rtx_equal_p (DF_REF_REG (use), def_reg))
716 return false;
719 else
721 /* Look at all the uses of DEF_INSN, and see if they are not
722 killed between DEF_INSN and TARGET_INSN. */
723 for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
725 df_ref use = *use_rec;
726 if (use_killed_between (use, def_insn, target_insn))
727 return false;
729 for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
731 df_ref use = *use_rec;
732 if (use_killed_between (use, def_insn, target_insn))
733 return false;
737 return true;
/* State shared between find_occurrence and find_occurrence_callback
   while for_each_rtx walks an expression.  */
741 struct find_occurrence_data
/* The rtx being looked for; the callback compares by pointer.  */
743 rtx find;
/* Location of the occurrence that was found; find_occurrence sets it to
   NULL before the walk and the callback fills it in on a match.  */
744 rtx *retval;
747 /* Callback for for_each_rtx, used in find_occurrence.
748 See if PX is the rtx we have to find. Return 1 to stop for_each_rtx
749 if successful, or 0 to continue traversing otherwise. */
751 static int
752 find_occurrence_callback (rtx *px, void *data)
754 struct find_occurrence_data *fod = (struct find_occurrence_data *) data;
755 rtx x = *px;
756 rtx find = fod->find;
758 if (x == find)
760 fod->retval = px;
761 return 1;
764 return 0;
767 /* Return a pointer to one of the occurrences of register FIND in *PX. */
769 static rtx *
770 find_occurrence (rtx *px, rtx find)
772 struct find_occurrence_data data;
774 gcc_assert (REG_P (find)
775 || (GET_CODE (find) == SUBREG
776 && REG_P (SUBREG_REG (find))));
778 data.find = find;
779 data.retval = NULL;
780 for_each_rtx (px, find_occurrence_callback, &data);
781 return data.retval;
785 /* Inside INSN, the expression rooted at *LOC has been changed, moving some
786 uses from USE_VEC. Find those that are present, and create new items
787 in the data flow object of the pass. Mark any new uses as having the
788 given TYPE. */
789 static void
790 update_df (rtx insn, rtx *loc, df_ref *use_rec, enum df_ref_type type,
791 int new_flags)
793 bool changed = false;
795 /* Add a use for the registers that were propagated. */
796 while (*use_rec)
798 df_ref use = *use_rec;
799 df_ref orig_use = use, new_use;
800 int width = -1;
801 int offset = -1;
802 enum machine_mode mode = VOIDmode;
803 rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use));
804 use_rec++;
806 if (!new_loc)
807 continue;
809 if (DF_REF_FLAGS_IS_SET (orig_use, DF_REF_SIGN_EXTRACT | DF_REF_ZERO_EXTRACT))
811 width = DF_REF_EXTRACT_WIDTH (orig_use);
812 offset = DF_REF_EXTRACT_OFFSET (orig_use);
813 mode = DF_REF_EXTRACT_MODE (orig_use);
816 /* Add a new insn use. Use the original type, because it says if the
817 use was within a MEM. */
818 new_use = df_ref_create (DF_REF_REG (orig_use), new_loc,
819 insn, BLOCK_FOR_INSN (insn),
820 type, DF_REF_FLAGS (orig_use) | new_flags,
821 width, offset, mode);
823 /* Set up the use-def chain. */
824 gcc_assert (DF_REF_ID (new_use) == (int) VEC_length (df_ref, use_def_ref));
825 VEC_safe_push (df_ref, heap, use_def_ref, get_def_for_use (orig_use));
826 changed = true;
828 if (changed)
829 df_insn_rescan (insn);
833 /* Try substituting NEW into LOC, which originated from forward propagation
834 of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are
835 substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
836 new insn is not recognized. Return whether the substitution was
837 performed. */
839 static bool
840 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
842 rtx insn = DF_REF_INSN (use);
843 enum df_ref_type type = DF_REF_TYPE (use);
844 int flags = DF_REF_FLAGS (use);
845 rtx set = single_set (insn);
846 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
847 int old_cost = rtx_cost (SET_SRC (set), SET, speed);
848 bool ok;
850 if (dump_file)
852 fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
853 print_inline_rtx (dump_file, *loc, 2);
854 fprintf (dump_file, "\n with ");
855 print_inline_rtx (dump_file, new_rtx, 2);
856 fprintf (dump_file, "\n");
859 validate_unshare_change (insn, loc, new_rtx, true);
860 if (!verify_changes (0))
862 if (dump_file)
863 fprintf (dump_file, "Changes to insn %d not recognized\n",
864 INSN_UID (insn));
865 ok = false;
868 else if (DF_REF_TYPE (use) == DF_REF_REG_USE
869 && rtx_cost (SET_SRC (set), SET, speed) > old_cost)
871 if (dump_file)
872 fprintf (dump_file, "Changes to insn %d not profitable\n",
873 INSN_UID (insn));
874 ok = false;
877 else
879 if (dump_file)
880 fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
881 ok = true;
884 if (ok)
886 confirm_change_group ();
887 num_changes++;
889 df_ref_remove (use);
890 if (!CONSTANT_P (new_rtx))
892 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
893 update_df (insn, loc, DF_INSN_INFO_USES (insn_info), type, flags);
894 update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), type, flags);
897 else
899 cancel_changes (0);
901 /* Can also record a simplified value in a REG_EQUAL note,
902 making a new one if one does not already exist. */
903 if (set_reg_equal)
905 if (dump_file)
906 fprintf (dump_file, " Setting REG_EQUAL note\n");
908 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
910 /* ??? Is this still necessary if we add the note through
911 set_unique_reg_note? */
912 if (!CONSTANT_P (new_rtx))
914 struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
915 update_df (insn, loc, DF_INSN_INFO_USES (insn_info),
916 type, DF_REF_IN_NOTE);
917 update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info),
918 type, DF_REF_IN_NOTE);
923 return ok;
927 /* If USE is a paradoxical subreg, see if it can be replaced by a pseudo. */
929 static bool
930 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
932 rtx use_reg = DF_REF_REG (use);
933 rtx use_insn, src;
935 /* Only consider paradoxical subregs... */
936 enum machine_mode use_mode = GET_MODE (use_reg);
937 if (GET_CODE (use_reg) != SUBREG
938 || !REG_P (SET_DEST (def_set))
939 || GET_MODE_SIZE (use_mode)
940 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
941 return false;
943 /* If this is a paradoxical SUBREG, we have no idea what value the
944 extra bits would have. However, if the operand is equivalent to
945 a SUBREG whose operand is the same as our mode, and all the modes
946 are within a word, we can just use the inner operand because
947 these SUBREGs just say how to treat the register. */
948 use_insn = DF_REF_INSN (use);
949 src = SET_SRC (def_set);
950 if (GET_CODE (src) == SUBREG
951 && REG_P (SUBREG_REG (src))
952 && GET_MODE (SUBREG_REG (src)) == use_mode
953 && subreg_lowpart_p (src)
954 && all_uses_available_at (def_insn, use_insn))
955 return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
956 def_insn, false);
957 else
958 return false;
961 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.
   DEF_SET is the set of DEF_INSN that supplies SRC, and REG is the rtx
   that is replaced by SRC.  USE must not be a use inside a note.  The
   pattern of the use insn must be an ASM_OPERANDS, a SET or a PARALLEL.

   Replacements are queued for MEM destinations of the asm and for each
   of its input operands, and then applied as one group.  Return true,
   after incrementing num_changes, if at least one change was queued and
   the group was applied; return false otherwise. */
963 static bool
964 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
966 rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
967 int speed_p, i;
968 df_ref *use_vec;
970 gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
972 src = SET_SRC (def_set);
973 use_pat = PATTERN (use_insn);
975 /* In __asm don't replace if src might need more registers than
976 reg, as that could increase register pressure on the __asm. */
/* Concretely: give up when DEF_INSN has two or more uses.  */
977 use_vec = DF_INSN_USES (def_insn);
978 if (use_vec[0] && use_vec[1])
979 return false;
981 speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
982 asm_operands = NULL_RTX;
/* Locate the ASM_OPERANDS rtx inside the pattern.  On the way, queue a
   replacement for every SET destination that is a MEM.  */
983 switch (GET_CODE (use_pat))
985 case ASM_OPERANDS:
986 asm_operands = use_pat;
987 break;
988 case SET:
989 if (MEM_P (SET_DEST (use_pat)))
991 loc = &SET_DEST (use_pat);
992 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
993 if (new_rtx)
994 validate_unshare_change (use_insn, loc, new_rtx, true);
996 asm_operands = SET_SRC (use_pat);
997 break;
998 case PARALLEL:
999 for (i = 0; i < XVECLEN (use_pat, 0); i++)
1000 if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1002 if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1004 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1005 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1006 src, speed_p);
1007 if (new_rtx)
1008 validate_unshare_change (use_insn, loc, new_rtx, true);
1010 asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1012 else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1013 asm_operands = XVECEXP (use_pat, 0, i);
1014 break;
1015 default:
1016 gcc_unreachable ();
1019 gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
/* Queue a replacement for each input operand that propagate_rtx manages
   to rewrite.  */
1020 for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1022 loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1023 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1024 if (new_rtx)
1025 validate_unshare_change (use_insn, loc, new_rtx, true);
/* Nothing queued, or the group could not be applied: no change.  */
1028 if (num_changes_pending () == 0 || !apply_change_group ())
1029 return false;
1031 num_changes++;
1032 return true;
1035 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1036 result. */
1038 static bool
1039 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1041 rtx use_insn = DF_REF_INSN (use);
1042 rtx use_set = single_set (use_insn);
1043 rtx src, reg, new_rtx, *loc;
1044 bool set_reg_equal;
1045 enum machine_mode mode;
1046 int asm_use = -1;
1048 if (INSN_CODE (use_insn) < 0)
1049 asm_use = asm_noperands (PATTERN (use_insn));
1051 if (!use_set && asm_use < 0)
1052 return false;
1054 /* Do not propagate into PC, CC0, etc. */
1055 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1056 return false;
1058 /* If def and use are subreg, check if they match. */
1059 reg = DF_REF_REG (use);
1060 if (GET_CODE (reg) == SUBREG
1061 && GET_CODE (SET_DEST (def_set)) == SUBREG
1062 && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
1063 || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
1064 return false;
1066 /* Check if the def had a subreg, but the use has the whole reg. */
1067 if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1068 return false;
1070 /* Check if the use has a subreg, but the def had the whole reg. Unlike the
1071 previous case, the optimization is possible and often useful indeed. */
1072 if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1073 reg = SUBREG_REG (reg);
1075 /* Check if the substitution is valid (last, because it's the most
1076 expensive check!). */
1077 src = SET_SRC (def_set);
1078 if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1079 return false;
1081 /* Check if the def is loading something from the constant pool; in this
1082 case we would undo optimization such as compress_float_constant.
1083 Still, we can set a REG_EQUAL note. */
1084 if (MEM_P (src) && MEM_READONLY_P (src))
1086 rtx x = avoid_constant_pool_reference (src);
1087 if (x != src && use_set)
1089 rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1090 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1091 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1092 if (old_rtx != new_rtx)
1093 set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1095 return false;
1098 if (asm_use >= 0)
1099 return forward_propagate_asm (use, def_insn, def_set, reg);
1101 /* Else try simplifying. */
1103 if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1105 loc = &SET_DEST (use_set);
1106 set_reg_equal = false;
1108 else
1110 rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1111 if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1112 loc = &XEXP (note, 0);
1113 else
1114 loc = &SET_SRC (use_set);
1116 /* Do not replace an existing REG_EQUAL note if the insn is not
1117 recognized. Either we're already replacing in the note, or
1118 we'll separately try plugging the definition in the note and
1119 simplifying. */
1120 set_reg_equal = (note == NULL_RTX);
1123 if (GET_MODE (*loc) == VOIDmode)
1124 mode = GET_MODE (SET_DEST (use_set));
1125 else
1126 mode = GET_MODE (*loc);
1128 new_rtx = propagate_rtx (*loc, mode, reg, src,
1129 optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1131 if (!new_rtx)
1132 return false;
1134 return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1138 /* Given a use USE of an insn, if it has a single reaching
1139 definition, try to forward propagate it into that insn. */
1141 static void
1142 forward_propagate_into (df_ref use)
1144 df_ref def;
1145 rtx def_insn, def_set, use_insn;
1146 rtx parent;
1148 if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1149 return;
1150 if (DF_REF_IS_ARTIFICIAL (use))
1151 return;
1153 /* Only consider uses that have a single definition. */
1154 def = get_def_for_use (use);
1155 if (!def)
1156 return;
1157 if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1158 return;
1159 if (DF_REF_IS_ARTIFICIAL (def))
1160 return;
1162 /* Do not propagate loop invariant definitions inside the loop. */
1163 if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1164 return;
1166 /* Check if the use is still present in the insn! */
1167 use_insn = DF_REF_INSN (use);
1168 if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1169 parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1170 else
1171 parent = PATTERN (use_insn);
1173 if (!reg_mentioned_p (DF_REF_REG (use), parent))
1174 return;
1176 def_insn = DF_REF_INSN (def);
1177 if (multiple_sets (def_insn))
1178 return;
1179 def_set = single_set (def_insn);
1180 if (!def_set)
1181 return;
1183 /* Only try one kind of propagation. If two are possible, we'll
1184 do it on the following iterations. */
1185 if (!forward_propagate_and_simplify (use, def_insn, def_set))
1186 forward_propagate_subreg (use, def_insn, def_set);
1190 static void
1191 fwprop_init (void)
1193 num_changes = 0;
1194 calculate_dominance_info (CDI_DOMINATORS);
1196 /* We do not always want to propagate into loops, so we have to find
1197 loops and be careful about them. But we have to call flow_loops_find
1198 before df_analyze, because flow_loops_find may introduce new jump
1199 insns (sadly) if we are not working in cfglayout mode. */
1200 loop_optimizer_init (0);
1202 build_single_def_use_links ();
1203 df_set_flags (DF_DEFER_INSN_RESCAN);
1206 static void
1207 fwprop_done (void)
1209 loop_optimizer_finalize ();
1211 VEC_free (df_ref, heap, use_def_ref);
1212 free_dominance_info (CDI_DOMINATORS);
1213 cleanup_cfg (0);
1214 delete_trivially_dead_insns (get_insns (), max_reg_num ());
1216 if (dump_file)
1217 fprintf (dump_file,
1218 "\nNumber of successful forward propagations: %d\n\n",
1219 num_changes);
1220 df_remove_problem (df_chain);
1225 /* Main entry point. */
1227 static bool
1228 gate_fwprop (void)
1230 return optimize > 0 && flag_forward_propagate;
1233 static unsigned int
1234 fwprop (void)
1236 unsigned i;
1238 fwprop_init ();
1240 /* Go through all the uses. update_df will create new ones at the
1241 end, and we'll go through them as well.
1243 Do not forward propagate addresses into loops until after unrolling.
1244 CSE did so because it was able to fix its own mess, but we are not. */
1246 for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1248 df_ref use = DF_USES_GET (i);
1249 if (use)
1250 if (DF_REF_TYPE (use) == DF_REF_REG_USE
1251 || DF_REF_BB (use)->loop_father == NULL
1252 /* The outer most loop is not really a loop. */
1253 || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1254 forward_propagate_into (use);
1257 fwprop_done ();
1258 return 0;
1261 struct rtl_opt_pass pass_rtl_fwprop =
1264 RTL_PASS,
1265 "fwprop1", /* name */
1266 gate_fwprop, /* gate */
1267 fwprop, /* execute */
1268 NULL, /* sub */
1269 NULL, /* next */
1270 0, /* static_pass_number */
1271 TV_FWPROP, /* tv_id */
1272 0, /* properties_required */
1273 0, /* properties_provided */
1274 0, /* properties_destroyed */
1275 0, /* todo_flags_start */
1276 TODO_df_finish | TODO_verify_rtl_sharing |
1277 TODO_dump_func /* todo_flags_finish */
1281 static unsigned int
1282 fwprop_addr (void)
1284 unsigned i;
1285 fwprop_init ();
1287 /* Go through all the uses. update_df will create new ones at the
1288 end, and we'll go through them as well. */
1289 for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1291 df_ref use = DF_USES_GET (i);
1292 if (use)
1293 if (DF_REF_TYPE (use) != DF_REF_REG_USE
1294 && DF_REF_BB (use)->loop_father != NULL
1295 /* The outer most loop is not really a loop. */
1296 && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1297 forward_propagate_into (use);
1300 fwprop_done ();
1302 return 0;
1305 struct rtl_opt_pass pass_rtl_fwprop_addr =
1308 RTL_PASS,
1309 "fwprop2", /* name */
1310 gate_fwprop, /* gate */
1311 fwprop_addr, /* execute */
1312 NULL, /* sub */
1313 NULL, /* next */
1314 0, /* static_pass_number */
1315 TV_FWPROP, /* tv_id */
1316 0, /* properties_required */
1317 0, /* properties_provided */
1318 0, /* properties_destroyed */
1319 0, /* todo_flags_start */
1320 TODO_df_finish | TODO_verify_rtl_sharing |
1321 TODO_dump_func /* todo_flags_finish */