Daily bump.
[official-gcc.git] / gcc / fwprop.cc
blob2ebb2f146cc6a3f2480bcec60079b941c110cb3d
1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #define INCLUDE_ARRAY
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "rtlanal.h"
30 #include "df.h"
31 #include "rtl-ssa.h"
33 #include "predict.h"
34 #include "cfgrtl.h"
35 #include "cfgcleanup.h"
36 #include "cfgloop.h"
37 #include "tree-pass.h"
38 #include "rtl-iter.h"
39 #include "target.h"
41 /* This pass does simple forward propagation and simplification when an
42 operand of an insn can only come from a single def. This pass uses
43 RTL SSA, so it is global. However, we only do limited analysis of
44 available expressions.
46 1) The pass tries to propagate the source of the def into the use,
47 and checks if the result is independent of the substituted value.
48 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
49 zero, independent of the source register.
51 In particular, we propagate constants into the use site. Sometimes
52 RTL expansion did not put the constant in the same insn on purpose,
53 to satisfy a predicate, and the result will fail to be recognized;
54 but this happens rarely and in this case we can still create a
55 REG_EQUAL note. For multi-word operations, this
57 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
58 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
59 (set (subreg:SI (reg:DI 122) 0)
60 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
61 (set (subreg:SI (reg:DI 122) 4)
62 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
64 can be simplified to the much simpler
66 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
67 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
69 This particular propagation is also effective at putting together
70 complex addressing modes. We are more aggressive inside MEMs, in
71 that all definitions are propagated if the use is in a MEM; if the
72 result is a valid memory address we check address_cost to decide
73 whether the substitution is worthwhile.
75 2) The pass propagates register copies. This is not as effective as
76 the copy propagation done by CSE's canon_reg, which works by walking
77 the instruction chain, it can help the other transformations.
79 We should consider removing this optimization, and instead reorder the
80 RTL passes, because GCSE does this transformation too. With some luck,
81 the CSE pass at the end of rest_of_handle_gcse could also go away.
83 3) The pass looks for paradoxical subregs that are actually unnecessary.
84 Things like this:
86 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
87 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
88 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
89 (subreg:SI (reg:QI 121) 0)))
91 are very common on machines that can only do word-sized operations.
92 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
93 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
94 we can replace the paradoxical subreg with simply (reg:WIDE M). The
95 above will simplify this to
97 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
98 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
99 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
101 where the first two insns are now dead. */
103 using namespace rtl_ssa;
105 static int num_changes;
107 /* Do not try to replace constant addresses or addresses of local and
108 argument slots. These MEM expressions are made only once and inserted
109 in many instructions, as well as being used to control symbol table
110 output. It is not safe to clobber them.
112 There are some uncommon cases where the address is already in a register
113 for some reason, but we cannot take advantage of that because we have
114 no easy way to unshare the MEM. In addition, looking up all stack
115 addresses is costly. */
117 static bool
118 can_simplify_addr (rtx addr)
120 rtx reg;
122 if (CONSTANT_ADDRESS_P (addr))
123 return false;
125 if (GET_CODE (addr) == PLUS)
126 reg = XEXP (addr, 0);
127 else
128 reg = addr;
130 return (!REG_P (reg)
131 || (REGNO (reg) != FRAME_POINTER_REGNUM
132 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
133 && REGNO (reg) != ARG_POINTER_REGNUM));
136 /* MEM is the result of an address simplification, and temporarily
137 undoing changes OLD_NUM_CHANGES onwards restores the original address.
138 Return whether it is good to use the new address instead of the
139 old one. INSN is the containing instruction. */
141 static bool
142 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
144 int gain;
146 /* Prefer the new address if it is less expensive. */
147 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
148 temporarily_undo_changes (old_num_changes);
149 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
150 MEM_ADDR_SPACE (mem), speed);
151 redo_changes (old_num_changes);
152 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
153 MEM_ADDR_SPACE (mem), speed);
155 /* If the addresses have equivalent cost, prefer the new address
156 if it has the highest `set_src_cost'. That has the potential of
157 eliminating the most insns without additional costs, and it
158 is the same that cse.cc used to do. */
159 if (gain == 0)
161 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
162 temporarily_undo_changes (old_num_changes);
163 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
164 redo_changes (old_num_changes);
167 return (gain > 0);
171 namespace
173 class fwprop_propagation : public insn_propagation
175 public:
176 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
177 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
178 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
180 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
182 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
183 bool folded_to_constants_p () const;
184 bool likely_profitable_p () const;
186 bool check_mem (int, rtx) final override;
187 void note_simplification (int, uint16_t, rtx, rtx) final override;
188 uint16_t classify_result (rtx, rtx);
190 private:
191 const bool single_use_p;
192 const bool single_ebb_p;
196 /* Prepare to replace FROM with TO in USE_INSN. */
198 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
199 set_info *def, rtx from, rtx to)
200 : insn_propagation (use_insn->rtl (), from, to),
201 single_use_p (def->single_nondebug_use ()),
202 single_ebb_p (use_insn->ebb () == def->ebb ())
204 should_check_mems = true;
205 should_note_simplifications = true;
208 /* MEM is the result of an address simplification, and temporarily
209 undoing changes OLD_NUM_CHANGES onwards restores the original address.
210 Return true if the propagation should continue, false if it has failed. */
212 bool
213 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
215 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
216 MEM_ADDR_SPACE (mem)))
218 failure_reason = "would create an invalid MEM";
219 return false;
222 temporarily_undo_changes (old_num_changes);
223 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
224 redo_changes (old_num_changes);
225 if (!can_simplify)
227 failure_reason = "would replace a frame address";
228 return false;
231 /* Copy propagations are always ok. Otherwise check the costs. */
232 if (!(REG_P (from) && REG_P (to))
233 && !should_replace_address (old_num_changes, mem, insn))
235 failure_reason = "would increase the cost of a MEM";
236 return false;
239 result_flags |= CHANGED_MEM;
240 return true;
243 /* OLDX has been simplified to NEWX. Describe the change in terms of
244 result_flags. */
246 uint16_t
247 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
249 if (CONSTANT_P (new_rtx))
251 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
252 and NEW_RTX is likely not a legitimate address. We want it to
253 disappear if it is invalid.
255 ??? Using the mode of the LO_SUM as the mode of the address
256 seems odd, but it was what the pre-SSA code did. */
257 if (GET_CODE (old_rtx) == LO_SUM
258 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
259 return CONSTANT;
260 return CONSTANT | PROFITABLE;
263 /* Allow replacements that simplify operations on a vector or complex
264 value to a component. The most prominent case is
265 (subreg ([vec_]concat ...)). */
266 if (REG_P (new_rtx)
267 && !HARD_REGISTER_P (new_rtx)
268 && (VECTOR_MODE_P (GET_MODE (from))
269 || COMPLEX_MODE_P (GET_MODE (from)))
270 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
271 return PROFITABLE;
273 /* Allow (subreg (mem)) -> (mem) simplifications with the following
274 exceptions:
275 1) Propagating (mem)s into multiple uses is not profitable.
276 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
277 runs less frequently.
278 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
279 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
280 ones. */
281 if (single_use_p
282 && single_ebb_p
283 && SUBREG_P (old_rtx)
284 && !paradoxical_subreg_p (old_rtx)
285 && MEM_P (new_rtx)
286 && !MEM_VOLATILE_P (new_rtx))
287 return PROFITABLE;
289 return 0;
292 /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
293 is the number of unrelated changes that had been made before processing
294 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
295 had at that point. */
297 void
298 fwprop_propagation::note_simplification (int old_num_changes,
299 uint16_t old_result_flags,
300 rtx old_rtx, rtx new_rtx)
302 result_flags &= ~(CONSTANT | PROFITABLE);
303 uint16_t new_flags = classify_result (old_rtx, new_rtx);
304 if (old_num_changes)
305 new_flags &= old_result_flags;
306 result_flags |= new_flags;
309 /* Return true if all substitutions eventually folded to constants. */
311 bool
312 fwprop_propagation::folded_to_constants_p () const
314 /* If we're propagating a HIGH, require it to be folded with a
315 partnering LO_SUM. For example, a REG_EQUAL note with a register
316 replaced by an unfolded HIGH is not useful. */
317 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
318 return true;
319 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
323 /* Return true if it is worth keeping the result of the propagation,
324 false if it would increase the complexity of the pattern too much. */
326 bool
327 fwprop_propagation::likely_profitable_p () const
329 if (changed_mem_p ())
330 return true;
332 if (!(result_flags & UNSIMPLIFIED)
333 && (result_flags & PROFITABLE))
334 return true;
336 if (REG_P (to))
337 return true;
339 if (GET_CODE (to) == SUBREG
340 && REG_P (SUBREG_REG (to))
341 && !paradoxical_subreg_p (to))
342 return true;
344 if (CONSTANT_P (to))
345 return true;
347 return false;
350 /* Check that X has a single def. */
352 static bool
353 reg_single_def_p (rtx x)
355 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
358 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
359 USE_INSN. Return the number of substitutions on success, otherwise return
360 -1 and leave USE_INSN unchanged.
362 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
363 to fold to a constant, so that the note does not use any more registers
364 than it did previously. If REQUIRE_CONSTANT is false, also allow the
365 substitution if it's something we'd normally allow for the main
366 instruction pattern. */
368 static int
369 try_fwprop_subst_note (insn_info *use_insn, set_info *def,
370 rtx note, rtx dest, rtx src, bool require_constant)
372 rtx_insn *use_rtl = use_insn->rtl ();
373 insn_info *def_insn = def->insn ();
375 insn_change_watermark watermark;
376 fwprop_propagation prop (use_insn, def, dest, src);
377 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
379 if (dump_file && (dump_flags & TDF_DETAILS))
380 fprintf (dump_file, "cannot propagate from insn %d into"
381 " notes of insn %d: %s\n", def_insn->uid (),
382 use_insn->uid (), prop.failure_reason);
383 return -1;
386 if (prop.num_replacements == 0)
387 return 0;
389 if (require_constant)
391 if (!prop.folded_to_constants_p ())
393 if (dump_file && (dump_flags & TDF_DETAILS))
394 fprintf (dump_file, "cannot propagate from insn %d into"
395 " notes of insn %d: %s\n", def_insn->uid (),
396 use_insn->uid (), "wouldn't fold to constants");
397 return -1;
400 else
402 if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
404 if (dump_file && (dump_flags & TDF_DETAILS))
405 fprintf (dump_file, "cannot propagate from insn %d into"
406 " notes of insn %d: %s\n", def_insn->uid (),
407 use_insn->uid (), "would increase complexity of node");
408 return -1;
412 if (dump_file && (dump_flags & TDF_DETAILS))
414 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
415 INSN_UID (use_rtl));
416 temporarily_undo_changes (0);
417 print_inline_rtx (dump_file, note, 2);
418 redo_changes (0);
419 fprintf (dump_file, "\n with:\n ");
420 print_inline_rtx (dump_file, note, 2);
421 fprintf (dump_file, "\n");
423 watermark.keep ();
424 return prop.num_replacements;
427 /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
428 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
429 unchanged. */
431 static bool
432 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
433 set_info *def, rtx *loc, rtx dest, rtx src)
435 insn_info *use_insn = use_change.insn ();
436 rtx_insn *use_rtl = use_insn->rtl ();
437 insn_info *def_insn = def->insn ();
439 insn_change_watermark watermark;
440 fwprop_propagation prop (use_insn, def, dest, src);
441 if (!prop.apply_to_pattern (loc))
443 if (dump_file && (dump_flags & TDF_DETAILS))
444 fprintf (dump_file, "cannot propagate from insn %d into"
445 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
446 prop.failure_reason);
447 return false;
450 if (prop.num_replacements == 0)
451 return false;
453 if (!prop.likely_profitable_p ()
454 && (prop.changed_mem_p ()
455 || contains_mem_rtx_p (src)
456 || use_insn->is_asm ()
457 || use_insn->is_debug_insn ()))
459 if (dump_file && (dump_flags & TDF_DETAILS))
460 fprintf (dump_file, "cannot propagate from insn %d into"
461 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
462 "would increase complexity of pattern");
463 return false;
466 if (dump_file && (dump_flags & TDF_DETAILS))
468 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
469 def_insn->uid (), use_insn->uid ());
470 temporarily_undo_changes (0);
471 print_rtl_single (dump_file, PATTERN (use_rtl));
472 redo_changes (0);
475 bool ok = recog (attempt, use_change);
476 if (ok
477 && !prop.changed_mem_p ()
478 && !use_insn->is_asm ()
479 && !use_insn->is_debug_insn ())
481 bool strict_p = !prop.likely_profitable_p ();
482 if (!change_is_worthwhile (use_change, strict_p))
484 if (dump_file)
485 fprintf (dump_file, "change not profitable");
486 ok = false;
490 if (!ok)
492 /* The pattern didn't match, but if all uses of SRC folded to
493 constants, we can add a REG_EQUAL note for the result, if there
494 isn't one already. */
495 if (!prop.folded_to_constants_p ())
496 return false;
498 /* Test this first to avoid creating an unnecessary copy of SRC. */
499 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
500 return false;
502 rtx set = set_for_reg_notes (use_rtl);
503 if (!set || !REG_P (SET_DEST (set)))
504 return false;
506 rtx value = copy_rtx (SET_SRC (set));
507 cancel_changes (0);
509 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
510 because the bits in there can be anything and so might not
511 match the REG_EQUAL note content. See PR70574. */
512 if (contains_paradoxical_subreg_p (SET_SRC (set)))
513 return false;
515 if (dump_file && (dump_flags & TDF_DETAILS))
516 fprintf (dump_file, " Setting REG_EQUAL note\n");
518 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
521 rtx *note_ptr = &REG_NOTES (use_rtl);
522 while (rtx note = *note_ptr)
524 if ((REG_NOTE_KIND (note) == REG_EQUAL
525 || REG_NOTE_KIND (note) == REG_EQUIV)
526 && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
528 *note_ptr = XEXP (note, 1);
529 free_EXPR_LIST_node (note);
531 else
532 note_ptr = &XEXP (note, 1);
535 confirm_change_group ();
536 crtl->ssa->change_insn (use_change);
537 num_changes++;
538 return true;
541 /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
542 given that it was not possible to do this for USE_INSN's main pattern.
543 Return true on success, otherwise leave USE_INSN unchanged. */
545 static bool
546 try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
547 rtx dest, rtx src)
549 rtx_insn *use_rtl = use_insn->rtl ();
550 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
551 if ((REG_NOTE_KIND (note) == REG_EQUAL
552 || REG_NOTE_KIND (note) == REG_EQUIV)
553 && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
555 confirm_change_group ();
556 return true;
559 return false;
562 /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
563 into USE. If so, first try performing the substitution in location LOC
564 of USE->insn ()'s pattern. If that fails, try instead to substitute
565 into the notes.
567 Return true on success, otherwise leave USE_INSN unchanged. */
569 static bool
570 try_fwprop_subst (use_info *use, set_info *def,
571 rtx *loc, rtx dest, rtx src)
573 insn_info *use_insn = use->insn ();
574 insn_info *def_insn = def->insn ();
576 auto attempt = crtl->ssa->new_change_attempt ();
577 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
579 /* ??? Not really a meaningful test: it means we can propagate arithmetic
580 involving hard registers but not bare references to them. A better
581 test would be to iterate over src_uses looking for hard registers
582 that are not fixed. */
583 if (REG_P (src) && HARD_REGISTER_P (src))
584 return false;
586 /* ??? It would be better to make this EBB-based instead. That would
587 involve checking for equal EBBs rather than equal BBs and trying
588 to make the uses available at use_insn->ebb ()->first_bb (). */
589 if (def_insn->bb () != use_insn->bb ())
591 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
592 use_insn->bb (),
593 use_insn->is_debug_insn ());
594 if (!src_uses.is_valid ())
595 return false;
598 insn_change use_change (use_insn);
599 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
600 src_uses);
601 if (!use_change.new_uses.is_valid ())
602 return false;
604 /* ??? We could allow movement within the EBB by adding:
606 use_change.move_range = use_insn->ebb ()->insn_range (); */
607 if (!restrict_movement (use_change))
608 return false;
610 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
611 || try_fwprop_subst_notes (use_insn, def, dest, src));
614 /* For the given single_set INSN, containing SRC known to be a
615 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
616 is redundant due to the register being set by a LOAD_EXTEND_OP
617 load from memory. */
619 static bool
620 free_load_extend (rtx src, insn_info *insn)
622 rtx reg = XEXP (src, 0);
623 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
624 return false;
626 def_info *def = nullptr;
627 for (use_info *use : insn->uses ())
628 if (use->regno () == REGNO (reg))
630 def = use->def ();
631 break;
634 if (!def)
635 return false;
637 insn_info *def_insn = def->insn ();
638 if (def_insn->is_artificial ())
639 return false;
641 rtx_insn *def_rtl = def_insn->rtl ();
642 if (NONJUMP_INSN_P (def_rtl))
644 rtx patt = PATTERN (def_rtl);
646 if (GET_CODE (patt) == SET
647 && GET_CODE (SET_SRC (patt)) == MEM
648 && rtx_equal_p (SET_DEST (patt), reg))
649 return true;
651 return false;
654 /* Subroutine of forward_propagate_subreg that handles a use of DEST
655 in REF. The other parameters are the same. */
657 static bool
658 forward_propagate_subreg (use_info *use, set_info *def,
659 rtx dest, rtx src, df_ref ref)
661 scalar_int_mode int_use_mode, src_mode;
663 /* Only consider subregs... */
664 rtx use_reg = DF_REF_REG (ref);
665 machine_mode use_mode = GET_MODE (use_reg);
666 if (GET_CODE (use_reg) != SUBREG
667 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
668 return false;
670 /* ??? Replacing throughout the pattern would help for match_dups. */
671 rtx *loc = DF_REF_LOC (ref);
672 if (paradoxical_subreg_p (use_reg))
674 /* If this is a paradoxical SUBREG, we have no idea what value the
675 extra bits would have. However, if the operand is equivalent to
676 a SUBREG whose operand is the same as our mode, and all the modes
677 are within a word, we can just use the inner operand because
678 these SUBREGs just say how to treat the register. */
679 if (GET_CODE (src) == SUBREG
680 && REG_P (SUBREG_REG (src))
681 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
682 && GET_MODE (SUBREG_REG (src)) == use_mode
683 && subreg_lowpart_p (src))
684 return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
687 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
688 is the low part of the reg being extended then just use the inner
689 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
690 be removed due to it matching a LOAD_EXTEND_OP load from memory,
691 or due to the operation being a no-op when applied to registers.
692 For example, if we have:
694 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
695 B: (... (subreg:SI (reg:DI X)) ...)
697 and mode_rep_extended says that Y is already sign-extended,
698 the backend will typically allow A to be combined with the
699 definition of Y or, failing that, allow A to be deleted after
700 reload through register tying. Introducing more uses of Y
701 prevents both optimisations. */
702 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
703 && subreg_lowpart_p (use_reg))
705 if ((GET_CODE (src) == ZERO_EXTEND
706 || GET_CODE (src) == SIGN_EXTEND)
707 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
708 && REG_P (XEXP (src, 0))
709 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
710 && GET_MODE (XEXP (src, 0)) == use_mode
711 && !free_load_extend (src, def->insn ())
712 && (targetm.mode_rep_extended (int_use_mode, src_mode)
713 != (int) GET_CODE (src)))
714 return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
717 return false;
720 /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
721 the result, handling cases where DEST is used in a subreg and where
722 applying that subreg to SRC results in a useful simplification. */
724 static bool
725 forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
727 if (!use->includes_subregs () || !REG_P (dest))
728 return false;
730 if (GET_CODE (src) != SUBREG
731 && GET_CODE (src) != ZERO_EXTEND
732 && GET_CODE (src) != SIGN_EXTEND)
733 return false;
735 rtx_insn *use_rtl = use->insn ()->rtl ();
736 df_ref ref;
738 FOR_EACH_INSN_USE (ref, use_rtl)
739 if (DF_REF_REGNO (ref) == use->regno ()
740 && forward_propagate_subreg (use, def, dest, src, ref))
741 return true;
743 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
744 if (DF_REF_REGNO (ref) == use->regno ()
745 && forward_propagate_subreg (use, def, dest, src, ref))
746 return true;
748 return false;
751 /* Try to substitute (set DEST SRC), which defines DEF, into USE and
752 simplify the result. */
754 static bool
755 forward_propagate_and_simplify (use_info *use, set_info *def,
756 rtx dest, rtx src)
758 insn_info *use_insn = use->insn ();
759 rtx_insn *use_rtl = use_insn->rtl ();
760 insn_info *def_insn = def->insn ();
762 /* ??? This check seems unnecessary. We should be able to propagate
763 into any kind of instruction, regardless of whether it's a single set.
764 It seems odd to be more permissive with asms than normal instructions. */
765 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
766 rtx use_set = single_set (use_rtl);
767 if (need_single_set && !use_set)
768 return false;
770 /* Do not propagate into PC etc.
772 ??? This too seems unnecessary. The current code should work correctly
773 without it, including cases where jumps become unconditional. */
774 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
775 return false;
777 /* In __asm don't replace if src might need more registers than
778 reg, as that could increase register pressure on the __asm. */
779 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
780 return false;
782 /* Check if the def is loading something from the constant pool; in this
783 case we would undo optimization such as compress_float_constant.
784 Still, we can set a REG_EQUAL note. */
785 if (MEM_P (src) && MEM_READONLY_P (src))
787 rtx x = avoid_constant_pool_reference (src);
788 rtx note_set;
789 if (x != src
790 && (note_set = set_for_reg_notes (use_rtl))
791 && REG_P (SET_DEST (note_set))
792 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
794 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
795 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
796 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
797 if (old_rtx != new_rtx)
798 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
800 return false;
803 /* ??? Unconditionally propagating into PATTERN would work better
804 for instructions that have match_dups. */
805 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
806 return try_fwprop_subst (use, def, loc, dest, src);
809 /* Given a use USE of an insn, if it has a single reaching
810 definition, try to forward propagate it into that insn.
811 Return true if something changed.
813 REG_PROP_ONLY is true if we should only propagate register copies. */
815 static bool
816 forward_propagate_into (use_info *use, bool reg_prop_only = false)
818 if (use->includes_read_writes ())
819 return false;
821 /* Disregard uninitialized uses. */
822 set_info *def = use->def ();
823 if (!def)
824 return false;
826 /* Only consider single-register definitions. This could be relaxed,
827 but it should rarely be needed before RA. */
828 def = look_through_degenerate_phi (def);
829 if (def->includes_multiregs ())
830 return false;
832 /* Only consider uses whose definition comes from a real instruction. */
833 insn_info *def_insn = def->insn ();
834 if (def_insn->is_artificial ())
835 return false;
837 rtx_insn *def_rtl = def_insn->rtl ();
838 if (!NONJUMP_INSN_P (def_rtl))
839 return false;
840 /* ??? This seems an unnecessary restriction. We can easily tell
841 which set the definition comes from. */
842 if (multiple_sets (def_rtl))
843 return false;
844 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
845 if (!def_set)
846 return false;
848 rtx dest = SET_DEST (def_set);
849 rtx src = SET_SRC (def_set);
850 if (volatile_refs_p (src))
851 return false;
853 /* Allow propagations into a loop only for reg-to-reg copies, since
854 replacing one register by another shouldn't increase the cost.
855 Propagations from inner loop to outer loop should also be ok. */
856 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
857 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
858 if ((reg_prop_only
859 || (def_loop != use_loop
860 && !flow_loop_nested_p (use_loop, def_loop)))
861 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
862 return false;
864 /* Don't substitute into a non-local goto, this confuses CFG. */
865 insn_info *use_insn = use->insn ();
866 rtx_insn *use_rtl = use_insn->rtl ();
867 if (JUMP_P (use_rtl)
868 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
869 return false;
871 if (forward_propagate_and_simplify (use, def, dest, src)
872 || forward_propagate_subreg (use, def, dest, src))
873 return true;
875 return false;
878 static void
879 fwprop_init (void)
881 num_changes = 0;
882 calculate_dominance_info (CDI_DOMINATORS);
884 /* We do not always want to propagate into loops, so we have to find
885 loops and be careful about them. Avoid CFG modifications so that
886 we don't have to update dominance information afterwards for
887 build_single_def_use_links. */
888 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
890 df_analyze ();
891 crtl->ssa = new rtl_ssa::function_info (cfun);
894 static void
895 fwprop_done (void)
897 loop_optimizer_finalize ();
899 crtl->ssa->perform_pending_updates ();
900 free_dominance_info (CDI_DOMINATORS);
901 cleanup_cfg (0);
903 delete crtl->ssa;
904 crtl->ssa = nullptr;
906 delete_trivially_dead_insns (get_insns (), max_reg_num ());
908 if (dump_file)
909 fprintf (dump_file,
910 "\nNumber of successful forward propagations: %d\n\n",
911 num_changes);
914 /* Try to optimize INSN, returning true if something changes.
915 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
916 the full fwprop. */
918 static bool
919 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
921 for (use_info *use : insn->uses ())
923 if (use->is_mem ())
924 continue;
925 /* ??? The choices here follow those in the pre-SSA code. */
926 if (!use->includes_address_uses ())
928 if (forward_propagate_into (use, fwprop_addr_p))
929 return true;
931 else
933 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
934 /* The outermost loop is not really a loop. */
935 if (loop == NULL || loop_outer (loop) == NULL)
937 if (forward_propagate_into (use, fwprop_addr_p))
938 return true;
940 else if (fwprop_addr_p)
942 if (forward_propagate_into (use, false))
943 return true;
947 return false;
950 /* Main entry point. */
952 static bool
953 gate_fwprop (void)
955 return optimize > 0 && flag_forward_propagate;
958 static unsigned int
959 fwprop (bool fwprop_addr_p)
961 fwprop_init ();
963 /* Go through all the instructions (including debug instructions) looking
964 for uses that we could propagate into.
966 Do not forward propagate addresses into loops until after unrolling.
967 CSE did so because it was able to fix its own mess, but we are not. */
969 insn_info *next;
971 /* ??? This code uses a worklist in order to preserve the behavior
972 of the pre-SSA implementation. It would be better to instead
973 iterate on each instruction until no more propagations are
974 possible, then move on to the next. */
975 auto_vec<insn_info *> worklist;
976 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
978 next = insn->next_any_insn ();
979 if (insn->can_be_optimized () || insn->is_debug_insn ())
980 if (fwprop_insn (insn, fwprop_addr_p))
981 worklist.safe_push (insn);
983 for (unsigned int i = 0; i < worklist.length (); ++i)
985 insn_info *insn = worklist[i];
986 if (fwprop_insn (insn, fwprop_addr_p))
987 worklist.safe_push (insn);
990 fwprop_done ();
991 return 0;
994 namespace {
996 const pass_data pass_data_rtl_fwprop =
998 RTL_PASS, /* type */
999 "fwprop1", /* name */
1000 OPTGROUP_NONE, /* optinfo_flags */
1001 TV_FWPROP, /* tv_id */
1002 0, /* properties_required */
1003 0, /* properties_provided */
1004 0, /* properties_destroyed */
1005 0, /* todo_flags_start */
1006 TODO_df_finish, /* todo_flags_finish */
1009 class pass_rtl_fwprop : public rtl_opt_pass
1011 public:
1012 pass_rtl_fwprop (gcc::context *ctxt)
1013 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1016 /* opt_pass methods: */
1017 bool gate (function *) final override { return gate_fwprop (); }
1018 unsigned int execute (function *) final override { return fwprop (false); }
1020 }; // class pass_rtl_fwprop
1022 } // anon namespace
1024 rtl_opt_pass *
1025 make_pass_rtl_fwprop (gcc::context *ctxt)
1027 return new pass_rtl_fwprop (ctxt);
1030 namespace {
1032 const pass_data pass_data_rtl_fwprop_addr =
1034 RTL_PASS, /* type */
1035 "fwprop2", /* name */
1036 OPTGROUP_NONE, /* optinfo_flags */
1037 TV_FWPROP, /* tv_id */
1038 0, /* properties_required */
1039 0, /* properties_provided */
1040 0, /* properties_destroyed */
1041 0, /* todo_flags_start */
1042 TODO_df_finish, /* todo_flags_finish */
1045 class pass_rtl_fwprop_addr : public rtl_opt_pass
1047 public:
1048 pass_rtl_fwprop_addr (gcc::context *ctxt)
1049 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1052 /* opt_pass methods: */
1053 bool gate (function *) final override { return gate_fwprop (); }
1054 unsigned int execute (function *) final override { return fwprop (true); }
1056 }; // class pass_rtl_fwprop_addr
1058 } // anon namespace
1060 rtl_opt_pass *
1061 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1063 return new pass_rtl_fwprop_addr (ctxt);