hppa: Fix bug in atomic_storedi_1 pattern
[official-gcc.git] / gcc / fwprop.cc
blob7872609b336a9939035193e8bb334a664796a004
1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "rtlanal.h"
29 #include "df.h"
30 #include "rtl-ssa.h"
32 #include "predict.h"
33 #include "cfgrtl.h"
34 #include "cfgcleanup.h"
35 #include "cfgloop.h"
36 #include "tree-pass.h"
37 #include "rtl-iter.h"
38 #include "target.h"
40 /* This pass does simple forward propagation and simplification when an
41 operand of an insn can only come from a single def. This pass uses
42 RTL SSA, so it is global. However, we only do limited analysis of
43 available expressions.
45 1) The pass tries to propagate the source of the def into the use,
46 and checks if the result is independent of the substituted value.
47 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
48 zero, independent of the source register.
50 In particular, we propagate constants into the use site. Sometimes
51 RTL expansion did not put the constant in the same insn on purpose,
52 to satisfy a predicate, and the result will fail to be recognized;
53 but this happens rarely and in this case we can still create a
54 REG_EQUAL note. For multi-word operations, this
56 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
57 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
58 (set (subreg:SI (reg:DI 122) 0)
59 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
60 (set (subreg:SI (reg:DI 122) 4)
61 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
63 can be simplified to the much simpler
65 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
66 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
68 This particular propagation is also effective at putting together
69 complex addressing modes. We are more aggressive inside MEMs, in
70 that all definitions are propagated if the use is in a MEM; if the
71 result is a valid memory address we check address_cost to decide
72 whether the substitution is worthwhile.
74 2) The pass propagates register copies. This is not as effective as
75 the copy propagation done by CSE's canon_reg, which works by walking
76 the instruction chain, it can help the other transformations.
78 We should consider removing this optimization, and instead reorder the
79 RTL passes, because GCSE does this transformation too. With some luck,
80 the CSE pass at the end of rest_of_handle_gcse could also go away.
82 3) The pass looks for paradoxical subregs that are actually unnecessary.
83 Things like this:
85 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
86 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
87 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
88 (subreg:SI (reg:QI 121) 0)))
90 are very common on machines that can only do word-sized operations.
91 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
92 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
93 we can replace the paradoxical subreg with simply (reg:WIDE M). The
94 above will simplify this to
96 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
97 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
98 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
100 where the first two insns are now dead. */
102 using namespace rtl_ssa;
104 static int num_changes;
106 /* Do not try to replace constant addresses or addresses of local and
107 argument slots. These MEM expressions are made only once and inserted
108 in many instructions, as well as being used to control symbol table
109 output. It is not safe to clobber them.
111 There are some uncommon cases where the address is already in a register
112 for some reason, but we cannot take advantage of that because we have
113 no easy way to unshare the MEM. In addition, looking up all stack
114 addresses is costly. */
116 static bool
117 can_simplify_addr (rtx addr)
119 rtx reg;
121 if (CONSTANT_ADDRESS_P (addr))
122 return false;
124 if (GET_CODE (addr) == PLUS)
125 reg = XEXP (addr, 0);
126 else
127 reg = addr;
129 return (!REG_P (reg)
130 || (REGNO (reg) != FRAME_POINTER_REGNUM
131 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132 && REGNO (reg) != ARG_POINTER_REGNUM));
135 /* MEM is the result of an address simplification, and temporarily
136 undoing changes OLD_NUM_CHANGES onwards restores the original address.
137 Return whether it is good to use the new address instead of the
138 old one. INSN is the containing instruction. */
140 static bool
141 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
143 int gain;
145 /* Prefer the new address if it is less expensive. */
146 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147 temporarily_undo_changes (old_num_changes);
148 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
149 MEM_ADDR_SPACE (mem), speed);
150 redo_changes (old_num_changes);
151 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
152 MEM_ADDR_SPACE (mem), speed);
154 /* If the addresses have equivalent cost, prefer the new address
155 if it has the highest `set_src_cost'. That has the potential of
156 eliminating the most insns without additional costs, and it
157 is the same that cse.cc used to do. */
158 if (gain == 0)
160 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
161 temporarily_undo_changes (old_num_changes);
162 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163 redo_changes (old_num_changes);
166 return (gain > 0);
170 namespace
172 class fwprop_propagation : public insn_propagation
174 public:
175 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
177 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
179 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
181 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182 bool folded_to_constants_p () const;
183 bool likely_profitable_p () const;
185 bool check_mem (int, rtx) final override;
186 void note_simplification (int, uint16_t, rtx, rtx) final override;
187 uint16_t classify_result (rtx, rtx);
189 private:
190 const bool single_use_p;
191 const bool single_ebb_p;
195 /* Prepare to replace FROM with TO in USE_INSN. */
197 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
198 set_info *def, rtx from, rtx to)
199 : insn_propagation (use_insn->rtl (), from, to),
200 single_use_p (def->single_nondebug_use ()),
201 single_ebb_p (use_insn->ebb () == def->ebb ())
203 should_check_mems = true;
204 should_note_simplifications = true;
207 /* MEM is the result of an address simplification, and temporarily
208 undoing changes OLD_NUM_CHANGES onwards restores the original address.
209 Return true if the propagation should continue, false if it has failed. */
211 bool
212 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
214 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
215 MEM_ADDR_SPACE (mem)))
217 failure_reason = "would create an invalid MEM";
218 return false;
221 temporarily_undo_changes (old_num_changes);
222 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
223 redo_changes (old_num_changes);
224 if (!can_simplify)
226 failure_reason = "would replace a frame address";
227 return false;
230 /* Copy propagations are always ok. Otherwise check the costs. */
231 if (!(REG_P (from) && REG_P (to))
232 && !should_replace_address (old_num_changes, mem, insn))
234 failure_reason = "would increase the cost of a MEM";
235 return false;
238 result_flags |= CHANGED_MEM;
239 return true;
242 /* OLDX has been simplified to NEWX. Describe the change in terms of
243 result_flags. */
245 uint16_t
246 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
248 if (CONSTANT_P (new_rtx))
250 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
251 and NEW_RTX is likely not a legitimate address. We want it to
252 disappear if it is invalid.
254 ??? Using the mode of the LO_SUM as the mode of the address
255 seems odd, but it was what the pre-SSA code did. */
256 if (GET_CODE (old_rtx) == LO_SUM
257 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
258 return CONSTANT;
259 return CONSTANT | PROFITABLE;
262 /* Allow replacements that simplify operations on a vector or complex
263 value to a component. The most prominent case is
264 (subreg ([vec_]concat ...)). */
265 if (REG_P (new_rtx)
266 && !HARD_REGISTER_P (new_rtx)
267 && (VECTOR_MODE_P (GET_MODE (from))
268 || COMPLEX_MODE_P (GET_MODE (from)))
269 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
270 return PROFITABLE;
272 /* Allow (subreg (mem)) -> (mem) simplifications with the following
273 exceptions:
274 1) Propagating (mem)s into multiple uses is not profitable.
275 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
276 runs less frequently.
277 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
278 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
279 ones. */
280 if (single_use_p
281 && single_ebb_p
282 && SUBREG_P (old_rtx)
283 && !paradoxical_subreg_p (old_rtx)
284 && MEM_P (new_rtx)
285 && !MEM_VOLATILE_P (new_rtx))
286 return PROFITABLE;
288 return 0;
291 /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
292 is the number of unrelated changes that had been made before processing
293 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
294 had at that point. */
296 void
297 fwprop_propagation::note_simplification (int old_num_changes,
298 uint16_t old_result_flags,
299 rtx old_rtx, rtx new_rtx)
301 result_flags &= ~(CONSTANT | PROFITABLE);
302 uint16_t new_flags = classify_result (old_rtx, new_rtx);
303 if (old_num_changes)
304 new_flags &= old_result_flags;
305 result_flags |= new_flags;
308 /* Return true if all substitutions eventually folded to constants. */
310 bool
311 fwprop_propagation::folded_to_constants_p () const
313 /* If we're propagating a HIGH, require it to be folded with a
314 partnering LO_SUM. For example, a REG_EQUAL note with a register
315 replaced by an unfolded HIGH is not useful. */
316 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
317 return true;
318 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
322 /* Return true if it is worth keeping the result of the propagation,
323 false if it would increase the complexity of the pattern too much. */
325 bool
326 fwprop_propagation::likely_profitable_p () const
328 if (changed_mem_p ())
329 return true;
331 if (!(result_flags & UNSIMPLIFIED)
332 && (result_flags & PROFITABLE))
333 return true;
335 if (REG_P (to))
336 return true;
338 if (GET_CODE (to) == SUBREG
339 && REG_P (SUBREG_REG (to))
340 && !paradoxical_subreg_p (to))
341 return true;
343 if (CONSTANT_P (to))
344 return true;
346 return false;
349 /* Check that X has a single def. */
351 static bool
352 reg_single_def_p (rtx x)
354 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
357 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
358 USE_INSN. Return the number of substitutions on success, otherwise return
359 -1 and leave USE_INSN unchanged.
361 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
362 to fold to a constant, so that the note does not use any more registers
363 than it did previously. If REQUIRE_CONSTANT is false, also allow the
364 substitution if it's something we'd normally allow for the main
365 instruction pattern. */
367 static int
368 try_fwprop_subst_note (insn_info *use_insn, set_info *def,
369 rtx note, rtx dest, rtx src, bool require_constant)
371 rtx_insn *use_rtl = use_insn->rtl ();
372 insn_info *def_insn = def->insn ();
374 insn_change_watermark watermark;
375 fwprop_propagation prop (use_insn, def, dest, src);
376 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
378 if (dump_file && (dump_flags & TDF_DETAILS))
379 fprintf (dump_file, "cannot propagate from insn %d into"
380 " notes of insn %d: %s\n", def_insn->uid (),
381 use_insn->uid (), prop.failure_reason);
382 return -1;
385 if (prop.num_replacements == 0)
386 return 0;
388 if (require_constant)
390 if (!prop.folded_to_constants_p ())
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, "cannot propagate from insn %d into"
394 " notes of insn %d: %s\n", def_insn->uid (),
395 use_insn->uid (), "wouldn't fold to constants");
396 return -1;
399 else
401 if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
403 if (dump_file && (dump_flags & TDF_DETAILS))
404 fprintf (dump_file, "cannot propagate from insn %d into"
405 " notes of insn %d: %s\n", def_insn->uid (),
406 use_insn->uid (), "would increase complexity of node");
407 return -1;
411 if (dump_file && (dump_flags & TDF_DETAILS))
413 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
414 INSN_UID (use_rtl));
415 temporarily_undo_changes (0);
416 print_inline_rtx (dump_file, note, 2);
417 redo_changes (0);
418 fprintf (dump_file, "\n with:\n ");
419 print_inline_rtx (dump_file, note, 2);
420 fprintf (dump_file, "\n");
422 watermark.keep ();
423 return prop.num_replacements;
426 /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
427 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
428 unchanged. */
430 static bool
431 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
432 set_info *def, rtx *loc, rtx dest, rtx src)
434 insn_info *use_insn = use_change.insn ();
435 rtx_insn *use_rtl = use_insn->rtl ();
436 insn_info *def_insn = def->insn ();
438 insn_change_watermark watermark;
439 fwprop_propagation prop (use_insn, def, dest, src);
440 if (!prop.apply_to_pattern (loc))
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, "cannot propagate from insn %d into"
444 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
445 prop.failure_reason);
446 return false;
449 if (prop.num_replacements == 0)
450 return false;
452 if (!prop.likely_profitable_p ()
453 && (prop.changed_mem_p ()
454 || use_insn->is_asm ()
455 || !single_set (use_rtl)))
457 if (dump_file && (dump_flags & TDF_DETAILS))
458 fprintf (dump_file, "cannot propagate from insn %d into"
459 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
460 "would increase complexity of pattern");
461 return false;
464 if (dump_file && (dump_flags & TDF_DETAILS))
466 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
467 def_insn->uid (), use_insn->uid ());
468 temporarily_undo_changes (0);
469 print_rtl_single (dump_file, PATTERN (use_rtl));
470 redo_changes (0);
473 /* ??? In theory, it should be better to use insn costs rather than
474 set_src_costs here. That would involve replacing this code with
475 change_is_worthwhile. */
476 bool ok = recog (attempt, use_change);
477 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
478 if (rtx use_set = single_set (use_rtl))
480 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
481 temporarily_undo_changes (0);
482 auto old_cost = set_src_cost (SET_SRC (use_set),
483 GET_MODE (SET_DEST (use_set)), speed);
484 redo_changes (0);
485 auto new_cost = set_src_cost (SET_SRC (use_set),
486 GET_MODE (SET_DEST (use_set)), speed);
487 if (new_cost > old_cost
488 || (new_cost == old_cost && !prop.likely_profitable_p ()))
490 if (dump_file)
491 fprintf (dump_file, "change not profitable"
492 " (cost %d -> cost %d)\n", old_cost, new_cost);
493 ok = false;
497 if (!ok)
499 /* The pattern didn't match, but if all uses of SRC folded to
500 constants, we can add a REG_EQUAL note for the result, if there
501 isn't one already. */
502 if (!prop.folded_to_constants_p ())
503 return false;
505 /* Test this first to avoid creating an unnecessary copy of SRC. */
506 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
507 return false;
509 rtx set = set_for_reg_notes (use_rtl);
510 if (!set || !REG_P (SET_DEST (set)))
511 return false;
513 rtx value = copy_rtx (SET_SRC (set));
514 cancel_changes (0);
516 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
517 because the bits in there can be anything and so might not
518 match the REG_EQUAL note content. See PR70574. */
519 if (contains_paradoxical_subreg_p (SET_SRC (set)))
520 return false;
522 if (dump_file && (dump_flags & TDF_DETAILS))
523 fprintf (dump_file, " Setting REG_EQUAL note\n");
525 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
528 rtx *note_ptr = &REG_NOTES (use_rtl);
529 while (rtx note = *note_ptr)
531 if ((REG_NOTE_KIND (note) == REG_EQUAL
532 || REG_NOTE_KIND (note) == REG_EQUIV)
533 && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
535 *note_ptr = XEXP (note, 1);
536 free_EXPR_LIST_node (note);
538 else
539 note_ptr = &XEXP (note, 1);
542 confirm_change_group ();
543 crtl->ssa->change_insn (use_change);
544 num_changes++;
545 return true;
548 /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
549 given that it was not possible to do this for USE_INSN's main pattern.
550 Return true on success, otherwise leave USE_INSN unchanged. */
552 static bool
553 try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
554 rtx dest, rtx src)
556 rtx_insn *use_rtl = use_insn->rtl ();
557 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
558 if ((REG_NOTE_KIND (note) == REG_EQUAL
559 || REG_NOTE_KIND (note) == REG_EQUIV)
560 && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
562 confirm_change_group ();
563 return true;
566 return false;
569 /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
570 into USE. If so, first try performing the substitution in location LOC
571 of USE->insn ()'s pattern. If that fails, try instead to substitute
572 into the notes.
574 Return true on success, otherwise leave USE_INSN unchanged. */
576 static bool
577 try_fwprop_subst (use_info *use, set_info *def,
578 rtx *loc, rtx dest, rtx src)
580 insn_info *use_insn = use->insn ();
581 insn_info *def_insn = def->insn ();
583 auto attempt = crtl->ssa->new_change_attempt ();
584 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
586 /* ??? Not really a meaningful test: it means we can propagate arithmetic
587 involving hard registers but not bare references to them. A better
588 test would be to iterate over src_uses looking for hard registers
589 that are not fixed. */
590 if (REG_P (src) && HARD_REGISTER_P (src))
591 return false;
593 /* ??? It would be better to make this EBB-based instead. That would
594 involve checking for equal EBBs rather than equal BBs and trying
595 to make the uses available at use_insn->ebb ()->first_bb (). */
596 if (def_insn->bb () != use_insn->bb ())
598 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
599 use_insn->bb (),
600 use_insn->is_debug_insn ());
601 if (!src_uses.is_valid ())
602 return false;
605 insn_change use_change (use_insn);
606 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
607 src_uses);
608 if (!use_change.new_uses.is_valid ())
609 return false;
611 /* ??? We could allow movement within the EBB by adding:
613 use_change.move_range = use_insn->ebb ()->insn_range (); */
614 if (!restrict_movement (use_change))
615 return false;
617 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
618 || try_fwprop_subst_notes (use_insn, def, dest, src));
621 /* For the given single_set INSN, containing SRC known to be a
622 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
623 is redundant due to the register being set by a LOAD_EXTEND_OP
624 load from memory. */
626 static bool
627 free_load_extend (rtx src, insn_info *insn)
629 rtx reg = XEXP (src, 0);
630 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
631 return false;
633 def_info *def = nullptr;
634 for (use_info *use : insn->uses ())
635 if (use->regno () == REGNO (reg))
637 def = use->def ();
638 break;
641 if (!def)
642 return false;
644 insn_info *def_insn = def->insn ();
645 if (def_insn->is_artificial ())
646 return false;
648 rtx_insn *def_rtl = def_insn->rtl ();
649 if (NONJUMP_INSN_P (def_rtl))
651 rtx patt = PATTERN (def_rtl);
653 if (GET_CODE (patt) == SET
654 && GET_CODE (SET_SRC (patt)) == MEM
655 && rtx_equal_p (SET_DEST (patt), reg))
656 return true;
658 return false;
661 /* Subroutine of forward_propagate_subreg that handles a use of DEST
662 in REF. The other parameters are the same. */
664 static bool
665 forward_propagate_subreg (use_info *use, set_info *def,
666 rtx dest, rtx src, df_ref ref)
668 scalar_int_mode int_use_mode, src_mode;
670 /* Only consider subregs... */
671 rtx use_reg = DF_REF_REG (ref);
672 machine_mode use_mode = GET_MODE (use_reg);
673 if (GET_CODE (use_reg) != SUBREG
674 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
675 return false;
677 /* ??? Replacing throughout the pattern would help for match_dups. */
678 rtx *loc = DF_REF_LOC (ref);
679 if (paradoxical_subreg_p (use_reg))
681 /* If this is a paradoxical SUBREG, we have no idea what value the
682 extra bits would have. However, if the operand is equivalent to
683 a SUBREG whose operand is the same as our mode, and all the modes
684 are within a word, we can just use the inner operand because
685 these SUBREGs just say how to treat the register. */
686 if (GET_CODE (src) == SUBREG
687 && REG_P (SUBREG_REG (src))
688 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
689 && GET_MODE (SUBREG_REG (src)) == use_mode
690 && subreg_lowpart_p (src))
691 return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
694 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
695 is the low part of the reg being extended then just use the inner
696 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
697 be removed due to it matching a LOAD_EXTEND_OP load from memory,
698 or due to the operation being a no-op when applied to registers.
699 For example, if we have:
701 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
702 B: (... (subreg:SI (reg:DI X)) ...)
704 and mode_rep_extended says that Y is already sign-extended,
705 the backend will typically allow A to be combined with the
706 definition of Y or, failing that, allow A to be deleted after
707 reload through register tying. Introducing more uses of Y
708 prevents both optimisations. */
709 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
710 && subreg_lowpart_p (use_reg))
712 if ((GET_CODE (src) == ZERO_EXTEND
713 || GET_CODE (src) == SIGN_EXTEND)
714 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
715 && REG_P (XEXP (src, 0))
716 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
717 && GET_MODE (XEXP (src, 0)) == use_mode
718 && !free_load_extend (src, def->insn ())
719 && (targetm.mode_rep_extended (int_use_mode, src_mode)
720 != (int) GET_CODE (src)))
721 return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
724 return false;
727 /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
728 the result, handling cases where DEST is used in a subreg and where
729 applying that subreg to SRC results in a useful simplification. */
731 static bool
732 forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
734 if (!use->includes_subregs () || !REG_P (dest))
735 return false;
737 if (GET_CODE (src) != SUBREG
738 && GET_CODE (src) != ZERO_EXTEND
739 && GET_CODE (src) != SIGN_EXTEND)
740 return false;
742 rtx_insn *use_rtl = use->insn ()->rtl ();
743 df_ref ref;
745 FOR_EACH_INSN_USE (ref, use_rtl)
746 if (DF_REF_REGNO (ref) == use->regno ()
747 && forward_propagate_subreg (use, def, dest, src, ref))
748 return true;
750 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
751 if (DF_REF_REGNO (ref) == use->regno ()
752 && forward_propagate_subreg (use, def, dest, src, ref))
753 return true;
755 return false;
758 /* Try to substitute (set DEST SRC), which defines DEF, into USE and
759 simplify the result. */
761 static bool
762 forward_propagate_and_simplify (use_info *use, set_info *def,
763 rtx dest, rtx src)
765 insn_info *use_insn = use->insn ();
766 rtx_insn *use_rtl = use_insn->rtl ();
767 insn_info *def_insn = def->insn ();
769 /* ??? This check seems unnecessary. We should be able to propagate
770 into any kind of instruction, regardless of whether it's a single set.
771 It seems odd to be more permissive with asms than normal instructions. */
772 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
773 rtx use_set = single_set (use_rtl);
774 if (need_single_set && !use_set)
775 return false;
777 /* Do not propagate into PC etc.
779 ??? This too seems unnecessary. The current code should work correctly
780 without it, including cases where jumps become unconditional. */
781 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
782 return false;
784 /* In __asm don't replace if src might need more registers than
785 reg, as that could increase register pressure on the __asm. */
786 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
787 return false;
789 /* Check if the def is loading something from the constant pool; in this
790 case we would undo optimization such as compress_float_constant.
791 Still, we can set a REG_EQUAL note. */
792 if (MEM_P (src) && MEM_READONLY_P (src))
794 rtx x = avoid_constant_pool_reference (src);
795 rtx note_set;
796 if (x != src
797 && (note_set = set_for_reg_notes (use_rtl))
798 && REG_P (SET_DEST (note_set))
799 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
801 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
802 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
803 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
804 if (old_rtx != new_rtx)
805 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
807 return false;
810 /* ??? Unconditionally propagating into PATTERN would work better
811 for instructions that have match_dups. */
812 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
813 return try_fwprop_subst (use, def, loc, dest, src);
816 /* Given a use USE of an insn, if it has a single reaching
817 definition, try to forward propagate it into that insn.
818 Return true if something changed.
820 REG_PROP_ONLY is true if we should only propagate register copies. */
822 static bool
823 forward_propagate_into (use_info *use, bool reg_prop_only = false)
825 if (use->includes_read_writes ())
826 return false;
828 /* Disregard uninitialized uses. */
829 set_info *def = use->def ();
830 if (!def)
831 return false;
833 /* Only consider single-register definitions. This could be relaxed,
834 but it should rarely be needed before RA. */
835 def = look_through_degenerate_phi (def);
836 if (def->includes_multiregs ())
837 return false;
839 /* Only consider uses whose definition comes from a real instruction. */
840 insn_info *def_insn = def->insn ();
841 if (def_insn->is_artificial ())
842 return false;
844 rtx_insn *def_rtl = def_insn->rtl ();
845 if (!NONJUMP_INSN_P (def_rtl))
846 return false;
847 /* ??? This seems an unnecessary restriction. We can easily tell
848 which set the definition comes from. */
849 if (multiple_sets (def_rtl))
850 return false;
851 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
852 if (!def_set)
853 return false;
855 rtx dest = SET_DEST (def_set);
856 rtx src = SET_SRC (def_set);
858 /* Allow propagations into a loop only for reg-to-reg copies, since
859 replacing one register by another shouldn't increase the cost.
860 Propagations from inner loop to outer loop should also be ok. */
861 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
862 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
863 if ((reg_prop_only
864 || (def_loop != use_loop
865 && !flow_loop_nested_p (use_loop, def_loop)))
866 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
867 return false;
869 /* Don't substitute into a non-local goto, this confuses CFG. */
870 insn_info *use_insn = use->insn ();
871 rtx_insn *use_rtl = use_insn->rtl ();
872 if (JUMP_P (use_rtl)
873 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
874 return false;
876 if (forward_propagate_and_simplify (use, def, dest, src)
877 || forward_propagate_subreg (use, def, dest, src))
878 return true;
880 return false;
883 static void
884 fwprop_init (void)
886 num_changes = 0;
887 calculate_dominance_info (CDI_DOMINATORS);
889 /* We do not always want to propagate into loops, so we have to find
890 loops and be careful about them. Avoid CFG modifications so that
891 we don't have to update dominance information afterwards for
892 build_single_def_use_links. */
893 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
895 df_analyze ();
896 crtl->ssa = new rtl_ssa::function_info (cfun);
899 static void
900 fwprop_done (void)
902 loop_optimizer_finalize ();
904 crtl->ssa->perform_pending_updates ();
905 free_dominance_info (CDI_DOMINATORS);
906 cleanup_cfg (0);
908 delete crtl->ssa;
909 crtl->ssa = nullptr;
911 delete_trivially_dead_insns (get_insns (), max_reg_num ());
913 if (dump_file)
914 fprintf (dump_file,
915 "\nNumber of successful forward propagations: %d\n\n",
916 num_changes);
919 /* Try to optimize INSN, returning true if something changes.
920 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
921 the full fwprop. */
923 static bool
924 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
926 for (use_info *use : insn->uses ())
928 if (use->is_mem ())
929 continue;
930 /* ??? The choices here follow those in the pre-SSA code. */
931 if (!use->includes_address_uses ())
933 if (forward_propagate_into (use, fwprop_addr_p))
934 return true;
936 else
938 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
939 /* The outermost loop is not really a loop. */
940 if (loop == NULL || loop_outer (loop) == NULL)
942 if (forward_propagate_into (use, fwprop_addr_p))
943 return true;
945 else if (fwprop_addr_p)
947 if (forward_propagate_into (use, false))
948 return true;
952 return false;
955 /* Main entry point. */
957 static bool
958 gate_fwprop (void)
960 return optimize > 0 && flag_forward_propagate;
963 static unsigned int
964 fwprop (bool fwprop_addr_p)
966 fwprop_init ();
968 /* Go through all the instructions (including debug instructions) looking
969 for uses that we could propagate into.
971 Do not forward propagate addresses into loops until after unrolling.
972 CSE did so because it was able to fix its own mess, but we are not. */
974 insn_info *next;
976 /* ??? This code uses a worklist in order to preserve the behavior
977 of the pre-SSA implementation. It would be better to instead
978 iterate on each instruction until no more propagations are
979 possible, then move on to the next. */
980 auto_vec<insn_info *> worklist;
981 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
983 next = insn->next_any_insn ();
984 if (insn->can_be_optimized () || insn->is_debug_insn ())
985 if (fwprop_insn (insn, fwprop_addr_p))
986 worklist.safe_push (insn);
988 for (unsigned int i = 0; i < worklist.length (); ++i)
990 insn_info *insn = worklist[i];
991 if (fwprop_insn (insn, fwprop_addr_p))
992 worklist.safe_push (insn);
995 fwprop_done ();
996 return 0;
999 namespace {
1001 const pass_data pass_data_rtl_fwprop =
1003 RTL_PASS, /* type */
1004 "fwprop1", /* name */
1005 OPTGROUP_NONE, /* optinfo_flags */
1006 TV_FWPROP, /* tv_id */
1007 0, /* properties_required */
1008 0, /* properties_provided */
1009 0, /* properties_destroyed */
1010 0, /* todo_flags_start */
1011 TODO_df_finish, /* todo_flags_finish */
1014 class pass_rtl_fwprop : public rtl_opt_pass
1016 public:
1017 pass_rtl_fwprop (gcc::context *ctxt)
1018 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1021 /* opt_pass methods: */
1022 bool gate (function *) final override { return gate_fwprop (); }
1023 unsigned int execute (function *) final override { return fwprop (false); }
1025 }; // class pass_rtl_fwprop
1027 } // anon namespace
1029 rtl_opt_pass *
1030 make_pass_rtl_fwprop (gcc::context *ctxt)
1032 return new pass_rtl_fwprop (ctxt);
1035 namespace {
1037 const pass_data pass_data_rtl_fwprop_addr =
1039 RTL_PASS, /* type */
1040 "fwprop2", /* name */
1041 OPTGROUP_NONE, /* optinfo_flags */
1042 TV_FWPROP, /* tv_id */
1043 0, /* properties_required */
1044 0, /* properties_provided */
1045 0, /* properties_destroyed */
1046 0, /* todo_flags_start */
1047 TODO_df_finish, /* todo_flags_finish */
1050 class pass_rtl_fwprop_addr : public rtl_opt_pass
1052 public:
1053 pass_rtl_fwprop_addr (gcc::context *ctxt)
1054 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1057 /* opt_pass methods: */
1058 bool gate (function *) final override { return gate_fwprop (); }
1059 unsigned int execute (function *) final override { return fwprop (true); }
1061 }; // class pass_rtl_fwprop_addr
1063 } // anon namespace
1065 rtl_opt_pass *
1066 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1068 return new pass_rtl_fwprop_addr (ctxt);