i386: Optimize pmovskb on zero_extend of subreg HI of pmovskb result [PR98461]
[official-gcc.git] / gcc / fwprop.c
blobeff8f7cc1411a2166ddb3d225684647c6346a72b
1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "rtl-ssa.h"
31 #include "sparseset.h"
32 #include "predict.h"
33 #include "cfgrtl.h"
34 #include "cfgcleanup.h"
35 #include "cfgloop.h"
36 #include "tree-pass.h"
37 #include "rtl-iter.h"
38 #include "target.h"
40 /* This pass does simple forward propagation and simplification when an
41 operand of an insn can only come from a single def. This pass uses
42 RTL SSA, so it is global. However, we only do limited analysis of
43 available expressions.
45 1) The pass tries to propagate the source of the def into the use,
46 and checks if the result is independent of the substituted value.
47 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
48 zero, independent of the source register.
50 In particular, we propagate constants into the use site. Sometimes
51 RTL expansion did not put the constant in the same insn on purpose,
52 to satisfy a predicate, and the result will fail to be recognized;
53 but this happens rarely and in this case we can still create a
54 REG_EQUAL note. For multi-word operations, this
56 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
57 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
58 (set (subreg:SI (reg:DI 122) 0)
59 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
60 (set (subreg:SI (reg:DI 122) 4)
61 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
63 can be simplified to the much simpler
65 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
66 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
68 This particular propagation is also effective at putting together
69 complex addressing modes. We are more aggressive inside MEMs, in
70 that all definitions are propagated if the use is in a MEM; if the
71 result is a valid memory address we check address_cost to decide
72 whether the substitution is worthwhile.
74 2) The pass propagates register copies. This is not as effective as
75 the copy propagation done by CSE's canon_reg, which works by walking
76 the instruction chain, it can help the other transformations.
78 We should consider removing this optimization, and instead reorder the
79 RTL passes, because GCSE does this transformation too. With some luck,
80 the CSE pass at the end of rest_of_handle_gcse could also go away.
82 3) The pass looks for paradoxical subregs that are actually unnecessary.
83 Things like this:
85 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
86 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
87 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
88 (subreg:SI (reg:QI 121) 0)))
90 are very common on machines that can only do word-sized operations.
91 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
92 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
93 we can replace the paradoxical subreg with simply (reg:WIDE M). The
94 above will simplify this to
96 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
97 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
98 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
100 where the first two insns are now dead. */
102 using namespace rtl_ssa;
104 static int num_changes;
106 /* Do not try to replace constant addresses or addresses of local and
107 argument slots. These MEM expressions are made only once and inserted
108 in many instructions, as well as being used to control symbol table
109 output. It is not safe to clobber them.
111 There are some uncommon cases where the address is already in a register
112 for some reason, but we cannot take advantage of that because we have
113 no easy way to unshare the MEM. In addition, looking up all stack
114 addresses is costly. */
116 static bool
117 can_simplify_addr (rtx addr)
119 rtx reg;
121 if (CONSTANT_ADDRESS_P (addr))
122 return false;
124 if (GET_CODE (addr) == PLUS)
125 reg = XEXP (addr, 0);
126 else
127 reg = addr;
129 return (!REG_P (reg)
130 || (REGNO (reg) != FRAME_POINTER_REGNUM
131 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132 && REGNO (reg) != ARG_POINTER_REGNUM));
135 /* MEM is the result of an address simplification, and temporarily
136 undoing changes OLD_NUM_CHANGES onwards restores the original address.
137 Return whether it is good to use the new address instead of the
138 old one. INSN is the containing instruction. */
140 static bool
141 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
143 int gain;
145 /* Prefer the new address if it is less expensive. */
146 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147 temporarily_undo_changes (old_num_changes);
148 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
149 MEM_ADDR_SPACE (mem), speed);
150 redo_changes (old_num_changes);
151 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
152 MEM_ADDR_SPACE (mem), speed);
154 /* If the addresses have equivalent cost, prefer the new address
155 if it has the highest `set_src_cost'. That has the potential of
156 eliminating the most insns without additional costs, and it
157 is the same that cse.c used to do. */
158 if (gain == 0)
160 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
161 temporarily_undo_changes (old_num_changes);
162 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163 redo_changes (old_num_changes);
166 return (gain > 0);
170 namespace
172 class fwprop_propagation : public insn_propagation
174 public:
175 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
177 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
179 fwprop_propagation (rtx_insn *, rtx, rtx);
181 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182 bool folded_to_constants_p () const;
183 bool profitable_p () const;
185 bool check_mem (int, rtx) final override;
186 void note_simplification (int, uint16_t, rtx, rtx) final override;
187 uint16_t classify_result (rtx, rtx);
191 /* Prepare to replace FROM with TO in INSN. */
193 fwprop_propagation::fwprop_propagation (rtx_insn *insn, rtx from, rtx to)
194 : insn_propagation (insn, from, to)
196 should_check_mems = true;
197 should_note_simplifications = true;
200 /* MEM is the result of an address simplification, and temporarily
201 undoing changes OLD_NUM_CHANGES onwards restores the original address.
202 Return true if the propagation should continue, false if it has failed. */
204 bool
205 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
207 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
208 MEM_ADDR_SPACE (mem)))
210 failure_reason = "would create an invalid MEM";
211 return false;
214 temporarily_undo_changes (old_num_changes);
215 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
216 redo_changes (old_num_changes);
217 if (!can_simplify)
219 failure_reason = "would replace a frame address";
220 return false;
223 /* Copy propagations are always ok. Otherwise check the costs. */
224 if (!(REG_P (from) && REG_P (to))
225 && !should_replace_address (old_num_changes, mem, insn))
227 failure_reason = "would increase the cost of a MEM";
228 return false;
231 result_flags |= CHANGED_MEM;
232 return true;
235 /* OLDX has been simplified to NEWX. Describe the change in terms of
236 result_flags. */
238 uint16_t
239 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
241 if (CONSTANT_P (new_rtx))
243 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
244 and NEW_RTX is likely not a legitimate address. We want it to
245 disappear if it is invalid.
247 ??? Using the mode of the LO_SUM as the mode of the address
248 seems odd, but it was what the pre-SSA code did. */
249 if (GET_CODE (old_rtx) == LO_SUM
250 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
251 return CONSTANT;
252 return CONSTANT | PROFITABLE;
255 /* Allow replacements that simplify operations on a vector or complex
256 value to a component. The most prominent case is
257 (subreg ([vec_]concat ...)). */
258 if (REG_P (new_rtx)
259 && !HARD_REGISTER_P (new_rtx)
260 && (VECTOR_MODE_P (GET_MODE (from))
261 || COMPLEX_MODE_P (GET_MODE (from)))
262 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
263 return PROFITABLE;
265 return 0;
268 /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
269 is the number of unrelated changes that had been made before processing
270 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
271 had at that point. */
273 void
274 fwprop_propagation::note_simplification (int old_num_changes,
275 uint16_t old_result_flags,
276 rtx old_rtx, rtx new_rtx)
278 result_flags &= ~(CONSTANT | PROFITABLE);
279 uint16_t new_flags = classify_result (old_rtx, new_rtx);
280 if (old_num_changes)
281 new_flags &= old_result_flags;
282 result_flags |= new_flags;
285 /* Return true if all substitutions eventually folded to constants. */
287 bool
288 fwprop_propagation::folded_to_constants_p () const
290 /* If we're propagating a HIGH, require it to be folded with a
291 partnering LO_SUM. For example, a REG_EQUAL note with a register
292 replaced by an unfolded HIGH is not useful. */
293 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
294 return true;
295 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
299 /* Return true if it is worth keeping the result of the propagation,
300 false if it would increase the complexity of the pattern too much. */
302 bool
303 fwprop_propagation::profitable_p () const
305 if (changed_mem_p ())
306 return true;
308 if (!(result_flags & UNSIMPLIFIED)
309 && (result_flags & PROFITABLE))
310 return true;
312 if (REG_P (to))
313 return true;
315 if (GET_CODE (to) == SUBREG
316 && REG_P (SUBREG_REG (to))
317 && !paradoxical_subreg_p (to))
318 return true;
320 if (CONSTANT_P (to))
321 return true;
323 return false;
326 /* Check that X has a single def. */
328 static bool
329 reg_single_def_p (rtx x)
331 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
334 /* Return true if X contains a paradoxical subreg. */
336 static bool
337 contains_paradoxical_subreg_p (rtx x)
339 subrtx_var_iterator::array_type array;
340 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
342 x = *iter;
343 if (SUBREG_P (x) && paradoxical_subreg_p (x))
344 return true;
346 return false;
349 /* Try to substitute (set DEST SRC) from DEF_INSN into note NOTE of USE_INSN.
350 Return the number of substitutions on success, otherwise return -1 and
351 leave USE_INSN unchanged.
353 If REQUIRE_CONSTANT is true, require all substituted occurences of SRC
354 to fold to a constant, so that the note does not use any more registers
355 than it did previously. If REQUIRE_CONSTANT is false, also allow the
356 substitution if it's something we'd normally allow for the main
357 instruction pattern. */
359 static int
360 try_fwprop_subst_note (insn_info *use_insn, insn_info *def_insn,
361 rtx note, rtx dest, rtx src, bool require_constant)
363 rtx_insn *use_rtl = use_insn->rtl ();
365 insn_change_watermark watermark;
366 fwprop_propagation prop (use_rtl, dest, src);
367 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
369 if (dump_file && (dump_flags & TDF_DETAILS))
370 fprintf (dump_file, "cannot propagate from insn %d into"
371 " notes of insn %d: %s\n", def_insn->uid (),
372 use_insn->uid (), prop.failure_reason);
373 return -1;
376 if (prop.num_replacements == 0)
377 return 0;
379 if (require_constant)
381 if (!prop.folded_to_constants_p ())
383 if (dump_file && (dump_flags & TDF_DETAILS))
384 fprintf (dump_file, "cannot propagate from insn %d into"
385 " notes of insn %d: %s\n", def_insn->uid (),
386 use_insn->uid (), "wouldn't fold to constants");
387 return -1;
390 else
392 if (!prop.folded_to_constants_p () && !prop.profitable_p ())
394 if (dump_file && (dump_flags & TDF_DETAILS))
395 fprintf (dump_file, "cannot propagate from insn %d into"
396 " notes of insn %d: %s\n", def_insn->uid (),
397 use_insn->uid (), "would increase complexity of node");
398 return -1;
402 if (dump_file && (dump_flags & TDF_DETAILS))
404 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
405 INSN_UID (use_rtl));
406 temporarily_undo_changes (0);
407 print_inline_rtx (dump_file, note, 2);
408 redo_changes (0);
409 fprintf (dump_file, "\n with:\n ");
410 print_inline_rtx (dump_file, note, 2);
411 fprintf (dump_file, "\n");
413 watermark.keep ();
414 return prop.num_replacements;
417 /* Try to substitute (set DEST SRC) from DEF_INSN into location LOC of
418 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
419 unchanged. */
421 static bool
422 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
423 insn_info *def_insn, rtx *loc, rtx dest, rtx src)
425 insn_info *use_insn = use_change.insn ();
426 rtx_insn *use_rtl = use_insn->rtl ();
428 insn_change_watermark watermark;
429 fwprop_propagation prop (use_rtl, dest, src);
430 if (!prop.apply_to_pattern (loc))
432 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "cannot propagate from insn %d into"
434 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
435 prop.failure_reason);
436 return false;
439 if (prop.num_replacements == 0)
440 return false;
442 if (!prop.profitable_p ())
444 if (dump_file && (dump_flags & TDF_DETAILS))
445 fprintf (dump_file, "cannot propagate from insn %d into"
446 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
447 "would increase complexity of pattern");
448 return false;
451 if (dump_file && (dump_flags & TDF_DETAILS))
453 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
454 def_insn->uid (), use_insn->uid ());
455 temporarily_undo_changes (0);
456 print_rtl_single (dump_file, PATTERN (use_rtl));
457 redo_changes (0);
460 /* ??? In theory, it should be better to use insn costs rather than
461 set_src_costs here. That would involve replacing this code with
462 change_is_worthwhile. */
463 bool ok = recog (attempt, use_change);
464 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
465 if (rtx use_set = single_set (use_rtl))
467 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
468 temporarily_undo_changes (0);
469 auto old_cost = set_src_cost (SET_SRC (use_set),
470 GET_MODE (SET_DEST (use_set)), speed);
471 redo_changes (0);
472 auto new_cost = set_src_cost (SET_SRC (use_set),
473 GET_MODE (SET_DEST (use_set)), speed);
474 if (new_cost > old_cost)
476 if (dump_file)
477 fprintf (dump_file, "change not profitable"
478 " (cost %d -> cost %d)\n", old_cost, new_cost);
479 ok = false;
483 if (!ok)
485 /* The pattern didn't match, but if all uses of SRC folded to
486 constants, we can add a REG_EQUAL note for the result, if there
487 isn't one already. */
488 if (!prop.folded_to_constants_p ())
489 return false;
491 /* Test this first to avoid creating an unnecessary copy of SRC. */
492 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
493 return false;
495 rtx set = set_for_reg_notes (use_rtl);
496 if (!set || !REG_P (SET_DEST (set)))
497 return false;
499 rtx value = copy_rtx (SET_SRC (set));
500 cancel_changes (0);
502 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
503 because the bits in there can be anything and so might not
504 match the REG_EQUAL note content. See PR70574. */
505 if (contains_paradoxical_subreg_p (SET_SRC (set)))
506 return false;
508 if (dump_file && (dump_flags & TDF_DETAILS))
509 fprintf (dump_file, " Setting REG_EQUAL note\n");
511 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
514 rtx *note_ptr = &REG_NOTES (use_rtl);
515 while (rtx note = *note_ptr)
517 if ((REG_NOTE_KIND (note) == REG_EQUAL
518 || REG_NOTE_KIND (note) == REG_EQUIV)
519 && try_fwprop_subst_note (use_insn, def_insn, note,
520 dest, src, false) < 0)
522 *note_ptr = XEXP (note, 1);
523 free_EXPR_LIST_node (note);
525 else
526 note_ptr = &XEXP (note, 1);
529 confirm_change_group ();
530 crtl->ssa->change_insn (use_change);
531 num_changes++;
532 return true;
535 /* Try to substitute (set DEST SRC) from DEF_INSN into USE_INSN's notes,
536 given that it was not possible to do this for USE_INSN's main pattern.
537 Return true on success, otherwise leave USE_INSN unchanged. */
539 static bool
540 try_fwprop_subst_notes (insn_info *use_insn, insn_info *def_insn,
541 rtx dest, rtx src)
543 rtx_insn *use_rtl = use_insn->rtl ();
544 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
545 if ((REG_NOTE_KIND (note) == REG_EQUAL
546 || REG_NOTE_KIND (note) == REG_EQUIV)
547 && try_fwprop_subst_note (use_insn, def_insn, note,
548 dest, src, true) > 0)
550 confirm_change_group ();
551 return true;
554 return false;
557 /* Check whether we could validly substitute (set DEST SRC) from DEF_INSN
558 into USE. If so, first try performing the substitution in location LOC
559 of USE->insn ()'s pattern. If that fails, try instead to substitute
560 into the notes.
562 Return true on success, otherwise leave USE_INSN unchanged. */
564 static bool
565 try_fwprop_subst (use_info *use, insn_info *def_insn,
566 rtx *loc, rtx dest, rtx src)
568 insn_info *use_insn = use->insn ();
570 auto attempt = crtl->ssa->new_change_attempt ();
571 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
573 /* ??? Not really a meaningful test: it means we can propagate arithmetic
574 involving hard registers but not bare references to them. A better
575 test would be to iterate over src_uses looking for hard registers
576 that are not fixed. */
577 if (REG_P (src) && HARD_REGISTER_P (src))
578 return false;
580 /* ??? It would be better to make this EBB-based instead. That would
581 involve checking for equal EBBs rather than equal BBs and trying
582 to make the uses available at use_insn->ebb ()->first_bb (). */
583 if (def_insn->bb () != use_insn->bb ())
585 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
586 use_insn->bb ());
587 if (!src_uses.is_valid ())
588 return false;
591 insn_change use_change (use_insn);
592 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
593 src_uses);
594 if (!use_change.new_uses.is_valid ())
595 return false;
597 /* ??? We could allow movement within the EBB by adding:
599 use_change.move_range = use_insn->ebb ()->insn_range (); */
600 if (!restrict_movement (use_change))
601 return false;
603 return (try_fwprop_subst_pattern (attempt, use_change, def_insn,
604 loc, dest, src)
605 || try_fwprop_subst_notes (use_insn, def_insn, dest, src));
608 /* For the given single_set INSN, containing SRC known to be a
609 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
610 is redundant due to the register being set by a LOAD_EXTEND_OP
611 load from memory. */
613 static bool
614 free_load_extend (rtx src, insn_info *insn)
616 rtx reg = XEXP (src, 0);
617 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
618 return false;
620 def_info *def = nullptr;
621 for (use_info *use : insn->uses ())
622 if (use->regno () == REGNO (reg))
624 def = use->def ();
625 break;
628 if (!def)
629 return false;
631 insn_info *def_insn = def->insn ();
632 if (def_insn->is_artificial ())
633 return false;
635 rtx_insn *def_rtl = def_insn->rtl ();
636 if (NONJUMP_INSN_P (def_rtl))
638 rtx patt = PATTERN (def_rtl);
640 if (GET_CODE (patt) == SET
641 && GET_CODE (SET_SRC (patt)) == MEM
642 && rtx_equal_p (SET_DEST (patt), reg))
643 return true;
645 return false;
648 /* Subroutine of forward_propagate_subreg that handles a use of DEST
649 in REF. The other parameters are the same. */
651 static bool
652 forward_propagate_subreg (use_info *use, insn_info *def_insn,
653 rtx dest, rtx src, df_ref ref)
655 scalar_int_mode int_use_mode, src_mode;
657 /* Only consider subregs... */
658 rtx use_reg = DF_REF_REG (ref);
659 machine_mode use_mode = GET_MODE (use_reg);
660 if (GET_CODE (use_reg) != SUBREG
661 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
662 return false;
664 /* ??? Replacing throughout the pattern would help for match_dups. */
665 rtx *loc = DF_REF_LOC (ref);
666 if (paradoxical_subreg_p (use_reg))
668 /* If this is a paradoxical SUBREG, we have no idea what value the
669 extra bits would have. However, if the operand is equivalent to
670 a SUBREG whose operand is the same as our mode, and all the modes
671 are within a word, we can just use the inner operand because
672 these SUBREGs just say how to treat the register. */
673 if (GET_CODE (src) == SUBREG
674 && REG_P (SUBREG_REG (src))
675 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
676 && GET_MODE (SUBREG_REG (src)) == use_mode
677 && subreg_lowpart_p (src))
678 return try_fwprop_subst (use, def_insn, loc,
679 use_reg, SUBREG_REG (src));
682 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
683 is the low part of the reg being extended then just use the inner
684 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
685 be removed due to it matching a LOAD_EXTEND_OP load from memory,
686 or due to the operation being a no-op when applied to registers.
687 For example, if we have:
689 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
690 B: (... (subreg:SI (reg:DI X)) ...)
692 and mode_rep_extended says that Y is already sign-extended,
693 the backend will typically allow A to be combined with the
694 definition of Y or, failing that, allow A to be deleted after
695 reload through register tying. Introducing more uses of Y
696 prevents both optimisations. */
697 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
698 && subreg_lowpart_p (use_reg))
700 if ((GET_CODE (src) == ZERO_EXTEND
701 || GET_CODE (src) == SIGN_EXTEND)
702 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
703 && REG_P (XEXP (src, 0))
704 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
705 && GET_MODE (XEXP (src, 0)) == use_mode
706 && !free_load_extend (src, def_insn)
707 && (targetm.mode_rep_extended (int_use_mode, src_mode)
708 != (int) GET_CODE (src)))
709 return try_fwprop_subst (use, def_insn, loc, use_reg, XEXP (src, 0));
712 return false;
715 /* Try to substitute (set DEST SRC) from DEF_INSN into USE and simplify
716 the result, handling cases where DEST is used in a subreg and where
717 applying that subreg to SRC results in a useful simplification. */
719 static bool
720 forward_propagate_subreg (use_info *use, insn_info *def_insn,
721 rtx dest, rtx src)
723 if (!use->includes_subregs () || !REG_P (dest))
724 return false;
726 if (GET_CODE (src) != SUBREG
727 && GET_CODE (src) != ZERO_EXTEND
728 && GET_CODE (src) != SIGN_EXTEND)
729 return false;
731 rtx_insn *use_rtl = use->insn ()->rtl ();
732 df_ref ref;
734 FOR_EACH_INSN_USE (ref, use_rtl)
735 if (DF_REF_REGNO (ref) == use->regno ()
736 && forward_propagate_subreg (use, def_insn, dest, src, ref))
737 return true;
739 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
740 if (DF_REF_REGNO (ref) == use->regno ()
741 && forward_propagate_subreg (use, def_insn, dest, src, ref))
742 return true;
744 return false;
747 /* Try to substitute (set DEST SRC) from DEF_INSN into USE and
748 simplify the result. */
750 static bool
751 forward_propagate_and_simplify (use_info *use, insn_info *def_insn,
752 rtx dest, rtx src)
754 insn_info *use_insn = use->insn ();
755 rtx_insn *use_rtl = use_insn->rtl ();
757 /* ??? This check seems unnecessary. We should be able to propagate
758 into any kind of instruction, regardless of whether it's a single set.
759 It seems odd to be more permissive with asms than normal instructions. */
760 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
761 rtx use_set = single_set (use_rtl);
762 if (need_single_set && !use_set)
763 return false;
765 /* Do not propagate into PC, CC0, etc.
767 ??? This too seems unnecessary. The current code should work correctly
768 without it, including cases where jumps become unconditional. */
769 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
770 return false;
772 /* In __asm don't replace if src might need more registers than
773 reg, as that could increase register pressure on the __asm. */
774 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
775 return false;
777 /* Check if the def is loading something from the constant pool; in this
778 case we would undo optimization such as compress_float_constant.
779 Still, we can set a REG_EQUAL note. */
780 if (MEM_P (src) && MEM_READONLY_P (src))
782 rtx x = avoid_constant_pool_reference (src);
783 rtx note_set;
784 if (x != src
785 && (note_set = set_for_reg_notes (use_rtl))
786 && REG_P (SET_DEST (note_set))
787 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
789 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
790 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
791 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
792 if (old_rtx != new_rtx)
793 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
795 return false;
798 /* ??? Unconditionally propagating into PATTERN would work better
799 for instructions that have match_dups. */
800 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
801 return try_fwprop_subst (use, def_insn, loc, dest, src);
804 /* Given a use USE of an insn, if it has a single reaching
805 definition, try to forward propagate it into that insn.
806 Return true if something changed.
808 REG_PROP_ONLY is true if we should only propagate register copies. */
810 static bool
811 forward_propagate_into (use_info *use, bool reg_prop_only = false)
813 if (use->includes_read_writes ())
814 return false;
816 /* Disregard uninitialized uses. */
817 def_info *def = use->def ();
818 if (!def)
819 return false;
821 /* Only consider single-register definitions. This could be relaxed,
822 but it should rarely be needed before RA. */
823 def = look_through_degenerate_phi (def);
824 if (def->includes_multiregs ())
825 return false;
827 /* Only consider uses whose definition comes from a real instruction. */
828 insn_info *def_insn = def->insn ();
829 if (def_insn->is_artificial ())
830 return false;
832 rtx_insn *def_rtl = def_insn->rtl ();
833 if (!NONJUMP_INSN_P (def_rtl))
834 return false;
835 /* ??? This seems an unnecessary restriction. We can easily tell
836 which set the definition comes from. */
837 if (multiple_sets (def_rtl))
838 return false;
839 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
840 if (!def_set)
841 return false;
843 rtx dest = SET_DEST (def_set);
844 rtx src = SET_SRC (def_set);
846 /* Allow propagations into a loop only for reg-to-reg copies, since
847 replacing one register by another shouldn't increase the cost. */
848 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
849 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
850 if ((reg_prop_only || def_loop != use_loop)
851 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
852 return false;
854 /* Don't substitute into a non-local goto, this confuses CFG. */
855 insn_info *use_insn = use->insn ();
856 rtx_insn *use_rtl = use_insn->rtl ();
857 if (JUMP_P (use_rtl)
858 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
859 return false;
861 if (forward_propagate_and_simplify (use, def_insn, dest, src)
862 || forward_propagate_subreg (use, def_insn, dest, src))
863 return true;
865 return false;
868 static void
869 fwprop_init (void)
871 num_changes = 0;
872 calculate_dominance_info (CDI_DOMINATORS);
874 /* We do not always want to propagate into loops, so we have to find
875 loops and be careful about them. Avoid CFG modifications so that
876 we don't have to update dominance information afterwards for
877 build_single_def_use_links. */
878 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
880 df_analyze ();
881 crtl->ssa = new rtl_ssa::function_info (cfun);
884 static void
885 fwprop_done (void)
887 loop_optimizer_finalize ();
889 crtl->ssa->perform_pending_updates ();
890 free_dominance_info (CDI_DOMINATORS);
891 cleanup_cfg (0);
893 delete crtl->ssa;
894 crtl->ssa = nullptr;
896 delete_trivially_dead_insns (get_insns (), max_reg_num ());
898 if (dump_file)
899 fprintf (dump_file,
900 "\nNumber of successful forward propagations: %d\n\n",
901 num_changes);
904 /* Try to optimize INSN, returning true if something changes.
905 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
906 the full fwprop. */
908 static bool
909 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
911 for (use_info *use : insn->uses ())
913 if (use->is_mem ())
914 continue;
915 /* ??? The choices here follow those in the pre-SSA code. */
916 if (!use->includes_address_uses ())
918 if (forward_propagate_into (use, fwprop_addr_p))
919 return true;
921 else
923 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
924 /* The outermost loop is not really a loop. */
925 if (loop == NULL || loop_outer (loop) == NULL)
927 if (forward_propagate_into (use, fwprop_addr_p))
928 return true;
930 else if (fwprop_addr_p)
932 if (forward_propagate_into (use, false))
933 return true;
937 return false;
940 /* Main entry point. */
942 static bool
943 gate_fwprop (void)
945 return optimize > 0 && flag_forward_propagate;
948 static unsigned int
949 fwprop (bool fwprop_addr_p)
951 fwprop_init ();
953 /* Go through all the instructions (including debug instructions) looking
954 for uses that we could propagate into.
956 Do not forward propagate addresses into loops until after unrolling.
957 CSE did so because it was able to fix its own mess, but we are not. */
959 insn_info *next;
961 /* ??? This code uses a worklist in order to preserve the behavior
962 of the pre-SSA implementation. It would be better to instead
963 iterate on each instruction until no more propagations are
964 possible, then move on to the next. */
965 auto_vec<insn_info *> worklist;
966 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
968 next = insn->next_any_insn ();
969 if (insn->can_be_optimized () || insn->is_debug_insn ())
970 if (fwprop_insn (insn, fwprop_addr_p))
971 worklist.safe_push (insn);
973 for (unsigned int i = 0; i < worklist.length (); ++i)
975 insn_info *insn = worklist[i];
976 if (fwprop_insn (insn, fwprop_addr_p))
977 worklist.safe_push (insn);
980 fwprop_done ();
981 return 0;
984 namespace {
986 const pass_data pass_data_rtl_fwprop =
988 RTL_PASS, /* type */
989 "fwprop1", /* name */
990 OPTGROUP_NONE, /* optinfo_flags */
991 TV_FWPROP, /* tv_id */
992 0, /* properties_required */
993 0, /* properties_provided */
994 0, /* properties_destroyed */
995 0, /* todo_flags_start */
996 TODO_df_finish, /* todo_flags_finish */
999 class pass_rtl_fwprop : public rtl_opt_pass
1001 public:
1002 pass_rtl_fwprop (gcc::context *ctxt)
1003 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1006 /* opt_pass methods: */
1007 virtual bool gate (function *) { return gate_fwprop (); }
1008 virtual unsigned int execute (function *) { return fwprop (false); }
1010 }; // class pass_rtl_fwprop
1012 } // anon namespace
1014 rtl_opt_pass *
1015 make_pass_rtl_fwprop (gcc::context *ctxt)
1017 return new pass_rtl_fwprop (ctxt);
1020 namespace {
1022 const pass_data pass_data_rtl_fwprop_addr =
1024 RTL_PASS, /* type */
1025 "fwprop2", /* name */
1026 OPTGROUP_NONE, /* optinfo_flags */
1027 TV_FWPROP, /* tv_id */
1028 0, /* properties_required */
1029 0, /* properties_provided */
1030 0, /* properties_destroyed */
1031 0, /* todo_flags_start */
1032 TODO_df_finish, /* todo_flags_finish */
1035 class pass_rtl_fwprop_addr : public rtl_opt_pass
1037 public:
1038 pass_rtl_fwprop_addr (gcc::context *ctxt)
1039 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1042 /* opt_pass methods: */
1043 virtual bool gate (function *) { return gate_fwprop (); }
1044 virtual unsigned int execute (function *) { return fwprop (true); }
1046 }; // class pass_rtl_fwprop_addr
1048 } // anon namespace
1050 rtl_opt_pass *
1051 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1053 return new pass_rtl_fwprop_addr (ctxt);