aix: Use lcomm for TLS static data.
[official-gcc.git] / gcc / fwprop.c
blob4b8a554e823f90d227ee7b3083d982498f07c236
1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "rtl-ssa.h"
31 #include "predict.h"
32 #include "cfgrtl.h"
33 #include "cfgcleanup.h"
34 #include "cfgloop.h"
35 #include "tree-pass.h"
36 #include "rtl-iter.h"
37 #include "target.h"
39 /* This pass does simple forward propagation and simplification when an
40 operand of an insn can only come from a single def. This pass uses
41 RTL SSA, so it is global. However, we only do limited analysis of
42 available expressions.
44 1) The pass tries to propagate the source of the def into the use,
45 and checks if the result is independent of the substituted value.
46 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
47 zero, independent of the source register.
49 In particular, we propagate constants into the use site. Sometimes
50 RTL expansion did not put the constant in the same insn on purpose,
51 to satisfy a predicate, and the result will fail to be recognized;
52 but this happens rarely and in this case we can still create a
53 REG_EQUAL note. For multi-word operations, this
55 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
56 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
57 (set (subreg:SI (reg:DI 122) 0)
58 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
59 (set (subreg:SI (reg:DI 122) 4)
60 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
62 can be simplified to the much simpler
64 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
65 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
67 This particular propagation is also effective at putting together
68 complex addressing modes. We are more aggressive inside MEMs, in
69 that all definitions are propagated if the use is in a MEM; if the
70 result is a valid memory address we check address_cost to decide
71 whether the substitution is worthwhile.
73 2) The pass propagates register copies. This is not as effective as
74 the copy propagation done by CSE's canon_reg, which works by walking
75 the instruction chain, it can help the other transformations.
77 We should consider removing this optimization, and instead reorder the
78 RTL passes, because GCSE does this transformation too. With some luck,
79 the CSE pass at the end of rest_of_handle_gcse could also go away.
81 3) The pass looks for paradoxical subregs that are actually unnecessary.
82 Things like this:
84 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
85 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
86 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
87 (subreg:SI (reg:QI 121) 0)))
89 are very common on machines that can only do word-sized operations.
90 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
91 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
92 we can replace the paradoxical subreg with simply (reg:WIDE M). The
93 above will simplify this to
95 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
96 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
97 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
99 where the first two insns are now dead. */
101 using namespace rtl_ssa;
103 static int num_changes;
105 /* Do not try to replace constant addresses or addresses of local and
106 argument slots. These MEM expressions are made only once and inserted
107 in many instructions, as well as being used to control symbol table
108 output. It is not safe to clobber them.
110 There are some uncommon cases where the address is already in a register
111 for some reason, but we cannot take advantage of that because we have
112 no easy way to unshare the MEM. In addition, looking up all stack
113 addresses is costly. */
115 static bool
116 can_simplify_addr (rtx addr)
118 rtx reg;
120 if (CONSTANT_ADDRESS_P (addr))
121 return false;
123 if (GET_CODE (addr) == PLUS)
124 reg = XEXP (addr, 0);
125 else
126 reg = addr;
128 return (!REG_P (reg)
129 || (REGNO (reg) != FRAME_POINTER_REGNUM
130 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
131 && REGNO (reg) != ARG_POINTER_REGNUM));
134 /* MEM is the result of an address simplification, and temporarily
135 undoing changes OLD_NUM_CHANGES onwards restores the original address.
136 Return whether it is good to use the new address instead of the
137 old one. INSN is the containing instruction. */
139 static bool
140 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
142 int gain;
144 /* Prefer the new address if it is less expensive. */
145 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
146 temporarily_undo_changes (old_num_changes);
147 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
148 MEM_ADDR_SPACE (mem), speed);
149 redo_changes (old_num_changes);
150 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
151 MEM_ADDR_SPACE (mem), speed);
153 /* If the addresses have equivalent cost, prefer the new address
154 if it has the highest `set_src_cost'. That has the potential of
155 eliminating the most insns without additional costs, and it
156 is the same that cse.c used to do. */
157 if (gain == 0)
159 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
160 temporarily_undo_changes (old_num_changes);
161 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
162 redo_changes (old_num_changes);
165 return (gain > 0);
169 namespace
171 class fwprop_propagation : public insn_propagation
173 public:
174 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
175 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
176 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
178 fwprop_propagation (insn_info *, insn_info *, rtx, rtx);
180 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
181 bool folded_to_constants_p () const;
182 bool profitable_p () const;
184 bool check_mem (int, rtx) final override;
185 void note_simplification (int, uint16_t, rtx, rtx) final override;
186 uint16_t classify_result (rtx, rtx);
188 private:
189 const bool single_use_p;
190 const bool single_ebb_p;
194 /* Prepare to replace FROM with TO in INSN. */
196 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
197 insn_info *def_insn, rtx from, rtx to)
198 : insn_propagation (use_insn->rtl (), from, to),
199 single_use_p (def_insn->num_uses () == 1),
200 single_ebb_p (use_insn->ebb () == def_insn->ebb ())
202 should_check_mems = true;
203 should_note_simplifications = true;
206 /* MEM is the result of an address simplification, and temporarily
207 undoing changes OLD_NUM_CHANGES onwards restores the original address.
208 Return true if the propagation should continue, false if it has failed. */
210 bool
211 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
213 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
214 MEM_ADDR_SPACE (mem)))
216 failure_reason = "would create an invalid MEM";
217 return false;
220 temporarily_undo_changes (old_num_changes);
221 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
222 redo_changes (old_num_changes);
223 if (!can_simplify)
225 failure_reason = "would replace a frame address";
226 return false;
229 /* Copy propagations are always ok. Otherwise check the costs. */
230 if (!(REG_P (from) && REG_P (to))
231 && !should_replace_address (old_num_changes, mem, insn))
233 failure_reason = "would increase the cost of a MEM";
234 return false;
237 result_flags |= CHANGED_MEM;
238 return true;
241 /* OLDX has been simplified to NEWX. Describe the change in terms of
242 result_flags. */
244 uint16_t
245 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
247 if (CONSTANT_P (new_rtx))
249 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
250 and NEW_RTX is likely not a legitimate address. We want it to
251 disappear if it is invalid.
253 ??? Using the mode of the LO_SUM as the mode of the address
254 seems odd, but it was what the pre-SSA code did. */
255 if (GET_CODE (old_rtx) == LO_SUM
256 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
257 return CONSTANT;
258 return CONSTANT | PROFITABLE;
261 /* Allow replacements that simplify operations on a vector or complex
262 value to a component. The most prominent case is
263 (subreg ([vec_]concat ...)). */
264 if (REG_P (new_rtx)
265 && !HARD_REGISTER_P (new_rtx)
266 && (VECTOR_MODE_P (GET_MODE (from))
267 || COMPLEX_MODE_P (GET_MODE (from)))
268 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
269 return PROFITABLE;
271 /* Allow (subreg (mem)) -> (mem) simplifications with the following
272 exceptions:
273 1) Propagating (mem)s into multiple uses is not profitable.
274 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
275 runs less frequently.
276 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
277 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
278 ones. */
279 if (single_use_p
280 && single_ebb_p
281 && SUBREG_P (old_rtx)
282 && !paradoxical_subreg_p (old_rtx)
283 && MEM_P (new_rtx)
284 && !MEM_VOLATILE_P (new_rtx))
285 return PROFITABLE;
287 return 0;
290 /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
291 is the number of unrelated changes that had been made before processing
292 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
293 had at that point. */
295 void
296 fwprop_propagation::note_simplification (int old_num_changes,
297 uint16_t old_result_flags,
298 rtx old_rtx, rtx new_rtx)
300 result_flags &= ~(CONSTANT | PROFITABLE);
301 uint16_t new_flags = classify_result (old_rtx, new_rtx);
302 if (old_num_changes)
303 new_flags &= old_result_flags;
304 result_flags |= new_flags;
307 /* Return true if all substitutions eventually folded to constants. */
309 bool
310 fwprop_propagation::folded_to_constants_p () const
312 /* If we're propagating a HIGH, require it to be folded with a
313 partnering LO_SUM. For example, a REG_EQUAL note with a register
314 replaced by an unfolded HIGH is not useful. */
315 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
316 return true;
317 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
321 /* Return true if it is worth keeping the result of the propagation,
322 false if it would increase the complexity of the pattern too much. */
324 bool
325 fwprop_propagation::profitable_p () const
327 if (changed_mem_p ())
328 return true;
330 if (!(result_flags & UNSIMPLIFIED)
331 && (result_flags & PROFITABLE))
332 return true;
334 if (REG_P (to))
335 return true;
337 if (GET_CODE (to) == SUBREG
338 && REG_P (SUBREG_REG (to))
339 && !paradoxical_subreg_p (to))
340 return true;
342 if (CONSTANT_P (to))
343 return true;
345 return false;
348 /* Check that X has a single def. */
350 static bool
351 reg_single_def_p (rtx x)
353 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
356 /* Return true if X contains a paradoxical subreg. */
358 static bool
359 contains_paradoxical_subreg_p (rtx x)
361 subrtx_var_iterator::array_type array;
362 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
364 x = *iter;
365 if (SUBREG_P (x) && paradoxical_subreg_p (x))
366 return true;
368 return false;
371 /* Try to substitute (set DEST SRC) from DEF_INSN into note NOTE of USE_INSN.
372 Return the number of substitutions on success, otherwise return -1 and
373 leave USE_INSN unchanged.
375 If REQUIRE_CONSTANT is true, require all substituted occurences of SRC
376 to fold to a constant, so that the note does not use any more registers
377 than it did previously. If REQUIRE_CONSTANT is false, also allow the
378 substitution if it's something we'd normally allow for the main
379 instruction pattern. */
381 static int
382 try_fwprop_subst_note (insn_info *use_insn, insn_info *def_insn,
383 rtx note, rtx dest, rtx src, bool require_constant)
385 rtx_insn *use_rtl = use_insn->rtl ();
387 insn_change_watermark watermark;
388 fwprop_propagation prop (use_insn, def_insn, dest, src);
389 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
391 if (dump_file && (dump_flags & TDF_DETAILS))
392 fprintf (dump_file, "cannot propagate from insn %d into"
393 " notes of insn %d: %s\n", def_insn->uid (),
394 use_insn->uid (), prop.failure_reason);
395 return -1;
398 if (prop.num_replacements == 0)
399 return 0;
401 if (require_constant)
403 if (!prop.folded_to_constants_p ())
405 if (dump_file && (dump_flags & TDF_DETAILS))
406 fprintf (dump_file, "cannot propagate from insn %d into"
407 " notes of insn %d: %s\n", def_insn->uid (),
408 use_insn->uid (), "wouldn't fold to constants");
409 return -1;
412 else
414 if (!prop.folded_to_constants_p () && !prop.profitable_p ())
416 if (dump_file && (dump_flags & TDF_DETAILS))
417 fprintf (dump_file, "cannot propagate from insn %d into"
418 " notes of insn %d: %s\n", def_insn->uid (),
419 use_insn->uid (), "would increase complexity of node");
420 return -1;
424 if (dump_file && (dump_flags & TDF_DETAILS))
426 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
427 INSN_UID (use_rtl));
428 temporarily_undo_changes (0);
429 print_inline_rtx (dump_file, note, 2);
430 redo_changes (0);
431 fprintf (dump_file, "\n with:\n ");
432 print_inline_rtx (dump_file, note, 2);
433 fprintf (dump_file, "\n");
435 watermark.keep ();
436 return prop.num_replacements;
439 /* Try to substitute (set DEST SRC) from DEF_INSN into location LOC of
440 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
441 unchanged. */
443 static bool
444 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
445 insn_info *def_insn, rtx *loc, rtx dest, rtx src)
447 insn_info *use_insn = use_change.insn ();
448 rtx_insn *use_rtl = use_insn->rtl ();
450 insn_change_watermark watermark;
451 fwprop_propagation prop (use_insn, def_insn, dest, src);
452 if (!prop.apply_to_pattern (loc))
454 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "cannot propagate from insn %d into"
456 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
457 prop.failure_reason);
458 return false;
461 if (prop.num_replacements == 0)
462 return false;
464 if (!prop.profitable_p ())
466 if (dump_file && (dump_flags & TDF_DETAILS))
467 fprintf (dump_file, "cannot propagate from insn %d into"
468 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
469 "would increase complexity of pattern");
470 return false;
473 if (dump_file && (dump_flags & TDF_DETAILS))
475 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
476 def_insn->uid (), use_insn->uid ());
477 temporarily_undo_changes (0);
478 print_rtl_single (dump_file, PATTERN (use_rtl));
479 redo_changes (0);
482 /* ??? In theory, it should be better to use insn costs rather than
483 set_src_costs here. That would involve replacing this code with
484 change_is_worthwhile. */
485 bool ok = recog (attempt, use_change);
486 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
487 if (rtx use_set = single_set (use_rtl))
489 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
490 temporarily_undo_changes (0);
491 auto old_cost = set_src_cost (SET_SRC (use_set),
492 GET_MODE (SET_DEST (use_set)), speed);
493 redo_changes (0);
494 auto new_cost = set_src_cost (SET_SRC (use_set),
495 GET_MODE (SET_DEST (use_set)), speed);
496 if (new_cost > old_cost)
498 if (dump_file)
499 fprintf (dump_file, "change not profitable"
500 " (cost %d -> cost %d)\n", old_cost, new_cost);
501 ok = false;
505 if (!ok)
507 /* The pattern didn't match, but if all uses of SRC folded to
508 constants, we can add a REG_EQUAL note for the result, if there
509 isn't one already. */
510 if (!prop.folded_to_constants_p ())
511 return false;
513 /* Test this first to avoid creating an unnecessary copy of SRC. */
514 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
515 return false;
517 rtx set = set_for_reg_notes (use_rtl);
518 if (!set || !REG_P (SET_DEST (set)))
519 return false;
521 rtx value = copy_rtx (SET_SRC (set));
522 cancel_changes (0);
524 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
525 because the bits in there can be anything and so might not
526 match the REG_EQUAL note content. See PR70574. */
527 if (contains_paradoxical_subreg_p (SET_SRC (set)))
528 return false;
530 if (dump_file && (dump_flags & TDF_DETAILS))
531 fprintf (dump_file, " Setting REG_EQUAL note\n");
533 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
536 rtx *note_ptr = &REG_NOTES (use_rtl);
537 while (rtx note = *note_ptr)
539 if ((REG_NOTE_KIND (note) == REG_EQUAL
540 || REG_NOTE_KIND (note) == REG_EQUIV)
541 && try_fwprop_subst_note (use_insn, def_insn, note,
542 dest, src, false) < 0)
544 *note_ptr = XEXP (note, 1);
545 free_EXPR_LIST_node (note);
547 else
548 note_ptr = &XEXP (note, 1);
551 confirm_change_group ();
552 crtl->ssa->change_insn (use_change);
553 num_changes++;
554 return true;
557 /* Try to substitute (set DEST SRC) from DEF_INSN into USE_INSN's notes,
558 given that it was not possible to do this for USE_INSN's main pattern.
559 Return true on success, otherwise leave USE_INSN unchanged. */
561 static bool
562 try_fwprop_subst_notes (insn_info *use_insn, insn_info *def_insn,
563 rtx dest, rtx src)
565 rtx_insn *use_rtl = use_insn->rtl ();
566 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
567 if ((REG_NOTE_KIND (note) == REG_EQUAL
568 || REG_NOTE_KIND (note) == REG_EQUIV)
569 && try_fwprop_subst_note (use_insn, def_insn, note,
570 dest, src, true) > 0)
572 confirm_change_group ();
573 return true;
576 return false;
579 /* Check whether we could validly substitute (set DEST SRC) from DEF_INSN
580 into USE. If so, first try performing the substitution in location LOC
581 of USE->insn ()'s pattern. If that fails, try instead to substitute
582 into the notes.
584 Return true on success, otherwise leave USE_INSN unchanged. */
586 static bool
587 try_fwprop_subst (use_info *use, insn_info *def_insn,
588 rtx *loc, rtx dest, rtx src)
590 insn_info *use_insn = use->insn ();
592 auto attempt = crtl->ssa->new_change_attempt ();
593 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
595 /* ??? Not really a meaningful test: it means we can propagate arithmetic
596 involving hard registers but not bare references to them. A better
597 test would be to iterate over src_uses looking for hard registers
598 that are not fixed. */
599 if (REG_P (src) && HARD_REGISTER_P (src))
600 return false;
602 /* ??? It would be better to make this EBB-based instead. That would
603 involve checking for equal EBBs rather than equal BBs and trying
604 to make the uses available at use_insn->ebb ()->first_bb (). */
605 if (def_insn->bb () != use_insn->bb ())
607 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
608 use_insn->bb ());
609 if (!src_uses.is_valid ())
610 return false;
613 insn_change use_change (use_insn);
614 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
615 src_uses);
616 if (!use_change.new_uses.is_valid ())
617 return false;
619 /* ??? We could allow movement within the EBB by adding:
621 use_change.move_range = use_insn->ebb ()->insn_range (); */
622 if (!restrict_movement (use_change))
623 return false;
625 return (try_fwprop_subst_pattern (attempt, use_change, def_insn,
626 loc, dest, src)
627 || try_fwprop_subst_notes (use_insn, def_insn, dest, src));
630 /* For the given single_set INSN, containing SRC known to be a
631 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
632 is redundant due to the register being set by a LOAD_EXTEND_OP
633 load from memory. */
635 static bool
636 free_load_extend (rtx src, insn_info *insn)
638 rtx reg = XEXP (src, 0);
639 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
640 return false;
642 def_info *def = nullptr;
643 for (use_info *use : insn->uses ())
644 if (use->regno () == REGNO (reg))
646 def = use->def ();
647 break;
650 if (!def)
651 return false;
653 insn_info *def_insn = def->insn ();
654 if (def_insn->is_artificial ())
655 return false;
657 rtx_insn *def_rtl = def_insn->rtl ();
658 if (NONJUMP_INSN_P (def_rtl))
660 rtx patt = PATTERN (def_rtl);
662 if (GET_CODE (patt) == SET
663 && GET_CODE (SET_SRC (patt)) == MEM
664 && rtx_equal_p (SET_DEST (patt), reg))
665 return true;
667 return false;
670 /* Subroutine of forward_propagate_subreg that handles a use of DEST
671 in REF. The other parameters are the same. */
673 static bool
674 forward_propagate_subreg (use_info *use, insn_info *def_insn,
675 rtx dest, rtx src, df_ref ref)
677 scalar_int_mode int_use_mode, src_mode;
679 /* Only consider subregs... */
680 rtx use_reg = DF_REF_REG (ref);
681 machine_mode use_mode = GET_MODE (use_reg);
682 if (GET_CODE (use_reg) != SUBREG
683 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
684 return false;
686 /* ??? Replacing throughout the pattern would help for match_dups. */
687 rtx *loc = DF_REF_LOC (ref);
688 if (paradoxical_subreg_p (use_reg))
690 /* If this is a paradoxical SUBREG, we have no idea what value the
691 extra bits would have. However, if the operand is equivalent to
692 a SUBREG whose operand is the same as our mode, and all the modes
693 are within a word, we can just use the inner operand because
694 these SUBREGs just say how to treat the register. */
695 if (GET_CODE (src) == SUBREG
696 && REG_P (SUBREG_REG (src))
697 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
698 && GET_MODE (SUBREG_REG (src)) == use_mode
699 && subreg_lowpart_p (src))
700 return try_fwprop_subst (use, def_insn, loc,
701 use_reg, SUBREG_REG (src));
704 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
705 is the low part of the reg being extended then just use the inner
706 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
707 be removed due to it matching a LOAD_EXTEND_OP load from memory,
708 or due to the operation being a no-op when applied to registers.
709 For example, if we have:
711 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
712 B: (... (subreg:SI (reg:DI X)) ...)
714 and mode_rep_extended says that Y is already sign-extended,
715 the backend will typically allow A to be combined with the
716 definition of Y or, failing that, allow A to be deleted after
717 reload through register tying. Introducing more uses of Y
718 prevents both optimisations. */
719 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
720 && subreg_lowpart_p (use_reg))
722 if ((GET_CODE (src) == ZERO_EXTEND
723 || GET_CODE (src) == SIGN_EXTEND)
724 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
725 && REG_P (XEXP (src, 0))
726 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
727 && GET_MODE (XEXP (src, 0)) == use_mode
728 && !free_load_extend (src, def_insn)
729 && (targetm.mode_rep_extended (int_use_mode, src_mode)
730 != (int) GET_CODE (src)))
731 return try_fwprop_subst (use, def_insn, loc, use_reg, XEXP (src, 0));
734 return false;
737 /* Try to substitute (set DEST SRC) from DEF_INSN into USE and simplify
738 the result, handling cases where DEST is used in a subreg and where
739 applying that subreg to SRC results in a useful simplification. */
741 static bool
742 forward_propagate_subreg (use_info *use, insn_info *def_insn,
743 rtx dest, rtx src)
745 if (!use->includes_subregs () || !REG_P (dest))
746 return false;
748 if (GET_CODE (src) != SUBREG
749 && GET_CODE (src) != ZERO_EXTEND
750 && GET_CODE (src) != SIGN_EXTEND)
751 return false;
753 rtx_insn *use_rtl = use->insn ()->rtl ();
754 df_ref ref;
756 FOR_EACH_INSN_USE (ref, use_rtl)
757 if (DF_REF_REGNO (ref) == use->regno ()
758 && forward_propagate_subreg (use, def_insn, dest, src, ref))
759 return true;
761 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
762 if (DF_REF_REGNO (ref) == use->regno ()
763 && forward_propagate_subreg (use, def_insn, dest, src, ref))
764 return true;
766 return false;
769 /* Try to substitute (set DEST SRC) from DEF_INSN into USE and
770 simplify the result. */
772 static bool
773 forward_propagate_and_simplify (use_info *use, insn_info *def_insn,
774 rtx dest, rtx src)
776 insn_info *use_insn = use->insn ();
777 rtx_insn *use_rtl = use_insn->rtl ();
779 /* ??? This check seems unnecessary. We should be able to propagate
780 into any kind of instruction, regardless of whether it's a single set.
781 It seems odd to be more permissive with asms than normal instructions. */
782 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
783 rtx use_set = single_set (use_rtl);
784 if (need_single_set && !use_set)
785 return false;
787 /* Do not propagate into PC, CC0, etc.
789 ??? This too seems unnecessary. The current code should work correctly
790 without it, including cases where jumps become unconditional. */
791 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
792 return false;
794 /* In __asm don't replace if src might need more registers than
795 reg, as that could increase register pressure on the __asm. */
796 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
797 return false;
799 /* Check if the def is loading something from the constant pool; in this
800 case we would undo optimization such as compress_float_constant.
801 Still, we can set a REG_EQUAL note. */
802 if (MEM_P (src) && MEM_READONLY_P (src))
804 rtx x = avoid_constant_pool_reference (src);
805 rtx note_set;
806 if (x != src
807 && (note_set = set_for_reg_notes (use_rtl))
808 && REG_P (SET_DEST (note_set))
809 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
811 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
812 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
813 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
814 if (old_rtx != new_rtx)
815 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
817 return false;
820 /* ??? Unconditionally propagating into PATTERN would work better
821 for instructions that have match_dups. */
822 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
823 return try_fwprop_subst (use, def_insn, loc, dest, src);
826 /* Given a use USE of an insn, if it has a single reaching
827 definition, try to forward propagate it into that insn.
828 Return true if something changed.
830 REG_PROP_ONLY is true if we should only propagate register copies. */
832 static bool
833 forward_propagate_into (use_info *use, bool reg_prop_only = false)
835 if (use->includes_read_writes ())
836 return false;
838 /* Disregard uninitialized uses. */
839 def_info *def = use->def ();
840 if (!def)
841 return false;
843 /* Only consider single-register definitions. This could be relaxed,
844 but it should rarely be needed before RA. */
845 def = look_through_degenerate_phi (def);
846 if (def->includes_multiregs ())
847 return false;
849 /* Only consider uses whose definition comes from a real instruction. */
850 insn_info *def_insn = def->insn ();
851 if (def_insn->is_artificial ())
852 return false;
854 rtx_insn *def_rtl = def_insn->rtl ();
855 if (!NONJUMP_INSN_P (def_rtl))
856 return false;
857 /* ??? This seems an unnecessary restriction. We can easily tell
858 which set the definition comes from. */
859 if (multiple_sets (def_rtl))
860 return false;
861 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
862 if (!def_set)
863 return false;
865 rtx dest = SET_DEST (def_set);
866 rtx src = SET_SRC (def_set);
868 /* Allow propagations into a loop only for reg-to-reg copies, since
869 replacing one register by another shouldn't increase the cost. */
870 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
871 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
872 if ((reg_prop_only || def_loop != use_loop)
873 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
874 return false;
876 /* Don't substitute into a non-local goto, this confuses CFG. */
877 insn_info *use_insn = use->insn ();
878 rtx_insn *use_rtl = use_insn->rtl ();
879 if (JUMP_P (use_rtl)
880 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
881 return false;
883 if (forward_propagate_and_simplify (use, def_insn, dest, src)
884 || forward_propagate_subreg (use, def_insn, dest, src))
885 return true;
887 return false;
890 static void
891 fwprop_init (void)
893 num_changes = 0;
894 calculate_dominance_info (CDI_DOMINATORS);
896 /* We do not always want to propagate into loops, so we have to find
897 loops and be careful about them. Avoid CFG modifications so that
898 we don't have to update dominance information afterwards for
899 build_single_def_use_links. */
900 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
902 df_analyze ();
903 crtl->ssa = new rtl_ssa::function_info (cfun);
906 static void
907 fwprop_done (void)
909 loop_optimizer_finalize ();
911 crtl->ssa->perform_pending_updates ();
912 free_dominance_info (CDI_DOMINATORS);
913 cleanup_cfg (0);
915 delete crtl->ssa;
916 crtl->ssa = nullptr;
918 delete_trivially_dead_insns (get_insns (), max_reg_num ());
920 if (dump_file)
921 fprintf (dump_file,
922 "\nNumber of successful forward propagations: %d\n\n",
923 num_changes);
926 /* Try to optimize INSN, returning true if something changes.
927 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
928 the full fwprop. */
930 static bool
931 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
933 for (use_info *use : insn->uses ())
935 if (use->is_mem ())
936 continue;
937 /* ??? The choices here follow those in the pre-SSA code. */
938 if (!use->includes_address_uses ())
940 if (forward_propagate_into (use, fwprop_addr_p))
941 return true;
943 else
945 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
946 /* The outermost loop is not really a loop. */
947 if (loop == NULL || loop_outer (loop) == NULL)
949 if (forward_propagate_into (use, fwprop_addr_p))
950 return true;
952 else if (fwprop_addr_p)
954 if (forward_propagate_into (use, false))
955 return true;
959 return false;
962 /* Main entry point. */
964 static bool
965 gate_fwprop (void)
967 return optimize > 0 && flag_forward_propagate;
970 static unsigned int
971 fwprop (bool fwprop_addr_p)
973 fwprop_init ();
975 /* Go through all the instructions (including debug instructions) looking
976 for uses that we could propagate into.
978 Do not forward propagate addresses into loops until after unrolling.
979 CSE did so because it was able to fix its own mess, but we are not. */
981 insn_info *next;
983 /* ??? This code uses a worklist in order to preserve the behavior
984 of the pre-SSA implementation. It would be better to instead
985 iterate on each instruction until no more propagations are
986 possible, then move on to the next. */
987 auto_vec<insn_info *> worklist;
988 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
990 next = insn->next_any_insn ();
991 if (insn->can_be_optimized () || insn->is_debug_insn ())
992 if (fwprop_insn (insn, fwprop_addr_p))
993 worklist.safe_push (insn);
995 for (unsigned int i = 0; i < worklist.length (); ++i)
997 insn_info *insn = worklist[i];
998 if (fwprop_insn (insn, fwprop_addr_p))
999 worklist.safe_push (insn);
1002 fwprop_done ();
1003 return 0;
1006 namespace {
1008 const pass_data pass_data_rtl_fwprop =
1010 RTL_PASS, /* type */
1011 "fwprop1", /* name */
1012 OPTGROUP_NONE, /* optinfo_flags */
1013 TV_FWPROP, /* tv_id */
1014 0, /* properties_required */
1015 0, /* properties_provided */
1016 0, /* properties_destroyed */
1017 0, /* todo_flags_start */
1018 TODO_df_finish, /* todo_flags_finish */
1021 class pass_rtl_fwprop : public rtl_opt_pass
1023 public:
1024 pass_rtl_fwprop (gcc::context *ctxt)
1025 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1028 /* opt_pass methods: */
1029 virtual bool gate (function *) { return gate_fwprop (); }
1030 virtual unsigned int execute (function *) { return fwprop (false); }
1032 }; // class pass_rtl_fwprop
1034 } // anon namespace
1036 rtl_opt_pass *
1037 make_pass_rtl_fwprop (gcc::context *ctxt)
1039 return new pass_rtl_fwprop (ctxt);
1042 namespace {
1044 const pass_data pass_data_rtl_fwprop_addr =
1046 RTL_PASS, /* type */
1047 "fwprop2", /* name */
1048 OPTGROUP_NONE, /* optinfo_flags */
1049 TV_FWPROP, /* tv_id */
1050 0, /* properties_required */
1051 0, /* properties_provided */
1052 0, /* properties_destroyed */
1053 0, /* todo_flags_start */
1054 TODO_df_finish, /* todo_flags_finish */
1057 class pass_rtl_fwprop_addr : public rtl_opt_pass
1059 public:
1060 pass_rtl_fwprop_addr (gcc::context *ctxt)
1061 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1064 /* opt_pass methods: */
1065 virtual bool gate (function *) { return gate_fwprop (); }
1066 virtual unsigned int execute (function *) { return fwprop (true); }
1068 }; // class pass_rtl_fwprop_addr
1070 } // anon namespace
1072 rtl_opt_pass *
1073 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1075 return new pass_rtl_fwprop_addr (ctxt);