gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005-2016 Free Software Foundation, Inc.
   3    Contributed by Paolo Bonzini and Steven Bosscher.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "target.h"
  26 #include "rtl.h"
  27 #include "predict.h"
  28 #include "df.h"
  29 #include "memmodel.h"
  30 #include "tm_p.h"
  31 #include "insn-config.h"
  32 #include "emit-rtl.h"
  33 #include "recog.h"
  34
  35 #include "sparseset.h"
  36 #include "cfgrtl.h"
  37 #include "cfgcleanup.h"
  38 #include "cfgloop.h"
  39 #include "tree-pass.h"
  40 #include "domwalk.h"
  41 #include "rtl-iter.h"
  42
  43
  44 /* This pass does simple forward propagation and simplification when an
  45    operand of an insn can only come from a single def.  This pass uses
  46    df.c, so it is global.  However, we only do limited analysis of
  47    available expressions.
  48
  49    1) The pass tries to propagate the source of the def into the use,
  50    and checks if the result is independent of the substituted value.
  51    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  52    zero, independent of the source register.
  53
  54    In particular, we propagate constants into the use site.  Sometimes
  55    RTL expansion did not put the constant in the same insn on purpose,
  56    to satisfy a predicate, and the result will fail to be recognized;
  57    but this happens rarely and in this case we can still create a
  58    REG_EQUAL note.  For multi-word operations, this
  59
  60       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  61       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  62       (set (subreg:SI (reg:DI 122) 0)
  63          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  64       (set (subreg:SI (reg:DI 122) 4)
  65          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  66
  67    can be simplified to the much simpler
  68
  69       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  70       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  71
  72    This particular propagation is also effective at putting together
  73    complex addressing modes.  We are more aggressive inside MEMs, in
  74    that all definitions are propagated if the use is in a MEM; if the
  75    result is a valid memory address we check address_cost to decide
  76    whether the substitution is worthwhile.
  77
  78    2) The pass propagates register copies.  This is not as effective as
  79    the copy propagation done by CSE's canon_reg, which works by walking
  80    the instruction chain, but it can help the other transformations.
  81
  82    We should consider removing this optimization, and instead reorder the
  83    RTL passes, because GCSE does this transformation too.  With some luck,
  84    the CSE pass at the end of rest_of_handle_gcse could also go away.
  85
  86    3) The pass looks for paradoxical subregs that are actually unnecessary.
  87    Things like this:
  88
  89      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  90      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  91      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  92                                 (subreg:SI (reg:QI 121) 0)))
  93
  94    are very common on machines that can only do word-sized operations.
  95    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  96    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  97    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  98    above will simplify this to
  99
 100      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 101      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 102      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 103
 104    where the first two insns are now dead.
 105
 106    We used to use reaching definitions to find which uses have a
 107    single reaching definition (sounds obvious...), but this is too
 108    complex a problem in nasty testcases like PR33928.  Now we use the
 109    multiple definitions problem in df-problems.c.  The similarity
 110    between that problem and SSA form creation is taken further, in
 111    that fwprop does a dominator walk to create its chains; however,
 112    instead of creating a PHI function where multiple definitions meet
 113    I just punt and record only singleton use-def chains, which is
 114    all that is needed by fwprop.  */
 115
 116
 117 static int num_changes;
 118
 119 static vec<df_ref> use_def_ref;
 120 static vec<df_ref> reg_defs;
 121 static vec<df_ref> reg_defs_stack;
 122
 123 /* The MD bitmaps are trimmed to include only live registers to cut
 124    memory usage on testcases like insn-recog.c.  Track live registers
 125    in the basic block and do not perform forward propagation if the
 126    destination is a dead pseudo occurring in a note.  */
 127 static bitmap local_md;
 128 static bitmap local_lr;
 129
 130 /* Return the only def in USE's use-def chain, or NULL if there is
 131    more than one def in the chain.  */
 132
 133 static inline df_ref
 134 get_def_for_use (df_ref use)
 135 {
 136   return use_def_ref[DF_REF_ID (use)];
 137 }
 138
 139
 140 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 141    TOP_FLAG says which artificials uses should be used, when DEF_REC
 142    is an artificial def vector.  LOCAL_MD is modified as after a
 143    df_md_simulate_* function; we do more or less the same processing
 144    done there, so we do not use those functions.  */
 145
 146 #define DF_MD_GEN_FLAGS \
 147         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 148
 149 static void
 150 process_defs (df_ref def, int top_flag)
 151 {
 152   for (; def; def = DF_REF_NEXT_LOC (def))
 153     {
 154       df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
 155       unsigned int dregno;
 156
 157       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 158         continue;
 159
 160       dregno = DF_REF_REGNO (def);
 161       if (curr_def)
 162         reg_defs_stack.safe_push (curr_def);
 163       else
 164         {
 165           /* Do not store anything if "transitioning" from NULL to NULL.  But
 166              otherwise, push a special entry on the stack to tell the
 167              leave_block callback that the entry in reg_defs was NULL.  */
 168           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 169             ;
 170           else
 171             reg_defs_stack.safe_push (def);
 172         }
 173
 174       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 175         {
 176           bitmap_set_bit (local_md, dregno);
 177           reg_defs[dregno] = NULL;
 178         }
 179       else
 180         {
 181           bitmap_clear_bit (local_md, dregno);
 182           reg_defs[dregno] = def;
 183         }
 184     }
 185 }
 186
 187
 188 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 189    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 190    TOP_FLAG says which artificials uses should be used, when USE_REC
 191    is an artificial use vector.  */
 192
 193 static void
 194 process_uses (df_ref use, int top_flag)
 195 {
 196   for (; use; use = DF_REF_NEXT_LOC (use))
 197     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 198       {
 199         unsigned int uregno = DF_REF_REGNO (use);
 200         if (reg_defs[uregno]
 201             && !bitmap_bit_p (local_md, uregno)
 202             && bitmap_bit_p (local_lr, uregno))
 203           use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
 204       }
 205 }
 206
 207 class single_def_use_dom_walker : public dom_walker
 208 {
 209 public:
 210   single_def_use_dom_walker (cdi_direction direction)
 211     : dom_walker (direction) {}
 212   virtual edge before_dom_children (basic_block);
 213   virtual void after_dom_children (basic_block);
 214 };
 215
 216 edge
 217 single_def_use_dom_walker::before_dom_children (basic_block bb)
 218 {
 219   int bb_index = bb->index;
 220   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 221   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 222   rtx_insn *insn;
 223
 224   bitmap_copy (local_md, &md_bb_info->in);
 225   bitmap_copy (local_lr, &lr_bb_info->in);
 226
 227   /* Push a marker for the leave_block callback.  */
 228   reg_defs_stack.safe_push (NULL);
 229
 230   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 231   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 232
 233   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 234      the live registers if there are unused artificial defs.  We prefer
 235      liveness to be underestimated.  */
 236
 237   FOR_BB_INSNS (bb, insn)
 238     if (INSN_P (insn))
 239       {
 240         unsigned int uid = INSN_UID (insn);
 241         process_uses (DF_INSN_UID_USES (uid), 0);
 242         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 243         process_defs (DF_INSN_UID_DEFS (uid), 0);
 244         df_simulate_one_insn_forwards (bb, insn, local_lr);
 245       }
 246
 247   process_uses (df_get_artificial_uses (bb_index), 0);
 248   process_defs (df_get_artificial_defs (bb_index), 0);
 249
 250   return NULL;
 251 }
 252
 253 /* Pop the definitions created in this basic block when leaving its
 254    dominated parts.  */
 255
 256 void
 257 single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED)
 258 {
 259   df_ref saved_def;
 260   while ((saved_def = reg_defs_stack.pop ()) != NULL)
 261     {
 262       unsigned int dregno = DF_REF_REGNO (saved_def);
 263
 264       /* See also process_defs.  */
 265       if (saved_def == reg_defs[dregno])
 266         reg_defs[dregno] = NULL;
 267       else
 268         reg_defs[dregno] = saved_def;
 269     }
 270 }
 271
 272
 273 /* Build a vector holding the reaching definitions of uses reached by a
 274    single dominating definition.  */
 275
 276 static void
 277 build_single_def_use_links (void)
 278 {
 279   /* We use the multiple definitions problem to compute our restricted
 280      use-def chains.  */
 281   df_set_flags (DF_EQ_NOTES);
 282   df_md_add_problem ();
 283   df_note_add_problem ();
 284   df_analyze ();
 285   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 286
 287   use_def_ref.create (DF_USES_TABLE_SIZE ());
 288   use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());
 289
 290   reg_defs.create (max_reg_num ());
 291   reg_defs.safe_grow_cleared (max_reg_num ());
 292
 293   reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10);
 294   local_md = BITMAP_ALLOC (NULL);
 295   local_lr = BITMAP_ALLOC (NULL);
 296
 297   /* Walk the dominator tree looking for single reaching definitions
 298      dominating the uses.  This is similar to how SSA form is built.  */
 299   single_def_use_dom_walker (CDI_DOMINATORS)
 300     .walk (cfun->cfg->x_entry_block_ptr);
 301
 302   BITMAP_FREE (local_lr);
 303   BITMAP_FREE (local_md);
 304   reg_defs.release ();
 305   reg_defs_stack.release ();
 306 }
 307
 308 \f
 309 /* Do not try to replace constant addresses or addresses of local and
 310    argument slots.  These MEM expressions are made only once and inserted
 311    in many instructions, as well as being used to control symbol table
 312    output.  It is not safe to clobber them.
 313
 314    There are some uncommon cases where the address is already in a register
 315    for some reason, but we cannot take advantage of that because we have
 316    no easy way to unshare the MEM.  In addition, looking up all stack
 317    addresses is costly.  */
 318
 319 static bool
 320 can_simplify_addr (rtx addr)
 321 {
 322   rtx reg;
 323
 324   if (CONSTANT_ADDRESS_P (addr))
 325     return false;
 326
 327   if (GET_CODE (addr) == PLUS)
 328     reg = XEXP (addr, 0);
 329   else
 330     reg = addr;
 331
 332   return (!REG_P (reg)
 333           || (REGNO (reg) != FRAME_POINTER_REGNUM
 334               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 335               && REGNO (reg) != ARG_POINTER_REGNUM));
 336 }
 337
 338 /* Returns a canonical version of X for the address, from the point of view,
 339    that all multiplications are represented as MULT instead of the multiply
 340    by a power of 2 being represented as ASHIFT.
 341
 342    Every ASHIFT we find has been made by simplify_gen_binary and was not
 343    there before, so it is not shared.  So we can do this in place.  */
 344
 345 static void
 346 canonicalize_address (rtx x)
 347 {
 348   for (;;)
 349     switch (GET_CODE (x))
 350       {
 351       case ASHIFT:
 352         if (CONST_INT_P (XEXP (x, 1))
 353             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 354             && INTVAL (XEXP (x, 1)) >= 0)
 355           {
 356             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 357             PUT_CODE (x, MULT);
 358             XEXP (x, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift,
 359                                         GET_MODE (x));
 360           }
 361
 362         x = XEXP (x, 0);
 363         break;
 364
 365       case PLUS:
 366         if (GET_CODE (XEXP (x, 0)) == PLUS
 367             || GET_CODE (XEXP (x, 0)) == ASHIFT
 368             || GET_CODE (XEXP (x, 0)) == CONST)
 369           canonicalize_address (XEXP (x, 0));
 370
 371         x = XEXP (x, 1);
 372         break;
 373
 374       case CONST:
 375         x = XEXP (x, 0);
 376         break;
 377
 378       default:
 379         return;
 380       }
 381 }
 382
 383 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 384    for a memory access in the given MODE.  */
 385
 386 static bool
 387 should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
 388                         addr_space_t as, bool speed)
 389 {
 390   int gain;
 391
 392   if (rtx_equal_p (old_rtx, new_rtx)
 393       || !memory_address_addr_space_p (mode, new_rtx, as))
 394     return false;
 395
 396   /* Copy propagation is always ok.  */
 397   if (REG_P (old_rtx) && REG_P (new_rtx))
 398     return true;
 399
 400   /* Prefer the new address if it is less expensive.  */
 401   gain = (address_cost (old_rtx, mode, as, speed)
 402           - address_cost (new_rtx, mode, as, speed));
 403
 404   /* If the addresses have equivalent cost, prefer the new address
 405      if it has the highest `set_src_cost'.  That has the potential of
 406      eliminating the most insns without additional costs, and it
 407      is the same that cse.c used to do.  */
 408   if (gain == 0)
 409     gain = (set_src_cost (new_rtx, VOIDmode, speed)
 410             - set_src_cost (old_rtx, VOIDmode, speed));
 411
 412   return (gain > 0);
 413 }
 414
 415
 416 /* Flags for the last parameter of propagate_rtx_1.  */
 417
 418 enum {
 419   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
 420      if it is false, propagate_rtx_1 returns false if, for at least
 421      one occurrence OLD, it failed to collapse the result to a constant.
 422      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
 423      collapse to zero if replacing (reg:M B) with (reg:M A).
 424
 425      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
 426      propagate_rtx_1 just tries to make cheaper and valid memory
 427      addresses.  */
 428   PR_CAN_APPEAR = 1,
 429
 430   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
 431      outside memory addresses.  This is needed because propagate_rtx_1 does
 432      not do any analysis on memory; thus it is very conservative and in general
 433      it will fail if non-read-only MEMs are found in the source expression.
 434
 435      PR_HANDLE_MEM is set when the source of the propagation was not
 436      another MEM.  Then, it is safe not to treat non-read-only MEMs as
 437      ``opaque'' objects.  */
 438   PR_HANDLE_MEM = 2,
 439
 440   /* Set when costs should be optimized for speed.  */
 441   PR_OPTIMIZE_FOR_SPEED = 4
 442 };
 443
 444
 445 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
 446    resulting expression.  Replace *PX with a new RTL expression if an
 447    occurrence of OLD was found.
 448
 449    This is only a wrapper around simplify-rtx.c: do not add any pattern
 450    matching code here.  (The sole exception is the handling of LO_SUM, but
 451    that is because there is no simplify_gen_* function for LO_SUM).  */
 452
 453 static bool
 454 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 455 {
 456   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 457   enum rtx_code code = GET_CODE (x);
 458   machine_mode mode = GET_MODE (x);
 459   machine_mode op_mode;
 460   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
 461   bool valid_ops = true;
 462
 463   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
 464     {
 465       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
 466          they have side effects or not).  */
 467       *px = (side_effects_p (x)
 468              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
 469              : gen_rtx_SCRATCH (GET_MODE (x)));
 470       return false;
 471     }
 472
 473   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
 474      address, and we are *not* inside one.  */
 475   if (x == old_rtx)
 476     {
 477       *px = new_rtx;
 478       return can_appear;
 479     }
 480
 481   /* If this is an expression, try recursive substitution.  */
 482   switch (GET_RTX_CLASS (code))
 483     {
 484     case RTX_UNARY:
 485       op0 = XEXP (x, 0);
 486       op_mode = GET_MODE (op0);
 487       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 488       if (op0 == XEXP (x, 0))
 489         return true;
 490       tem = simplify_gen_unary (code, mode, op0, op_mode);
 491       break;
 492
 493     case RTX_BIN_ARITH:
 494     case RTX_COMM_ARITH:
 495       op0 = XEXP (x, 0);
 496       op1 = XEXP (x, 1);
 497       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 498       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 499       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 500         return true;
 501       tem = simplify_gen_binary (code, mode, op0, op1);
 502       break;
 503
 504     case RTX_COMPARE:
 505     case RTX_COMM_COMPARE:
 506       op0 = XEXP (x, 0);
 507       op1 = XEXP (x, 1);
 508       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
 509       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 510       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 511       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 512         return true;
 513       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 514       break;
 515
 516     case RTX_TERNARY:
 517     case RTX_BITFIELD_OPS:
 518       op0 = XEXP (x, 0);
 519       op1 = XEXP (x, 1);
 520       op2 = XEXP (x, 2);
 521       op_mode = GET_MODE (op0);
 522       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 523       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 524       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
 525       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 526         return true;
 527       if (op_mode == VOIDmode)
 528         op_mode = GET_MODE (op0);
 529       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 530       break;
 531
 532     case RTX_EXTRA:
 533       /* The only case we try to handle is a SUBREG.  */
 534       if (code == SUBREG)
 535         {
 536           op0 = XEXP (x, 0);
 537           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 538           if (op0 == XEXP (x, 0))
 539             return true;
 540           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 541                                      SUBREG_BYTE (x));
 542         }
 543       break;
 544
 545     case RTX_OBJ:
 546       if (code == MEM && x != new_rtx)
 547         {
 548           rtx new_op0;
 549           op0 = XEXP (x, 0);
 550
 551           /* There are some addresses that we cannot work on.  */
 552           if (!can_simplify_addr (op0))
 553             return true;
 554
 555           op0 = new_op0 = targetm.delegitimize_address (op0);
 556           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
 557                                         flags | PR_CAN_APPEAR);
 558
 559           /* Dismiss transformation that we do not want to carry on.  */
 560           if (!valid_ops
 561               || new_op0 == op0
 562               || !(GET_MODE (new_op0) == GET_MODE (op0)
 563                    || GET_MODE (new_op0) == VOIDmode))
 564             return true;
 565
 566           canonicalize_address (new_op0);
 567
 568           /* Copy propagations are always ok.  Otherwise check the costs.  */
 569           if (!(REG_P (old_rtx) && REG_P (new_rtx))
 570               && !should_replace_address (op0, new_op0, GET_MODE (x),
 571                                           MEM_ADDR_SPACE (x),
 572                                           flags & PR_OPTIMIZE_FOR_SPEED))
 573             return true;
 574
 575           tem = replace_equiv_address_nv (x, new_op0);
 576         }
 577
 578       else if (code == LO_SUM)
 579         {
 580           op0 = XEXP (x, 0);
 581           op1 = XEXP (x, 1);
 582
 583           /* The only simplification we do attempts to remove references to op0
 584              or make it constant -- in both cases, op0's invalidity will not
 585              make the result invalid.  */
 586           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
 587           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 588           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 589             return true;
 590
 591           /* (lo_sum (high x) x) -> x  */
 592           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 593             tem = op1;
 594           else
 595             tem = gen_rtx_LO_SUM (mode, op0, op1);
 596
 597           /* OP1 is likely not a legitimate address, otherwise there would have
 598              been no LO_SUM.  We want it to disappear if it is invalid, return
 599              false in that case.  */
 600           return memory_address_p (mode, tem);
 601         }
 602
 603       else if (code == REG)
 604         {
 605           if (rtx_equal_p (x, old_rtx))
 606             {
 607               *px = new_rtx;
 608               return can_appear;
 609             }
 610         }
 611       break;
 612
 613     default:
 614       break;
 615     }
 616
 617   /* No change, no trouble.  */
 618   if (tem == NULL_RTX)
 619     return true;
 620
 621   *px = tem;
 622
 623   /* Allow replacements that simplify operations on a vector or complex
 624      value to a component.  The most prominent case is
 625      (subreg ([vec_]concat ...)).   */
 626   if (REG_P (tem) && !HARD_REGISTER_P (tem)
 627       && (VECTOR_MODE_P (GET_MODE (new_rtx))
 628           || COMPLEX_MODE_P (GET_MODE (new_rtx)))
 629       && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx)))
 630     return true;
 631
 632   /* The replacement we made so far is valid, if all of the recursive
 633      replacements were valid, or we could simplify everything to
 634      a constant.  */
 635   return valid_ops || can_appear || CONSTANT_P (tem);
 636 }
 637
 638
 639 /* Return true if X constains a non-constant mem.  */
 640
 641 static bool
 642 varying_mem_p (const_rtx x)
 643 {
 644   subrtx_iterator::array_type array;
 645   FOR_EACH_SUBRTX (iter, array, x, NONCONST)
 646     if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
 647       return true;
 648   return false;
 649 }
 650
 651
 652 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 653    resulting expression (in mode MODE).  Return a new expression if it is
 654    a constant, otherwise X.
 655
 656    Simplifications where occurrences of NEW collapse to a constant are always
 657    accepted.  All simplifications are accepted if NEW is a pseudo too.
 658    Otherwise, we accept simplifications that have a lower or equal cost.  */
 659
 660 static rtx
 661 propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
 662                bool speed)
 663 {
 664   rtx tem;
 665   bool collapsed;
 666   int flags;
 667
 668   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 669     return NULL_RTX;
 670
 671   flags = 0;
 672   if (REG_P (new_rtx)
 673       || CONSTANT_P (new_rtx)
 674       || (GET_CODE (new_rtx) == SUBREG
 675           && REG_P (SUBREG_REG (new_rtx))
 676           && (GET_MODE_SIZE (mode)
 677               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 678     flags |= PR_CAN_APPEAR;
 679   if (!varying_mem_p (new_rtx))
 680     flags |= PR_HANDLE_MEM;
 681
 682   if (speed)
 683     flags |= PR_OPTIMIZE_FOR_SPEED;
 684
 685   tem = x;
 686   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 687   if (tem == x || !collapsed)
 688     return NULL_RTX;
 689
 690   /* gen_lowpart_common will not be able to process VOIDmode entities other
 691      than CONST_INTs.  */
 692   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 693     return NULL_RTX;
 694
 695   if (GET_MODE (tem) == VOIDmode)
 696     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 697   else
 698     gcc_assert (GET_MODE (tem) == mode);
 699
 700   return tem;
 701 }
 702
 703
 704 \f
 705
 706 /* Return true if the register from reference REF is killed
 707    between FROM to (but not including) TO.  */
 708
 709 static bool
 710 local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to)
 711 {
 712   rtx_insn *insn;
 713
 714   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 715     {
 716       df_ref def;
 717       if (!INSN_P (insn))
 718         continue;
 719
 720       FOR_EACH_INSN_DEF (def, insn)
 721         if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 722           return true;
 723     }
 724   return false;
 725 }
 726
 727
 728 /* Check if the given DEF is available in INSN.  This would require full
 729    computation of available expressions; we check only restricted conditions:
 730    - if DEF is the sole definition of its register, go ahead;
 731    - in the same basic block, we check for no definitions killing the
 732      definition of DEF_INSN;
 733    - if USE's basic block has DEF's basic block as the sole predecessor,
 734      we check if the definition is killed after DEF_INSN or before
 735      TARGET_INSN insn, in their respective basic blocks.  */
 736 static bool
 737 use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn)
 738 {
 739   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 740   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 741   int regno;
 742   df_ref def;
 743
 744   /* We used to have a def reaching a use that is _before_ the def,
 745      with the def not dominating the use even though the use and def
 746      are in the same basic block, when a register may be used
 747      uninitialized in a loop.  This should not happen anymore since
 748      we do not use reaching definitions, but still we test for such
 749      cases and assume that DEF is not available.  */
 750   if (def_bb == target_bb
 751       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 752       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 753     return true;
 754
 755   /* Check if the reg in USE has only one definition.  We already
 756      know that this definition reaches use, or we wouldn't be here.
 757      However, this is invalid for hard registers because if they are
 758      live at the beginning of the function it does not mean that we
 759      have an uninitialized access.  */
 760   regno = DF_REF_REGNO (use);
 761   def = DF_REG_DEF_CHAIN (regno);
 762   if (def
 763       && DF_REF_NEXT_REG (def) == NULL
 764       && regno >= FIRST_PSEUDO_REGISTER)
 765     return false;
 766
 767   /* Check locally if we are in the same basic block.  */
 768   if (def_bb == target_bb)
 769     return local_ref_killed_between_p (use, def_insn, target_insn);
 770
 771   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 772   if (single_pred_p (target_bb)
 773       && single_pred (target_bb) == def_bb)
 774     {
 775       df_ref x;
 776
 777       /* See if USE is killed between DEF_INSN and the last insn in the
 778          basic block containing DEF_INSN.  */
 779       x = df_bb_regno_last_def_find (def_bb, regno);
 780       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 781         return true;
 782
 783       /* See if USE is killed between TARGET_INSN and the first insn in the
 784          basic block containing TARGET_INSN.  */
 785       x = df_bb_regno_first_def_find (target_bb, regno);
 786       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 787         return true;
 788
 789       return false;
 790     }
 791
 792   /* Otherwise assume the worst case.  */
 793   return true;
 794 }
 795
 796
 797 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 798    would require full computation of available expressions;
 799    we check only restricted conditions, see use_killed_between.  */
 800 static bool
 801 all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn)
 802 {
 803   df_ref use;
 804   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 805   rtx def_set = single_set (def_insn);
 806   rtx_insn *next;
 807
 808   gcc_assert (def_set);
 809
 810   /* If target_insn comes right after def_insn, which is very common
 811      for addresses, we can use a quicker test.  Ignore debug insns
 812      other than target insns for this.  */
 813   next = NEXT_INSN (def_insn);
 814   while (next && next != target_insn && DEBUG_INSN_P (next))
 815     next = NEXT_INSN (next);
 816   if (next == target_insn && REG_P (SET_DEST (def_set)))
 817     {
 818       rtx def_reg = SET_DEST (def_set);
 819
 820       /* If the insn uses the reg that it defines, the substitution is
 821          invalid.  */
 822       FOR_EACH_INSN_INFO_USE (use, insn_info)
 823         if (rtx_equal_p (DF_REF_REG (use), def_reg))
 824           return false;
 825       FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
 826         if (rtx_equal_p (DF_REF_REG (use), def_reg))
 827           return false;
 828     }
 829   else
 830     {
 831       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 832
 833       /* Look at all the uses of DEF_INSN, and see if they are not
 834          killed between DEF_INSN and TARGET_INSN.  */
 835       FOR_EACH_INSN_INFO_USE (use, insn_info)
 836         {
 837           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 838             return false;
 839           if (use_killed_between (use, def_insn, target_insn))
 840             return false;
 841         }
 842       FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
 843         {
 844           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 845             return false;
 846           if (use_killed_between (use, def_insn, target_insn))
 847             return false;
 848         }
 849     }
 850
 851   return true;
 852 }
 853
 854 \f
 855 static df_ref *active_defs;
 856 static sparseset active_defs_check;
 857
 858 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 859    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 860    too, for checking purposes.  */
 861
 862 static void
 863 register_active_defs (df_ref use)
 864 {
 865   for (; use; use = DF_REF_NEXT_LOC (use))
 866     {
 867       df_ref def = get_def_for_use (use);
 868       int regno = DF_REF_REGNO (use);
 869
 870       if (flag_checking)
 871         sparseset_set_bit (active_defs_check, regno);
 872       active_defs[regno] = def;
 873     }
 874 }
 875
 876
 877 /* Build the use->def links that we use to update the dataflow info
 878    for new uses.  Note that building the links is very cheap and if
 879    it were done earlier, they could be used to rule out invalid
 880    propagations (in addition to what is done in all_uses_available_at).
 881    I'm not doing this yet, though.  */
 882
 883 static void
 884 update_df_init (rtx_insn *def_insn, rtx_insn *insn)
 885 {
 886   if (flag_checking)
 887     sparseset_clear (active_defs_check);
 888   register_active_defs (DF_INSN_USES (def_insn));
 889   register_active_defs (DF_INSN_USES (insn));
 890   register_active_defs (DF_INSN_EQ_USES (insn));
 891 }
 892
 893
 894 /* Update the USE_DEF_REF array for the given use, using the active definitions
 895    in the ACTIVE_DEFS array to match pseudos to their def. */
 896
 897 static inline void
 898 update_uses (df_ref use)
 899 {
 900   for (; use; use = DF_REF_NEXT_LOC (use))
 901     {
 902       int regno = DF_REF_REGNO (use);
 903
 904       /* Set up the use-def chain.  */
 905       if (DF_REF_ID (use) >= (int) use_def_ref.length ())
 906         use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);
 907
 908       if (flag_checking)
 909         gcc_assert (sparseset_bit_p (active_defs_check, regno));
 910       use_def_ref[DF_REF_ID (use)] = active_defs[regno];
 911     }
 912 }
 913
 914
 915 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 916    uses if NOTES_ONLY is true.  */
 917
 918 static void
 919 update_df (rtx_insn *insn, rtx note)
 920 {
 921   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 922
 923   if (note)
 924     {
 925       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 926       df_notes_rescan (insn);
 927     }
 928   else
 929     {
 930       df_uses_create (&PATTERN (insn), insn, 0);
 931       df_insn_rescan (insn);
 932       update_uses (DF_INSN_INFO_USES (insn_info));
 933     }
 934
 935   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 936 }
 937
 938
 939 /* Try substituting NEW into LOC, which originated from forward propagation
 940    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 941    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 942    new insn is not recognized.  Return whether the substitution was
 943    performed.  */
 944
 945 static bool
 946 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
 947                   bool set_reg_equal)
 948 {
 949   rtx_insn *insn = DF_REF_INSN (use);
 950   rtx set = single_set (insn);
 951   rtx note = NULL_RTX;
 952   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 953   int old_cost = 0;
 954   bool ok;
 955
 956   update_df_init (def_insn, insn);
 957
 958   /* forward_propagate_subreg may be operating on an instruction with
 959      multiple sets.  If so, assume the cost of the new instruction is
 960      not greater than the old one.  */
 961   if (set)
 962     old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed);
 963   if (dump_file)
 964     {
 965       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 966       print_inline_rtx (dump_file, *loc, 2);
 967       fprintf (dump_file, "\n with ");
 968       print_inline_rtx (dump_file, new_rtx, 2);
 969       fprintf (dump_file, "\n");
 970     }
 971
 972   validate_unshare_change (insn, loc, new_rtx, true);
 973   if (!verify_changes (0))
 974     {
 975       if (dump_file)
 976         fprintf (dump_file, "Changes to insn %d not recognized\n",
 977                  INSN_UID (insn));
 978       ok = false;
 979     }
 980
 981   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 982            && set
 983            && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed)
 984                > old_cost))
 985     {
 986       if (dump_file)
 987         fprintf (dump_file, "Changes to insn %d not profitable\n",
 988                  INSN_UID (insn));
 989       ok = false;
 990     }
 991
 992   else
 993     {
 994       if (dump_file)
 995         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
 996       ok = true;
 997     }
 998
 999   if (ok)
1000     {
1001       confirm_change_group ();
1002       num_changes++;
1003     }
1004   else
1005     {
1006       cancel_changes (0);
1007
1008       /* Can also record a simplified value in a REG_EQUAL note,
1009          making a new one if one does not already exist.  */
1010       if (set_reg_equal)
1011         {
1012           /* If there are any paradoxical SUBREGs, don't add REG_EQUAL note,
1013              because the bits in there can be anything and so might not
1014              match the REG_EQUAL note content.  See PR70574.  */
1015           subrtx_var_iterator::array_type array;
1016           FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
1017             {
1018               rtx x = *iter;
1019               if (SUBREG_P (x) && paradoxical_subreg_p (x))
1020                 {
1021                   set_reg_equal = false;
1022                   break;
1023                 }
1024             }
1025
1026           if (set_reg_equal)
1027             {
1028               if (dump_file)
1029                 fprintf (dump_file, " Setting REG_EQUAL note\n");
1030
1031               note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1032             }
1033         }
1034     }
1035
1036   if ((ok || note) && !CONSTANT_P (new_rtx))
1037     update_df (insn, note);
1038
1039   return ok;
1040 }
1041
1042 /* For the given single_set INSN, containing SRC known to be a
1043    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1044    is redundant due to the register being set by a LOAD_EXTEND_OP
1045    load from memory.  */
1046
1047 static bool
1048 free_load_extend (rtx src, rtx_insn *insn)
1049 {
1050   rtx reg;
1051   df_ref def, use;
1052
1053   reg = XEXP (src, 0);
1054   if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
1055     return false;
1056
1057   FOR_EACH_INSN_USE (use, insn)
1058     if (!DF_REF_IS_ARTIFICIAL (use)
1059         && DF_REF_TYPE (use) == DF_REF_REG_USE
1060         && DF_REF_REG (use) == reg)
1061       break;
1062   if (!use)
1063     return false;
1064
1065   def = get_def_for_use (use);
1066   if (!def)
1067     return false;
1068
1069   if (DF_REF_IS_ARTIFICIAL (def))
1070     return false;
1071
1072   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1073     {
1074       rtx patt = PATTERN (DF_REF_INSN (def));
1075
1076       if (GET_CODE (patt) == SET
1077           && GET_CODE (SET_SRC (patt)) == MEM
1078           && rtx_equal_p (SET_DEST (patt), reg))
1079         return true;
1080     }
1081   return false;
1082 }
1083
1084 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1085
1086 static bool
1087 forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set)
1088 {
1089   rtx use_reg = DF_REF_REG (use);
1090   rtx_insn *use_insn;
1091   rtx src;
1092
1093   /* Only consider subregs... */
1094   machine_mode use_mode = GET_MODE (use_reg);
1095   if (GET_CODE (use_reg) != SUBREG
1096       || !REG_P (SET_DEST (def_set)))
1097     return false;
1098
1099   /* If this is a paradoxical SUBREG...  */
1100   if (GET_MODE_SIZE (use_mode)
1101       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1102     {
1103       /* If this is a paradoxical SUBREG, we have no idea what value the
1104          extra bits would have.  However, if the operand is equivalent to
1105          a SUBREG whose operand is the same as our mode, and all the modes
1106          are within a word, we can just use the inner operand because
1107          these SUBREGs just say how to treat the register.  */
1108       use_insn = DF_REF_INSN (use);
1109       src = SET_SRC (def_set);
1110       if (GET_CODE (src) == SUBREG
1111           && REG_P (SUBREG_REG (src))
1112           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1113           && GET_MODE (SUBREG_REG (src)) == use_mode
1114           && subreg_lowpart_p (src)
1115           && all_uses_available_at (def_insn, use_insn))
1116         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1117                                  def_insn, false);
1118     }
1119
1120   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1121      is the low part of the reg being extended then just use the inner
1122      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1123      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1124      or due to the operation being a no-op when applied to registers.
1125      For example, if we have:
1126
1127          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1128          B: (... (subreg:SI (reg:DI X)) ...)
1129
1130      and mode_rep_extended says that Y is already sign-extended,
1131      the backend will typically allow A to be combined with the
1132      definition of Y or, failing that, allow A to be deleted after
1133      reload through register tying.  Introducing more uses of Y
1134      prevents both optimisations.  */
1135   else if (subreg_lowpart_p (use_reg))
1136     {
1137       use_insn = DF_REF_INSN (use);
1138       src = SET_SRC (def_set);
1139       if ((GET_CODE (src) == ZERO_EXTEND
1140            || GET_CODE (src) == SIGN_EXTEND)
1141           && REG_P (XEXP (src, 0))
1142           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1143           && GET_MODE (XEXP (src, 0)) == use_mode
1144           && !free_load_extend (src, def_insn)
1145           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1146               != (int) GET_CODE (src))
1147           && all_uses_available_at (def_insn, use_insn))
1148         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1149                                  def_insn, false);
1150     }
1151
1152   return false;
1153 }
1154
1155 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1156
1157 static bool
1158 forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg)
1159 {
1160   rtx_insn *use_insn = DF_REF_INSN (use);
1161   rtx src, use_pat, asm_operands, new_rtx, *loc;
1162   int speed_p, i;
1163   df_ref uses;
1164
1165   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1166
1167   src = SET_SRC (def_set);
1168   use_pat = PATTERN (use_insn);
1169
1170   /* In __asm don't replace if src might need more registers than
1171      reg, as that could increase register pressure on the __asm.  */
1172   uses = DF_INSN_USES (def_insn);
1173   if (uses && DF_REF_NEXT_LOC (uses))
1174     return false;
1175
1176   update_df_init (def_insn, use_insn);
1177   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1178   asm_operands = NULL_RTX;
1179   switch (GET_CODE (use_pat))
1180     {
1181     case ASM_OPERANDS:
1182       asm_operands = use_pat;
1183       break;
1184     case SET:
1185       if (MEM_P (SET_DEST (use_pat)))
1186         {
1187           loc = &SET_DEST (use_pat);
1188           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1189           if (new_rtx)
1190             validate_unshare_change (use_insn, loc, new_rtx, true);
1191         }
1192       asm_operands = SET_SRC (use_pat);
1193       break;
1194     case PARALLEL:
1195       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1196         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1197           {
1198             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1199               {
1200                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1201                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1202                                          src, speed_p);
1203                 if (new_rtx)
1204                   validate_unshare_change (use_insn, loc, new_rtx, true);
1205               }
1206             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1207           }
1208         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1209           asm_operands = XVECEXP (use_pat, 0, i);
1210       break;
1211     default:
1212       gcc_unreachable ();
1213     }
1214
1215   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1216   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1217     {
1218       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1219       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1220       if (new_rtx)
1221         validate_unshare_change (use_insn, loc, new_rtx, true);
1222     }
1223
1224   if (num_changes_pending () == 0 || !apply_change_group ())
1225     return false;
1226
1227   update_df (use_insn, NULL);
1228   num_changes++;
1229   return true;
1230 }
1231
1232 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1233    result.  */
1234
1235 static bool
1236 forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
1237 {
1238   rtx_insn *use_insn = DF_REF_INSN (use);
1239   rtx use_set = single_set (use_insn);
1240   rtx src, reg, new_rtx, *loc;
1241   bool set_reg_equal;
1242   machine_mode mode;
1243   int asm_use = -1;
1244
1245   if (INSN_CODE (use_insn) < 0)
1246     asm_use = asm_noperands (PATTERN (use_insn));
1247
1248   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1249     return false;
1250
1251   /* Do not propagate into PC, CC0, etc.  */
1252   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1253     return false;
1254
1255   /* If def and use are subreg, check if they match.  */
1256   reg = DF_REF_REG (use);
1257   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1258     {
1259       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1260         return false;
1261     }
1262   /* Check if the def had a subreg, but the use has the whole reg.  */
1263   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1264     return false;
1265   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1266      previous case, the optimization is possible and often useful indeed.  */
1267   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1268     reg = SUBREG_REG (reg);
1269
1270   /* Make sure that we can treat REG as having the same mode as the
1271      source of DEF_SET.  */
1272   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1273     return false;
1274
1275   /* Check if the substitution is valid (last, because it's the most
1276      expensive check!).  */
1277   src = SET_SRC (def_set);
1278   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1279     return false;
1280
1281   /* Check if the def is loading something from the constant pool; in this
1282      case we would undo optimization such as compress_float_constant.
1283      Still, we can set a REG_EQUAL note.  */
1284   if (MEM_P (src) && MEM_READONLY_P (src))
1285     {
1286       rtx x = avoid_constant_pool_reference (src);
1287       if (x != src && use_set)
1288         {
1289           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1290           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1291           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1292           if (old_rtx != new_rtx)
1293             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1294         }
1295       return false;
1296     }
1297
1298   if (asm_use >= 0)
1299     return forward_propagate_asm (use, def_insn, def_set, reg);
1300
1301   /* Else try simplifying.  */
1302
1303   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1304     {
1305       loc = &SET_DEST (use_set);
1306       set_reg_equal = false;
1307     }
1308   else if (!use_set)
1309     {
1310       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1311       set_reg_equal = false;
1312     }
1313   else
1314     {
1315       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1316       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1317         loc = &XEXP (note, 0);
1318       else
1319         loc = &SET_SRC (use_set);
1320
1321       /* Do not replace an existing REG_EQUAL note if the insn is not
1322          recognized.  Either we're already replacing in the note, or we'll
1323          separately try plugging the definition in the note and simplifying.
1324          And only install a REQ_EQUAL note when the destination is a REG
1325          that isn't mentioned in USE_SET, as the note would be invalid
1326          otherwise.  We also don't want to install a note if we are merely
1327          propagating a pseudo since verifying that this pseudo isn't dead
1328          is a pain; moreover such a note won't help anything.
1329          If the use is a paradoxical subreg, make sure we don't add a
1330          REG_EQUAL note for it, because it is not equivalent, it is one
1331          possible value for it, but we can't rely on it holding that value.
1332          See PR70574.  */
1333       set_reg_equal = (note == NULL_RTX
1334                        && REG_P (SET_DEST (use_set))
1335                        && !REG_P (src)
1336                        && !(GET_CODE (src) == SUBREG
1337                             && REG_P (SUBREG_REG (src)))
1338                        && !reg_mentioned_p (SET_DEST (use_set),
1339                                             SET_SRC (use_set))
1340                        && !paradoxical_subreg_p (DF_REF_REG (use)));
1341     }
1342
1343   if (GET_MODE (*loc) == VOIDmode)
1344     mode = GET_MODE (SET_DEST (use_set));
1345   else
1346     mode = GET_MODE (*loc);
1347
1348   new_rtx = propagate_rtx (*loc, mode, reg, src,
1349                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1350
1351   if (!new_rtx)
1352     return false;
1353
1354   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1355 }
1356
1357
1358 /* Given a use USE of an insn, if it has a single reaching
1359    definition, try to forward propagate it into that insn.
1360    Return true if cfg cleanup will be needed.  */
1361
1362 static bool
1363 forward_propagate_into (df_ref use)
1364 {
1365   df_ref def;
1366   rtx_insn *def_insn, *use_insn;
1367   rtx def_set;
1368   rtx parent;
1369
1370   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1371     return false;
1372   if (DF_REF_IS_ARTIFICIAL (use))
1373     return false;
1374
1375   /* Only consider uses that have a single definition.  */
1376   def = get_def_for_use (use);
1377   if (!def)
1378     return false;
1379   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1380     return false;
1381   if (DF_REF_IS_ARTIFICIAL (def))
1382     return false;
1383
1384   /* Do not propagate loop invariant definitions inside the loop.  */
1385   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1386     return false;
1387
1388   /* Check if the use is still present in the insn!  */
1389   use_insn = DF_REF_INSN (use);
1390   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1391     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1392   else
1393     parent = PATTERN (use_insn);
1394
1395   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1396     return false;
1397
1398   def_insn = DF_REF_INSN (def);
1399   if (multiple_sets (def_insn))
1400     return false;
1401   def_set = single_set (def_insn);
1402   if (!def_set)
1403     return false;
1404
1405   /* Only try one kind of propagation.  If two are possible, we'll
1406      do it on the following iterations.  */
1407   if (forward_propagate_and_simplify (use, def_insn, def_set)
1408       || forward_propagate_subreg (use, def_insn, def_set))
1409     {
1410       if (cfun->can_throw_non_call_exceptions
1411           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1412           && purge_dead_edges (DF_REF_BB (use)))
1413         return true;
1414     }
1415   return false;
1416 }
1417
1418 \f
1419 static void
1420 fwprop_init (void)
1421 {
1422   num_changes = 0;
1423   calculate_dominance_info (CDI_DOMINATORS);
1424
1425   /* We do not always want to propagate into loops, so we have to find
1426      loops and be careful about them.  Avoid CFG modifications so that
1427      we don't have to update dominance information afterwards for
1428      build_single_def_use_links.  */
1429   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1430
1431   build_single_def_use_links ();
1432   df_set_flags (DF_DEFER_INSN_RESCAN);
1433
1434   active_defs = XNEWVEC (df_ref, max_reg_num ());
1435   if (flag_checking)
1436     active_defs_check = sparseset_alloc (max_reg_num ());
1437 }
1438
1439 static void
1440 fwprop_done (void)
1441 {
1442   loop_optimizer_finalize ();
1443
1444   use_def_ref.release ();
1445   free (active_defs);
1446   if (flag_checking)
1447     sparseset_free (active_defs_check);
1448
1449   free_dominance_info (CDI_DOMINATORS);
1450   cleanup_cfg (0);
1451   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1452
1453   if (dump_file)
1454     fprintf (dump_file,
1455              "\nNumber of successful forward propagations: %d\n\n",
1456              num_changes);
1457 }
1458
1459
1460 /* Main entry point.  */
1461
1462 static bool
1463 gate_fwprop (void)
1464 {
1465   return optimize > 0 && flag_forward_propagate;
1466 }
1467
1468 static unsigned int
1469 fwprop (void)
1470 {
1471   unsigned i;
1472
1473   fwprop_init ();
1474
1475   /* Go through all the uses.  df_uses_create will create new ones at the
1476      end, and we'll go through them as well.
1477
1478      Do not forward propagate addresses into loops until after unrolling.
1479      CSE did so because it was able to fix its own mess, but we are not.  */
1480
1481   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1482     {
1483       df_ref use = DF_USES_GET (i);
1484       if (use)
1485         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1486             || DF_REF_BB (use)->loop_father == NULL
1487             /* The outer most loop is not really a loop.  */
1488             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1489           forward_propagate_into (use);
1490     }
1491
1492   fwprop_done ();
1493   return 0;
1494 }
1495
1496 namespace {
1497
1498 const pass_data pass_data_rtl_fwprop =
1499 {
1500   RTL_PASS, /* type */
1501   "fwprop1", /* name */
1502   OPTGROUP_NONE, /* optinfo_flags */
1503   TV_FWPROP, /* tv_id */
1504   0, /* properties_required */
1505   0, /* properties_provided */
1506   0, /* properties_destroyed */
1507   0, /* todo_flags_start */
1508   TODO_df_finish, /* todo_flags_finish */
1509 };
1510
1511 class pass_rtl_fwprop : public rtl_opt_pass
1512 {
1513 public:
1514   pass_rtl_fwprop (gcc::context *ctxt)
1515     : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1516   {}
1517
1518   /* opt_pass methods: */
1519   virtual bool gate (function *) { return gate_fwprop (); }
1520   virtual unsigned int execute (function *) { return fwprop (); }
1521
1522 }; // class pass_rtl_fwprop
1523
1524 } // anon namespace
1525
1526 rtl_opt_pass *
1527 make_pass_rtl_fwprop (gcc::context *ctxt)
1528 {
1529   return new pass_rtl_fwprop (ctxt);
1530 }
1531
1532 static unsigned int
1533 fwprop_addr (void)
1534 {
1535   unsigned i;
1536
1537   fwprop_init ();
1538
1539   /* Go through all the uses.  df_uses_create will create new ones at the
1540      end, and we'll go through them as well.  */
1541   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1542     {
1543       df_ref use = DF_USES_GET (i);
1544       if (use)
1545         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1546             && DF_REF_BB (use)->loop_father != NULL
1547             /* The outer most loop is not really a loop.  */
1548             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1549           forward_propagate_into (use);
1550     }
1551
1552   fwprop_done ();
1553   return 0;
1554 }
1555
1556 namespace {
1557
1558 const pass_data pass_data_rtl_fwprop_addr =
1559 {
1560   RTL_PASS, /* type */
1561   "fwprop2", /* name */
1562   OPTGROUP_NONE, /* optinfo_flags */
1563   TV_FWPROP, /* tv_id */
1564   0, /* properties_required */
1565   0, /* properties_provided */
1566   0, /* properties_destroyed */
1567   0, /* todo_flags_start */
1568   TODO_df_finish, /* todo_flags_finish */
1569 };
1570
1571 class pass_rtl_fwprop_addr : public rtl_opt_pass
1572 {
1573 public:
1574   pass_rtl_fwprop_addr (gcc::context *ctxt)
1575     : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1576   {}
1577
1578   /* opt_pass methods: */
1579   virtual bool gate (function *) { return gate_fwprop (); }
1580   virtual unsigned int execute (function *) { return fwprop_addr (); }
1581
1582 }; // class pass_rtl_fwprop_addr
1583
1584 } // anon namespace
1585
1586 rtl_opt_pass *
1587 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1588 {
1589   return new pass_rtl_fwprop_addr (ctxt);
1590 }