gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
   3    Contributed by Paolo Bonzini and Steven Bosscher.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "diagnostic-core.h"
  26
  27 #include "sparseset.h"
  28 #include "rtl.h"
  29 #include "tm_p.h"
  30 #include "insn-config.h"
  31 #include "recog.h"
  32 #include "flags.h"
  33 #include "obstack.h"
  34 #include "basic-block.h"
  35 #include "df.h"
  36 #include "target.h"
  37 #include "cfgloop.h"
  38 #include "tree-pass.h"
  39 #include "domwalk.h"
  40 #include "emit-rtl.h"
  41
  42
  43 /* This pass does simple forward propagation and simplification when an
  44    operand of an insn can only come from a single def.  This pass uses
  45    df.c, so it is global.  However, we only do limited analysis of
  46    available expressions.
  47
  48    1) The pass tries to propagate the source of the def into the use,
  49    and checks if the result is independent of the substituted value.
  50    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  51    zero, independent of the source register.
  52
  53    In particular, we propagate constants into the use site.  Sometimes
  54    RTL expansion did not put the constant in the same insn on purpose,
  55    to satisfy a predicate, and the result will fail to be recognized;
  56    but this happens rarely and in this case we can still create a
  57    REG_EQUAL note.  For multi-word operations, this
  58
  59       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  60       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  61       (set (subreg:SI (reg:DI 122) 0)
  62          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  63       (set (subreg:SI (reg:DI 122) 4)
  64          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  65
  66    can be simplified to the much simpler
  67
   68       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  69       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  70
  71    This particular propagation is also effective at putting together
  72    complex addressing modes.  We are more aggressive inside MEMs, in
  73    that all definitions are propagated if the use is in a MEM; if the
  74    result is a valid memory address we check address_cost to decide
  75    whether the substitution is worthwhile.
  76
   77    2) The pass propagates register copies.  Although this is not as
   78    effective as the copy propagation done by CSE's canon_reg, which works
   79    by walking the instruction chain, it can help the other transformations.
  80
  81    We should consider removing this optimization, and instead reorder the
  82    RTL passes, because GCSE does this transformation too.  With some luck,
  83    the CSE pass at the end of rest_of_handle_gcse could also go away.
  84
  85    3) The pass looks for paradoxical subregs that are actually unnecessary.
  86    Things like this:
  87
  88      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  89      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  90      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  91                                 (subreg:SI (reg:QI 121) 0)))
  92
  93    are very common on machines that can only do word-sized operations.
  94    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  95    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  96    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  97    above will simplify this to
  98
  99      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 100      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 101      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 102
 103    where the first two insns are now dead.
 104
 105    We used to use reaching definitions to find which uses have a
 106    single reaching definition (sounds obvious...), but this is too
 107    complex a problem in nasty testcases like PR33928.  Now we use the
 108    multiple definitions problem in df-problems.c.  The similarity
 109    between that problem and SSA form creation is taken further, in
 110    that fwprop does a dominator walk to create its chains; however,
 111    instead of creating a PHI function where multiple definitions meet
 112    I just punt and record only singleton use-def chains, which is
 113    all that is needed by fwprop.  */
 114
 115
 116 static int num_changes;
 117
 118 static vec<df_ref> use_def_ref;
 119 static vec<df_ref> reg_defs;
 120 static vec<df_ref> reg_defs_stack;
 121
 122 /* The MD bitmaps are trimmed to include only live registers to cut
 123    memory usage on testcases like insn-recog.c.  Track live registers
 124    in the basic block and do not perform forward propagation if the
 125    destination is a dead pseudo occurring in a note.  */
 126 static bitmap local_md;
 127 static bitmap local_lr;
 128
 129 /* Return the only def in USE's use-def chain, or NULL if there is
 130    more than one def in the chain.  */
 131
 132 static inline df_ref
 133 get_def_for_use (df_ref use)
 134 {
 135   return use_def_ref[DF_REF_ID (use)];
 136 }
 137
 138
 139 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 140    TOP_FLAG says which artificials uses should be used, when DEF_REC
 141    is an artificial def vector.  LOCAL_MD is modified as after a
 142    df_md_simulate_* function; we do more or less the same processing
 143    done there, so we do not use those functions.  */
 144
 145 #define DF_MD_GEN_FLAGS \
 146         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 147
 148 static void
 149 process_defs (df_ref *def_rec, int top_flag)
 150 {
 151   df_ref def;
 152   while ((def = *def_rec++) != NULL)
 153     {
 154       df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
 155       unsigned int dregno;
 156
 157       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 158         continue;
 159
 160       dregno = DF_REF_REGNO (def);
 161       if (curr_def)
 162         reg_defs_stack.safe_push (curr_def);
 163       else
 164         {
 165           /* Do not store anything if "transitioning" from NULL to NULL.  But
 166              otherwise, push a special entry on the stack to tell the
 167              leave_block callback that the entry in reg_defs was NULL.  */
 168           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 169             ;
 170           else
 171             reg_defs_stack.safe_push (def);
 172         }
 173
 174       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 175         {
 176           bitmap_set_bit (local_md, dregno);
 177           reg_defs[dregno] = NULL;
 178         }
 179       else
 180         {
 181           bitmap_clear_bit (local_md, dregno);
 182           reg_defs[dregno] = def;
 183         }
 184     }
 185 }
 186
 187
 188 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 189    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 190    TOP_FLAG says which artificials uses should be used, when USE_REC
 191    is an artificial use vector.  */
 192
 193 static void
 194 process_uses (df_ref *use_rec, int top_flag)
 195 {
 196   df_ref use;
 197   while ((use = *use_rec++) != NULL)
 198     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 199       {
 200         unsigned int uregno = DF_REF_REGNO (use);
 201         if (reg_defs[uregno]
 202             && !bitmap_bit_p (local_md, uregno)
 203             && bitmap_bit_p (local_lr, uregno))
 204           use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
 205       }
 206 }
 207
 208 class single_def_use_dom_walker : public dom_walker
 209 {
 210 public:
 211   single_def_use_dom_walker (cdi_direction direction)
 212     : dom_walker (direction) {}
 213   virtual void before_dom_children (basic_block);
 214   virtual void after_dom_children (basic_block);
 215 };
 216
 217 void
 218 single_def_use_dom_walker::before_dom_children (basic_block bb)
 219 {
 220   int bb_index = bb->index;
 221   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 222   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 223   rtx insn;
 224
 225   bitmap_copy (local_md, &md_bb_info->in);
 226   bitmap_copy (local_lr, &lr_bb_info->in);
 227
 228   /* Push a marker for the leave_block callback.  */
 229   reg_defs_stack.safe_push (NULL);
 230
 231   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 232   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 233
 234   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 235      the live registers if there are unused artificial defs.  We prefer
 236      liveness to be underestimated.  */
 237
 238   FOR_BB_INSNS (bb, insn)
 239     if (INSN_P (insn))
 240       {
 241         unsigned int uid = INSN_UID (insn);
 242         process_uses (DF_INSN_UID_USES (uid), 0);
 243         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 244         process_defs (DF_INSN_UID_DEFS (uid), 0);
 245         df_simulate_one_insn_forwards (bb, insn, local_lr);
 246       }
 247
 248   process_uses (df_get_artificial_uses (bb_index), 0);
 249   process_defs (df_get_artificial_defs (bb_index), 0);
 250 }
 251
 252 /* Pop the definitions created in this basic block when leaving its
 253    dominated parts.  */
 254
 255 void
 256 single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED)
 257 {
 258   df_ref saved_def;
 259   while ((saved_def = reg_defs_stack.pop ()) != NULL)
 260     {
 261       unsigned int dregno = DF_REF_REGNO (saved_def);
 262
 263       /* See also process_defs.  */
 264       if (saved_def == reg_defs[dregno])
 265         reg_defs[dregno] = NULL;
 266       else
 267         reg_defs[dregno] = saved_def;
 268     }
 269 }
 270
 271
 272 /* Build a vector holding the reaching definitions of uses reached by a
 273    single dominating definition.  */
 274
 275 static void
 276 build_single_def_use_links (void)
 277 {
 278   /* We use the multiple definitions problem to compute our restricted
 279      use-def chains.  */
 280   df_set_flags (DF_EQ_NOTES);
 281   df_md_add_problem ();
 282   df_note_add_problem ();
 283   df_analyze ();
 284   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 285
 286   use_def_ref.create (DF_USES_TABLE_SIZE ());
 287   use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());
 288
 289   reg_defs.create (max_reg_num ());
 290   reg_defs.safe_grow_cleared (max_reg_num ());
 291
 292   reg_defs_stack.create (n_basic_blocks * 10);
 293   local_md = BITMAP_ALLOC (NULL);
 294   local_lr = BITMAP_ALLOC (NULL);
 295
 296   /* Walk the dominator tree looking for single reaching definitions
 297      dominating the uses.  This is similar to how SSA form is built.  */
 298   single_def_use_dom_walker (CDI_DOMINATORS)
 299     .walk (cfun->cfg->x_entry_block_ptr);
 300
 301   BITMAP_FREE (local_lr);
 302   BITMAP_FREE (local_md);
 303   reg_defs.release ();
 304   reg_defs_stack.release ();
 305 }
 306
 307 \f
 308 /* Do not try to replace constant addresses or addresses of local and
 309    argument slots.  These MEM expressions are made only once and inserted
 310    in many instructions, as well as being used to control symbol table
 311    output.  It is not safe to clobber them.
 312
 313    There are some uncommon cases where the address is already in a register
 314    for some reason, but we cannot take advantage of that because we have
 315    no easy way to unshare the MEM.  In addition, looking up all stack
 316    addresses is costly.  */
 317
 318 static bool
 319 can_simplify_addr (rtx addr)
 320 {
 321   rtx reg;
 322
 323   if (CONSTANT_ADDRESS_P (addr))
 324     return false;
 325
 326   if (GET_CODE (addr) == PLUS)
 327     reg = XEXP (addr, 0);
 328   else
 329     reg = addr;
 330
 331   return (!REG_P (reg)
 332           || (REGNO (reg) != FRAME_POINTER_REGNUM
 333               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 334               && REGNO (reg) != ARG_POINTER_REGNUM));
 335 }
 336
 337 /* Returns a canonical version of X for the address, from the point of view,
 338    that all multiplications are represented as MULT instead of the multiply
 339    by a power of 2 being represented as ASHIFT.
 340
 341    Every ASHIFT we find has been made by simplify_gen_binary and was not
 342    there before, so it is not shared.  So we can do this in place.  */
 343
 344 static void
 345 canonicalize_address (rtx x)
 346 {
 347   for (;;)
 348     switch (GET_CODE (x))
 349       {
 350       case ASHIFT:
 351         if (CONST_INT_P (XEXP (x, 1))
 352             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 353             && INTVAL (XEXP (x, 1)) >= 0)
 354           {
 355             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 356             PUT_CODE (x, MULT);
 357             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 358                                         GET_MODE (x));
 359           }
 360
 361         x = XEXP (x, 0);
 362         break;
 363
 364       case PLUS:
 365         if (GET_CODE (XEXP (x, 0)) == PLUS
 366             || GET_CODE (XEXP (x, 0)) == ASHIFT
 367             || GET_CODE (XEXP (x, 0)) == CONST)
 368           canonicalize_address (XEXP (x, 0));
 369
 370         x = XEXP (x, 1);
 371         break;
 372
 373       case CONST:
 374         x = XEXP (x, 0);
 375         break;
 376
 377       default:
 378         return;
 379       }
 380 }
 381
 382 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 383    for a memory access in the given MODE.  */
 384
 385 static bool
 386 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 387                         addr_space_t as, bool speed)
 388 {
 389   int gain;
 390
 391   if (rtx_equal_p (old_rtx, new_rtx)
 392       || !memory_address_addr_space_p (mode, new_rtx, as))
 393     return false;
 394
 395   /* Copy propagation is always ok.  */
 396   if (REG_P (old_rtx) && REG_P (new_rtx))
 397     return true;
 398
 399   /* Prefer the new address if it is less expensive.  */
 400   gain = (address_cost (old_rtx, mode, as, speed)
 401           - address_cost (new_rtx, mode, as, speed));
 402
 403   /* If the addresses have equivalent cost, prefer the new address
 404      if it has the highest `set_src_cost'.  That has the potential of
 405      eliminating the most insns without additional costs, and it
 406      is the same that cse.c used to do.  */
 407   if (gain == 0)
 408     gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed);
 409
 410   return (gain > 0);
 411 }
 412
 413
 414 /* Flags for the last parameter of propagate_rtx_1.  */
 415
 416 enum {
 417   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
 418      if it is false, propagate_rtx_1 returns false if, for at least
 419      one occurrence OLD, it failed to collapse the result to a constant.
 420      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
 421      collapse to zero if replacing (reg:M B) with (reg:M A).
 422
 423      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
 424      propagate_rtx_1 just tries to make cheaper and valid memory
 425      addresses.  */
 426   PR_CAN_APPEAR = 1,
 427
 428   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
 429      outside memory addresses.  This is needed because propagate_rtx_1 does
 430      not do any analysis on memory; thus it is very conservative and in general
 431      it will fail if non-read-only MEMs are found in the source expression.
 432
 433      PR_HANDLE_MEM is set when the source of the propagation was not
 434      another MEM.  Then, it is safe not to treat non-read-only MEMs as
 435      ``opaque'' objects.  */
 436   PR_HANDLE_MEM = 2,
 437
 438   /* Set when costs should be optimized for speed.  */
 439   PR_OPTIMIZE_FOR_SPEED = 4
 440 };
 441
 442
 443 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
 444    resulting expression.  Replace *PX with a new RTL expression if an
 445    occurrence of OLD was found.
 446
 447    This is only a wrapper around simplify-rtx.c: do not add any pattern
 448    matching code here.  (The sole exception is the handling of LO_SUM, but
 449    that is because there is no simplify_gen_* function for LO_SUM).  */
 450
 451 static bool
 452 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 453 {
 454   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 455   enum rtx_code code = GET_CODE (x);
 456   enum machine_mode mode = GET_MODE (x);
 457   enum machine_mode op_mode;
 458   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
 459   bool valid_ops = true;
 460
 461   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
 462     {
 463       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
 464          they have side effects or not).  */
 465       *px = (side_effects_p (x)
 466              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
 467              : gen_rtx_SCRATCH (GET_MODE (x)));
 468       return false;
 469     }
 470
 471   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
 472      address, and we are *not* inside one.  */
 473   if (x == old_rtx)
 474     {
 475       *px = new_rtx;
 476       return can_appear;
 477     }
 478
 479   /* If this is an expression, try recursive substitution.  */
 480   switch (GET_RTX_CLASS (code))
 481     {
 482     case RTX_UNARY:
 483       op0 = XEXP (x, 0);
 484       op_mode = GET_MODE (op0);
 485       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 486       if (op0 == XEXP (x, 0))
 487         return true;
 488       tem = simplify_gen_unary (code, mode, op0, op_mode);
 489       break;
 490
 491     case RTX_BIN_ARITH:
 492     case RTX_COMM_ARITH:
 493       op0 = XEXP (x, 0);
 494       op1 = XEXP (x, 1);
 495       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 496       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 497       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 498         return true;
 499       tem = simplify_gen_binary (code, mode, op0, op1);
 500       break;
 501
 502     case RTX_COMPARE:
 503     case RTX_COMM_COMPARE:
 504       op0 = XEXP (x, 0);
 505       op1 = XEXP (x, 1);
 506       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
 507       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 508       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 509       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 510         return true;
 511       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 512       break;
 513
 514     case RTX_TERNARY:
 515     case RTX_BITFIELD_OPS:
 516       op0 = XEXP (x, 0);
 517       op1 = XEXP (x, 1);
 518       op2 = XEXP (x, 2);
 519       op_mode = GET_MODE (op0);
 520       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 521       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 522       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
 523       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 524         return true;
 525       if (op_mode == VOIDmode)
 526         op_mode = GET_MODE (op0);
 527       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 528       break;
 529
 530     case RTX_EXTRA:
 531       /* The only case we try to handle is a SUBREG.  */
 532       if (code == SUBREG)
 533         {
 534           op0 = XEXP (x, 0);
 535           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 536           if (op0 == XEXP (x, 0))
 537             return true;
 538           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 539                                      SUBREG_BYTE (x));
 540         }
 541       break;
 542
 543     case RTX_OBJ:
 544       if (code == MEM && x != new_rtx)
 545         {
 546           rtx new_op0;
 547           op0 = XEXP (x, 0);
 548
 549           /* There are some addresses that we cannot work on.  */
 550           if (!can_simplify_addr (op0))
 551             return true;
 552
 553           op0 = new_op0 = targetm.delegitimize_address (op0);
 554           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
 555                                         flags | PR_CAN_APPEAR);
 556
 557           /* Dismiss transformation that we do not want to carry on.  */
 558           if (!valid_ops
 559               || new_op0 == op0
 560               || !(GET_MODE (new_op0) == GET_MODE (op0)
 561                    || GET_MODE (new_op0) == VOIDmode))
 562             return true;
 563
 564           canonicalize_address (new_op0);
 565
 566           /* Copy propagations are always ok.  Otherwise check the costs.  */
 567           if (!(REG_P (old_rtx) && REG_P (new_rtx))
 568               && !should_replace_address (op0, new_op0, GET_MODE (x),
 569                                           MEM_ADDR_SPACE (x),
 570                                           flags & PR_OPTIMIZE_FOR_SPEED))
 571             return true;
 572
 573           tem = replace_equiv_address_nv (x, new_op0);
 574         }
 575
 576       else if (code == LO_SUM)
 577         {
 578           op0 = XEXP (x, 0);
 579           op1 = XEXP (x, 1);
 580
 581           /* The only simplification we do attempts to remove references to op0
 582              or make it constant -- in both cases, op0's invalidity will not
 583              make the result invalid.  */
 584           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
 585           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 586           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 587             return true;
 588
 589           /* (lo_sum (high x) x) -> x  */
 590           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 591             tem = op1;
 592           else
 593             tem = gen_rtx_LO_SUM (mode, op0, op1);
 594
 595           /* OP1 is likely not a legitimate address, otherwise there would have
 596              been no LO_SUM.  We want it to disappear if it is invalid, return
 597              false in that case.  */
 598           return memory_address_p (mode, tem);
 599         }
 600
 601       else if (code == REG)
 602         {
 603           if (rtx_equal_p (x, old_rtx))
 604             {
 605               *px = new_rtx;
 606               return can_appear;
 607             }
 608         }
 609       break;
 610
 611     default:
 612       break;
 613     }
 614
 615   /* No change, no trouble.  */
 616   if (tem == NULL_RTX)
 617     return true;
 618
 619   *px = tem;
 620
 621   /* The replacement we made so far is valid, if all of the recursive
 622      replacements were valid, or we could simplify everything to
 623      a constant.  */
 624   return valid_ops || can_appear || CONSTANT_P (tem);
 625 }
 626
 627
 628 /* for_each_rtx traversal function that returns 1 if BODY points to
 629    a non-constant mem.  */
 630
 631 static int
 632 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 633 {
 634   rtx x = *body;
 635   return MEM_P (x) && !MEM_READONLY_P (x);
 636 }
 637
 638
 639 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 640    resulting expression (in mode MODE).  Return a new expression if it is
 641    a constant, otherwise X.
 642
 643    Simplifications where occurrences of NEW collapse to a constant are always
 644    accepted.  All simplifications are accepted if NEW is a pseudo too.
 645    Otherwise, we accept simplifications that have a lower or equal cost.  */
 646
 647 static rtx
 648 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 649                bool speed)
 650 {
 651   rtx tem;
 652   bool collapsed;
 653   int flags;
 654
 655   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 656     return NULL_RTX;
 657
 658   flags = 0;
 659   if (REG_P (new_rtx)
 660       || CONSTANT_P (new_rtx)
 661       || (GET_CODE (new_rtx) == SUBREG
 662           && REG_P (SUBREG_REG (new_rtx))
 663           && (GET_MODE_SIZE (mode)
 664               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 665     flags |= PR_CAN_APPEAR;
 666   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 667     flags |= PR_HANDLE_MEM;
 668
 669   if (speed)
 670     flags |= PR_OPTIMIZE_FOR_SPEED;
 671
 672   tem = x;
 673   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 674   if (tem == x || !collapsed)
 675     return NULL_RTX;
 676
 677   /* gen_lowpart_common will not be able to process VOIDmode entities other
 678      than CONST_INTs.  */
 679   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 680     return NULL_RTX;
 681
 682   if (GET_MODE (tem) == VOIDmode)
 683     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 684   else
 685     gcc_assert (GET_MODE (tem) == mode);
 686
 687   return tem;
 688 }
 689
 690
 691 \f
 692
 693 /* Return true if the register from reference REF is killed
 694    between FROM to (but not including) TO.  */
 695
 696 static bool
 697 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 698 {
 699   rtx insn;
 700
 701   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 702     {
 703       df_ref *def_rec;
 704       if (!INSN_P (insn))
 705         continue;
 706
 707       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 708         {
 709           df_ref def = *def_rec;
 710           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 711             return true;
 712         }
 713     }
 714   return false;
 715 }
 716
 717
 718 /* Check if the given DEF is available in INSN.  This would require full
 719    computation of available expressions; we check only restricted conditions:
 720    - if DEF is the sole definition of its register, go ahead;
 721    - in the same basic block, we check for no definitions killing the
 722      definition of DEF_INSN;
 723    - if USE's basic block has DEF's basic block as the sole predecessor,
 724      we check if the definition is killed after DEF_INSN or before
 725      TARGET_INSN insn, in their respective basic blocks.  */
 726 static bool
 727 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 728 {
 729   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 730   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 731   int regno;
 732   df_ref def;
 733
 734   /* We used to have a def reaching a use that is _before_ the def,
 735      with the def not dominating the use even though the use and def
 736      are in the same basic block, when a register may be used
 737      uninitialized in a loop.  This should not happen anymore since
 738      we do not use reaching definitions, but still we test for such
 739      cases and assume that DEF is not available.  */
 740   if (def_bb == target_bb
 741       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 742       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 743     return true;
 744
 745   /* Check if the reg in USE has only one definition.  We already
 746      know that this definition reaches use, or we wouldn't be here.
 747      However, this is invalid for hard registers because if they are
 748      live at the beginning of the function it does not mean that we
 749      have an uninitialized access.  */
 750   regno = DF_REF_REGNO (use);
 751   def = DF_REG_DEF_CHAIN (regno);
 752   if (def
 753       && DF_REF_NEXT_REG (def) == NULL
 754       && regno >= FIRST_PSEUDO_REGISTER)
 755     return false;
 756
 757   /* Check locally if we are in the same basic block.  */
 758   if (def_bb == target_bb)
 759     return local_ref_killed_between_p (use, def_insn, target_insn);
 760
 761   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 762   if (single_pred_p (target_bb)
 763       && single_pred (target_bb) == def_bb)
 764     {
 765       df_ref x;
 766
 767       /* See if USE is killed between DEF_INSN and the last insn in the
 768          basic block containing DEF_INSN.  */
 769       x = df_bb_regno_last_def_find (def_bb, regno);
 770       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 771         return true;
 772
 773       /* See if USE is killed between TARGET_INSN and the first insn in the
 774          basic block containing TARGET_INSN.  */
 775       x = df_bb_regno_first_def_find (target_bb, regno);
 776       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 777         return true;
 778
 779       return false;
 780     }
 781
 782   /* Otherwise assume the worst case.  */
 783   return true;
 784 }
 785
 786
 787 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 788    would require full computation of available expressions;
 789    we check only restricted conditions, see use_killed_between.  */
 790 static bool
 791 all_uses_available_at (rtx def_insn, rtx target_insn)
 792 {
 793   df_ref *use_rec;
 794   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 795   rtx def_set = single_set (def_insn);
 796   rtx next;
 797
 798   gcc_assert (def_set);
 799
 800   /* If target_insn comes right after def_insn, which is very common
 801      for addresses, we can use a quicker test.  Ignore debug insns
 802      other than target insns for this.  */
 803   next = NEXT_INSN (def_insn);
 804   while (next && next != target_insn && DEBUG_INSN_P (next))
 805     next = NEXT_INSN (next);
 806   if (next == target_insn && REG_P (SET_DEST (def_set)))
 807     {
 808       rtx def_reg = SET_DEST (def_set);
 809
 810       /* If the insn uses the reg that it defines, the substitution is
 811          invalid.  */
 812       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 813         {
 814           df_ref use = *use_rec;
 815           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 816             return false;
 817         }
 818       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 819         {
 820           df_ref use = *use_rec;
 821           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 822             return false;
 823         }
 824     }
 825   else
 826     {
 827       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 828
 829       /* Look at all the uses of DEF_INSN, and see if they are not
 830          killed between DEF_INSN and TARGET_INSN.  */
 831       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 832         {
 833           df_ref use = *use_rec;
 834           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 835             return false;
 836           if (use_killed_between (use, def_insn, target_insn))
 837             return false;
 838         }
 839       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 840         {
 841           df_ref use = *use_rec;
 842           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 843             return false;
 844           if (use_killed_between (use, def_insn, target_insn))
 845             return false;
 846         }
 847     }
 848
 849   return true;
 850 }
 851
 852 \f
 853 static df_ref *active_defs;
 854 #ifdef ENABLE_CHECKING
 855 static sparseset active_defs_check;
 856 #endif
 857
 858 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 859    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 860    too, for checking purposes.  */
 861
 862 static void
 863 register_active_defs (df_ref *use_rec)
 864 {
 865   while (*use_rec)
 866     {
 867       df_ref use = *use_rec++;
 868       df_ref def = get_def_for_use (use);
 869       int regno = DF_REF_REGNO (use);
 870
 871 #ifdef ENABLE_CHECKING
 872       sparseset_set_bit (active_defs_check, regno);
 873 #endif
 874       active_defs[regno] = def;
 875     }
 876 }
 877
 878
 879 /* Build the use->def links that we use to update the dataflow info
 880    for new uses.  Note that building the links is very cheap and if
 881    it were done earlier, they could be used to rule out invalid
 882    propagations (in addition to what is done in all_uses_available_at).
 883    I'm not doing this yet, though.  */
 884
 885 static void
 886 update_df_init (rtx def_insn, rtx insn)
 887 {
 888 #ifdef ENABLE_CHECKING
 889   sparseset_clear (active_defs_check);
 890 #endif
 891   register_active_defs (DF_INSN_USES (def_insn));
 892   register_active_defs (DF_INSN_USES (insn));
 893   register_active_defs (DF_INSN_EQ_USES (insn));
 894 }
 895
 896
 897 /* Update the USE_DEF_REF array for the given use, using the active definitions
 898    in the ACTIVE_DEFS array to match pseudos to their def. */
 899
 900 static inline void
 901 update_uses (df_ref *use_rec)
 902 {
 903   while (*use_rec)
 904     {
 905       df_ref use = *use_rec++;
 906       int regno = DF_REF_REGNO (use);
 907
 908       /* Set up the use-def chain.  */
 909       if (DF_REF_ID (use) >= (int) use_def_ref.length ())
 910         use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);
 911
 912 #ifdef ENABLE_CHECKING
 913       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 914 #endif
 915       use_def_ref[DF_REF_ID (use)] = active_defs[regno];
 916     }
 917 }
 918
 919
 920 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 921    uses if NOTES_ONLY is true.  */
 922
 923 static void
 924 update_df (rtx insn, rtx note)
 925 {
 926   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 927
 928   if (note)
 929     {
 930       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 931       df_notes_rescan (insn);
 932     }
 933   else
 934     {
 935       df_uses_create (&PATTERN (insn), insn, 0);
 936       df_insn_rescan (insn);
 937       update_uses (DF_INSN_INFO_USES (insn_info));
 938     }
 939
 940   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 941 }
 942
 943
 944 /* Try substituting NEW into LOC, which originated from forward propagation
 945    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 946    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 947    new insn is not recognized.  Return whether the substitution was
 948    performed.  */
 949
 950 static bool
 951 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 952 {
 953   rtx insn = DF_REF_INSN (use);
 954   rtx set = single_set (insn);
 955   rtx note = NULL_RTX;
 956   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 957   int old_cost = 0;
 958   bool ok;
 959
 960   update_df_init (def_insn, insn);
 961
 962   /* forward_propagate_subreg may be operating on an instruction with
 963      multiple sets.  If so, assume the cost of the new instruction is
 964      not greater than the old one.  */
 965   if (set)
 966     old_cost = set_src_cost (SET_SRC (set), speed);
 967   if (dump_file)
 968     {
 969       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 970       print_inline_rtx (dump_file, *loc, 2);
 971       fprintf (dump_file, "\n with ");
 972       print_inline_rtx (dump_file, new_rtx, 2);
 973       fprintf (dump_file, "\n");
 974     }
 975
 976   validate_unshare_change (insn, loc, new_rtx, true);
 977   if (!verify_changes (0))
 978     {
 979       if (dump_file)
 980         fprintf (dump_file, "Changes to insn %d not recognized\n",
 981                  INSN_UID (insn));
 982       ok = false;
 983     }
 984
 985   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 986            && set
 987            && set_src_cost (SET_SRC (set), speed) > old_cost)
 988     {
 989       if (dump_file)
 990         fprintf (dump_file, "Changes to insn %d not profitable\n",
 991                  INSN_UID (insn));
 992       ok = false;
 993     }
 994
 995   else
 996     {
 997       if (dump_file)
 998         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
 999       ok = true;
1000     }
1001
1002   if (ok)
1003     {
1004       confirm_change_group ();
1005       num_changes++;
1006     }
1007   else
1008     {
1009       cancel_changes (0);
1010
1011       /* Can also record a simplified value in a REG_EQUAL note,
1012          making a new one if one does not already exist.  */
1013       if (set_reg_equal)
1014         {
1015           if (dump_file)
1016             fprintf (dump_file, " Setting REG_EQUAL note\n");
1017
1018           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1019         }
1020     }
1021
1022   if ((ok || note) && !CONSTANT_P (new_rtx))
1023     update_df (insn, note);
1024
1025   return ok;
1026 }
1027
1028 /* For the given single_set INSN, containing SRC known to be a
1029    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1030    is redundant due to the register being set by a LOAD_EXTEND_OP
1031    load from memory.  */
1032
1033 static bool
1034 free_load_extend (rtx src, rtx insn)
1035 {
1036   rtx reg;
1037   df_ref *use_vec;
1038   df_ref use = 0, def;
1039
1040   reg = XEXP (src, 0);
1041 #ifdef LOAD_EXTEND_OP
1042   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1043 #endif
1044     return false;
1045
1046   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1047     {
1048       use = *use_vec;
1049
1050       if (!DF_REF_IS_ARTIFICIAL (use)
1051           && DF_REF_TYPE (use) == DF_REF_REG_USE
1052           && DF_REF_REG (use) == reg)
1053         break;
1054     }
1055   if (!use)
1056     return false;
1057
1058   def = get_def_for_use (use);
1059   if (!def)
1060     return false;
1061
1062   if (DF_REF_IS_ARTIFICIAL (def))
1063     return false;
1064
1065   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1066     {
1067       rtx patt = PATTERN (DF_REF_INSN (def));
1068
1069       if (GET_CODE (patt) == SET
1070           && GET_CODE (SET_SRC (patt)) == MEM
1071           && rtx_equal_p (SET_DEST (patt), reg))
1072         return true;
1073     }
1074   return false;
1075 }
1076
1077 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1078
1079 static bool
1080 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1081 {
1082   rtx use_reg = DF_REF_REG (use);
1083   rtx use_insn, src;
1084
1085   /* Only consider subregs... */
1086   enum machine_mode use_mode = GET_MODE (use_reg);
1087   if (GET_CODE (use_reg) != SUBREG
1088       || !REG_P (SET_DEST (def_set)))
1089     return false;
1090
1091   /* If this is a paradoxical SUBREG...  */
1092   if (GET_MODE_SIZE (use_mode)
1093       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1094     {
1095       /* If this is a paradoxical SUBREG, we have no idea what value the
1096          extra bits would have.  However, if the operand is equivalent to
1097          a SUBREG whose operand is the same as our mode, and all the modes
1098          are within a word, we can just use the inner operand because
1099          these SUBREGs just say how to treat the register.  */
1100       use_insn = DF_REF_INSN (use);
1101       src = SET_SRC (def_set);
1102       if (GET_CODE (src) == SUBREG
1103           && REG_P (SUBREG_REG (src))
1104           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1105           && GET_MODE (SUBREG_REG (src)) == use_mode
1106           && subreg_lowpart_p (src)
1107           && all_uses_available_at (def_insn, use_insn))
1108         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1109                                  def_insn, false);
1110     }
1111
1112   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1113      is the low part of the reg being extended then just use the inner
1114      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1115      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1116      or due to the operation being a no-op when applied to registers.
1117      For example, if we have:
1118
1119          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1120          B: (... (subreg:SI (reg:DI X)) ...)
1121
1122      and mode_rep_extended says that Y is already sign-extended,
1123      the backend will typically allow A to be combined with the
1124      definition of Y or, failing that, allow A to be deleted after
1125      reload through register tying.  Introducing more uses of Y
1126      prevents both optimisations.  */
1127   else if (subreg_lowpart_p (use_reg))
1128     {
1129       use_insn = DF_REF_INSN (use);
1130       src = SET_SRC (def_set);
1131       if ((GET_CODE (src) == ZERO_EXTEND
1132            || GET_CODE (src) == SIGN_EXTEND)
1133           && REG_P (XEXP (src, 0))
1134           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1135           && GET_MODE (XEXP (src, 0)) == use_mode
1136           && !free_load_extend (src, def_insn)
1137           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1138               != (int) GET_CODE (src))
1139           && all_uses_available_at (def_insn, use_insn))
1140         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1141                                  def_insn, false);
1142     }
1143
1144   return false;
1145 }
1146
1147 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1148
1149 static bool
1150 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1151 {
1152   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1153   int speed_p, i;
1154   df_ref *use_vec;
1155
1156   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1157
1158   src = SET_SRC (def_set);
1159   use_pat = PATTERN (use_insn);
1160
1161   /* In __asm don't replace if src might need more registers than
1162      reg, as that could increase register pressure on the __asm.  */
1163   use_vec = DF_INSN_USES (def_insn);
1164   if (use_vec[0] && use_vec[1])
1165     return false;
1166
1167   update_df_init (def_insn, use_insn);
1168   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1169   asm_operands = NULL_RTX;
1170   switch (GET_CODE (use_pat))
1171     {
1172     case ASM_OPERANDS:
1173       asm_operands = use_pat;
1174       break;
1175     case SET:
1176       if (MEM_P (SET_DEST (use_pat)))
1177         {
1178           loc = &SET_DEST (use_pat);
1179           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1180           if (new_rtx)
1181             validate_unshare_change (use_insn, loc, new_rtx, true);
1182         }
1183       asm_operands = SET_SRC (use_pat);
1184       break;
1185     case PARALLEL:
1186       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1187         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1188           {
1189             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1190               {
1191                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1192                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1193                                          src, speed_p);
1194                 if (new_rtx)
1195                   validate_unshare_change (use_insn, loc, new_rtx, true);
1196               }
1197             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1198           }
1199         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1200           asm_operands = XVECEXP (use_pat, 0, i);
1201       break;
1202     default:
1203       gcc_unreachable ();
1204     }
1205
1206   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1207   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1208     {
1209       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1210       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1211       if (new_rtx)
1212         validate_unshare_change (use_insn, loc, new_rtx, true);
1213     }
1214
1215   if (num_changes_pending () == 0 || !apply_change_group ())
1216     return false;
1217
1218   update_df (use_insn, NULL);
1219   num_changes++;
1220   return true;
1221 }
1222
1223 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1224    result.  */
1225
1226 static bool
1227 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1228 {
1229   rtx use_insn = DF_REF_INSN (use);
1230   rtx use_set = single_set (use_insn);
1231   rtx src, reg, new_rtx, *loc;
1232   bool set_reg_equal;
1233   enum machine_mode mode;
1234   int asm_use = -1;
1235
1236   if (INSN_CODE (use_insn) < 0)
1237     asm_use = asm_noperands (PATTERN (use_insn));
1238
1239   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1240     return false;
1241
1242   /* Do not propagate into PC, CC0, etc.  */
1243   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1244     return false;
1245
1246   /* If def and use are subreg, check if they match.  */
1247   reg = DF_REF_REG (use);
1248   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1249     {
1250       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1251         return false;
1252     }
1253   /* Check if the def had a subreg, but the use has the whole reg.  */
1254   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1255     return false;
1256   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1257      previous case, the optimization is possible and often useful indeed.  */
1258   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1259     reg = SUBREG_REG (reg);
1260
1261   /* Make sure that we can treat REG as having the same mode as the
1262      source of DEF_SET.  */
1263   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1264     return false;
1265
1266   /* Check if the substitution is valid (last, because it's the most
1267      expensive check!).  */
1268   src = SET_SRC (def_set);
1269   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1270     return false;
1271
1272   /* Check if the def is loading something from the constant pool; in this
1273      case we would undo optimization such as compress_float_constant.
1274      Still, we can set a REG_EQUAL note.  */
1275   if (MEM_P (src) && MEM_READONLY_P (src))
1276     {
1277       rtx x = avoid_constant_pool_reference (src);
1278       if (x != src && use_set)
1279         {
1280           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1281           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1282           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1283           if (old_rtx != new_rtx)
1284             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1285         }
1286       return false;
1287     }
1288
1289   if (asm_use >= 0)
1290     return forward_propagate_asm (use, def_insn, def_set, reg);
1291
1292   /* Else try simplifying.  */
1293
1294   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1295     {
1296       loc = &SET_DEST (use_set);
1297       set_reg_equal = false;
1298     }
1299   else if (!use_set)
1300     {
1301       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1302       set_reg_equal = false;
1303     }
1304   else
1305     {
1306       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1307       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1308         loc = &XEXP (note, 0);
1309       else
1310         loc = &SET_SRC (use_set);
1311
1312       /* Do not replace an existing REG_EQUAL note if the insn is not
1313          recognized.  Either we're already replacing in the note, or we'll
1314          separately try plugging the definition in the note and simplifying.
1315          And only install a REQ_EQUAL note when the destination is a REG
1316          that isn't mentioned in USE_SET, as the note would be invalid
1317          otherwise.  We also don't want to install a note if we are merely
1318          propagating a pseudo since verifying that this pseudo isn't dead
1319          is a pain; moreover such a note won't help anything.  */
1320       set_reg_equal = (note == NULL_RTX
1321                        && REG_P (SET_DEST (use_set))
1322                        && !REG_P (src)
1323                        && !(GET_CODE (src) == SUBREG
1324                             && REG_P (SUBREG_REG (src)))
1325                        && !reg_mentioned_p (SET_DEST (use_set),
1326                                             SET_SRC (use_set)));
1327     }
1328
1329   if (GET_MODE (*loc) == VOIDmode)
1330     mode = GET_MODE (SET_DEST (use_set));
1331   else
1332     mode = GET_MODE (*loc);
1333
1334   new_rtx = propagate_rtx (*loc, mode, reg, src,
1335                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1336
1337   if (!new_rtx)
1338     return false;
1339
1340   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1341 }
1342
1343
1344 /* Given a use USE of an insn, if it has a single reaching
1345    definition, try to forward propagate it into that insn.
1346    Return true if cfg cleanup will be needed.  */
1347
1348 static bool
1349 forward_propagate_into (df_ref use)
1350 {
1351   df_ref def;
1352   rtx def_insn, def_set, use_insn;
1353   rtx parent;
1354
1355   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1356     return false;
1357   if (DF_REF_IS_ARTIFICIAL (use))
1358     return false;
1359
1360   /* Only consider uses that have a single definition.  */
1361   def = get_def_for_use (use);
1362   if (!def)
1363     return false;
1364   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1365     return false;
1366   if (DF_REF_IS_ARTIFICIAL (def))
1367     return false;
1368
1369   /* Do not propagate loop invariant definitions inside the loop.  */
1370   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1371     return false;
1372
1373   /* Check if the use is still present in the insn!  */
1374   use_insn = DF_REF_INSN (use);
1375   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1376     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1377   else
1378     parent = PATTERN (use_insn);
1379
1380   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1381     return false;
1382
1383   def_insn = DF_REF_INSN (def);
1384   if (multiple_sets (def_insn))
1385     return false;
1386   def_set = single_set (def_insn);
1387   if (!def_set)
1388     return false;
1389
1390   /* Only try one kind of propagation.  If two are possible, we'll
1391      do it on the following iterations.  */
1392   if (forward_propagate_and_simplify (use, def_insn, def_set)
1393       || forward_propagate_subreg (use, def_insn, def_set))
1394     {
1395       if (cfun->can_throw_non_call_exceptions
1396           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1397           && purge_dead_edges (DF_REF_BB (use)))
1398         return true;
1399     }
1400   return false;
1401 }
1402
1403 \f
1404 static void
1405 fwprop_init (void)
1406 {
1407   num_changes = 0;
1408   calculate_dominance_info (CDI_DOMINATORS);
1409
1410   /* We do not always want to propagate into loops, so we have to find
1411      loops and be careful about them.  Avoid CFG modifications so that
1412      we don't have to update dominance information afterwards for
1413      build_single_def_use_links.  */
1414   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1415
1416   build_single_def_use_links ();
1417   df_set_flags (DF_DEFER_INSN_RESCAN);
1418
1419   active_defs = XNEWVEC (df_ref, max_reg_num ());
1420 #ifdef ENABLE_CHECKING
1421   active_defs_check = sparseset_alloc (max_reg_num ());
1422 #endif
1423 }
1424
1425 static void
1426 fwprop_done (void)
1427 {
1428   loop_optimizer_finalize ();
1429
1430   use_def_ref.release ();
1431   free (active_defs);
1432 #ifdef ENABLE_CHECKING
1433   sparseset_free (active_defs_check);
1434 #endif
1435
1436   free_dominance_info (CDI_DOMINATORS);
1437   cleanup_cfg (0);
1438   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1439
1440   if (dump_file)
1441     fprintf (dump_file,
1442              "\nNumber of successful forward propagations: %d\n\n",
1443              num_changes);
1444 }
1445
1446
1447 /* Main entry point.  */
1448
1449 static bool
1450 gate_fwprop (void)
1451 {
1452   return optimize > 0 && flag_forward_propagate;
1453 }
1454
1455 static unsigned int
1456 fwprop (void)
1457 {
1458   unsigned i;
1459   bool need_cleanup = false;
1460
1461   fwprop_init ();
1462
1463   /* Go through all the uses.  df_uses_create will create new ones at the
1464      end, and we'll go through them as well.
1465
1466      Do not forward propagate addresses into loops until after unrolling.
1467      CSE did so because it was able to fix its own mess, but we are not.  */
1468
1469   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1470     {
1471       df_ref use = DF_USES_GET (i);
1472       if (use)
1473         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1474             || DF_REF_BB (use)->loop_father == NULL
1475             /* The outer most loop is not really a loop.  */
1476             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1477           need_cleanup |= forward_propagate_into (use);
1478     }
1479
1480   fwprop_done ();
1481   if (need_cleanup)
1482     cleanup_cfg (0);
1483   return 0;
1484 }
1485
1486 namespace {
1487
1488 const pass_data pass_data_rtl_fwprop =
1489 {
1490   RTL_PASS, /* type */
1491   "fwprop1", /* name */
1492   OPTGROUP_NONE, /* optinfo_flags */
1493   true, /* has_gate */
1494   true, /* has_execute */
1495   TV_FWPROP, /* tv_id */
1496   0, /* properties_required */
1497   0, /* properties_provided */
1498   0, /* properties_destroyed */
1499   0, /* todo_flags_start */
1500   ( TODO_df_finish | TODO_verify_flow
1501     | TODO_verify_rtl_sharing ), /* todo_flags_finish */
1502 };
1503
1504 class pass_rtl_fwprop : public rtl_opt_pass
1505 {
1506 public:
1507   pass_rtl_fwprop(gcc::context *ctxt)
1508     : rtl_opt_pass(pass_data_rtl_fwprop, ctxt)
1509   {}
1510
1511   /* opt_pass methods: */
1512   bool gate () { return gate_fwprop (); }
1513   unsigned int execute () { return fwprop (); }
1514
1515 }; // class pass_rtl_fwprop
1516
1517 } // anon namespace
1518
1519 rtl_opt_pass *
1520 make_pass_rtl_fwprop (gcc::context *ctxt)
1521 {
1522   return new pass_rtl_fwprop (ctxt);
1523 }
1524
1525 static unsigned int
1526 fwprop_addr (void)
1527 {
1528   unsigned i;
1529   bool need_cleanup = false;
1530
1531   fwprop_init ();
1532
1533   /* Go through all the uses.  df_uses_create will create new ones at the
1534      end, and we'll go through them as well.  */
1535   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1536     {
1537       df_ref use = DF_USES_GET (i);
1538       if (use)
1539         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1540             && DF_REF_BB (use)->loop_father != NULL
1541             /* The outer most loop is not really a loop.  */
1542             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1543           need_cleanup |= forward_propagate_into (use);
1544     }
1545
1546   fwprop_done ();
1547
1548   if (need_cleanup)
1549     cleanup_cfg (0);
1550   return 0;
1551 }
1552
1553 namespace {
1554
1555 const pass_data pass_data_rtl_fwprop_addr =
1556 {
1557   RTL_PASS, /* type */
1558   "fwprop2", /* name */
1559   OPTGROUP_NONE, /* optinfo_flags */
1560   true, /* has_gate */
1561   true, /* has_execute */
1562   TV_FWPROP, /* tv_id */
1563   0, /* properties_required */
1564   0, /* properties_provided */
1565   0, /* properties_destroyed */
1566   0, /* todo_flags_start */
1567   ( TODO_df_finish | TODO_verify_rtl_sharing ), /* todo_flags_finish */
1568 };
1569
1570 class pass_rtl_fwprop_addr : public rtl_opt_pass
1571 {
1572 public:
1573   pass_rtl_fwprop_addr(gcc::context *ctxt)
1574     : rtl_opt_pass(pass_data_rtl_fwprop_addr, ctxt)
1575   {}
1576
1577   /* opt_pass methods: */
1578   bool gate () { return gate_fwprop (); }
1579   unsigned int execute () { return fwprop_addr (); }
1580
1581 }; // class pass_rtl_fwprop_addr
1582
1583 } // anon namespace
1584
1585 rtl_opt_pass *
1586 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1587 {
1588   return new pass_rtl_fwprop_addr (ctxt);
1589 }