gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005-2015 Free Software Foundation, Inc.
   3    Contributed by Paolo Bonzini and Steven Bosscher.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "target.h"
  26 #include "rtl.h"
  27 #include "predict.h"
  28 #include "df.h"
  29 #include "tm_p.h"
  30 #include "insn-config.h"
  31 #include "emit-rtl.h"
  32 #include "recog.h"
  33
  34 #include "sparseset.h"
  35 #include "cfgrtl.h"
  36 #include "cfgcleanup.h"
  37 #include "cfgloop.h"
  38 #include "tree-pass.h"
  39 #include "domwalk.h"
  40 #include "rtl-iter.h"
  41
  42
  43 /* This pass does simple forward propagation and simplification when an
  44    operand of an insn can only come from a single def.  This pass uses
  45    df.c, so it is global.  However, we only do limited analysis of
  46    available expressions.
  47
  48    1) The pass tries to propagate the source of the def into the use,
  49    and checks if the result is independent of the substituted value.
  50    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  51    zero, independent of the source register.
  52
  53    In particular, we propagate constants into the use site.  Sometimes
  54    RTL expansion did not put the constant in the same insn on purpose,
  55    to satisfy a predicate, and the result will fail to be recognized;
  56    but this happens rarely and in this case we can still create a
  57    REG_EQUAL note.  For multi-word operations, this
  58
  59       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  60       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  61       (set (subreg:SI (reg:DI 122) 0)
  62          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  63       (set (subreg:SI (reg:DI 122) 4)
  64          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  65
  66    can be simplified to the much simpler
  67
   68       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  69       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  70
  71    This particular propagation is also effective at putting together
  72    complex addressing modes.  We are more aggressive inside MEMs, in
  73    that all definitions are propagated if the use is in a MEM; if the
  74    result is a valid memory address we check address_cost to decide
  75    whether the substitution is worthwhile.
  76
   77    2) The pass propagates register copies.  This is not as effective as
   78    the copy propagation done by CSE's canon_reg, which works by walking
   79    the instruction chain, but it can help the other transformations.
  80
  81    We should consider removing this optimization, and instead reorder the
  82    RTL passes, because GCSE does this transformation too.  With some luck,
  83    the CSE pass at the end of rest_of_handle_gcse could also go away.
  84
  85    3) The pass looks for paradoxical subregs that are actually unnecessary.
  86    Things like this:
  87
  88      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  89      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  90      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  91                                 (subreg:SI (reg:QI 121) 0)))
  92
  93    are very common on machines that can only do word-sized operations.
  94    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  95    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  96    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  97    above will simplify this to
  98
  99      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 100      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 101      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 102
 103    where the first two insns are now dead.
 104
 105    We used to use reaching definitions to find which uses have a
 106    single reaching definition (sounds obvious...), but this is too
 107    complex a problem in nasty testcases like PR33928.  Now we use the
 108    multiple definitions problem in df-problems.c.  The similarity
 109    between that problem and SSA form creation is taken further, in
 110    that fwprop does a dominator walk to create its chains; however,
 111    instead of creating a PHI function where multiple definitions meet
 112    I just punt and record only singleton use-def chains, which is
 113    all that is needed by fwprop.  */
 114
 115
  116 static int num_changes;
  117 /* use_def_ref maps DF_REF_ID (use) to the use's single def, or NULL.  */
  118 static vec<df_ref> use_def_ref;
  119 static vec<df_ref> reg_defs;        /* Current def per regno, or NULL.  */
  120 static vec<df_ref> reg_defs_stack;  /* Undo log; NULL separates blocks.  */
  121
  122 /* Working copies of a block's MD and LR "in" sets.  The MD bitmaps are
  123    trimmed to include only live registers to cut memory usage on
  124    testcases like insn-recog.c.  Track live registers in the basic block
  125    and do not propagate if the destination is a dead pseudo in a note.  */
  126 static bitmap local_md;
  127 static bitmap local_lr;
 128
 129 /* Return the only def in USE's use-def chain, or NULL if there is
 130    more than one def in the chain.  */
 131
 132 static inline df_ref
 133 get_def_for_use (df_ref use)
 134 {
 135   return use_def_ref[DF_REF_ID (use)];
 136 }
 137
 138
 139 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 140    TOP_FLAG says which artificials uses should be used, when DEF_REC
 141    is an artificial def vector.  LOCAL_MD is modified as after a
 142    df_md_simulate_* function; we do more or less the same processing
 143    done there, so we do not use those functions.  */
 144
 145 #define DF_MD_GEN_FLAGS \
 146         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 147
 148 static void
 149 process_defs (df_ref def, int top_flag)
 150 {
 151   for (; def; def = DF_REF_NEXT_LOC (def))
 152     {
 153       df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
 154       unsigned int dregno;
 155
 156       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 157         continue;
 158
 159       dregno = DF_REF_REGNO (def);
 160       if (curr_def)
 161         reg_defs_stack.safe_push (curr_def);
 162       else
 163         {
 164           /* Do not store anything if "transitioning" from NULL to NULL.  But
 165              otherwise, push a special entry on the stack to tell the
 166              leave_block callback that the entry in reg_defs was NULL.  */
 167           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 168             ;
 169           else
 170             reg_defs_stack.safe_push (def);
 171         }
 172
 173       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 174         {
 175           bitmap_set_bit (local_md, dregno);
 176           reg_defs[dregno] = NULL;
 177         }
 178       else
 179         {
 180           bitmap_clear_bit (local_md, dregno);
 181           reg_defs[dregno] = def;
 182         }
 183     }
 184 }
 185
 186
 187 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 188    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 189    TOP_FLAG says which artificials uses should be used, when USE_REC
 190    is an artificial use vector.  */
 191
 192 static void
 193 process_uses (df_ref use, int top_flag)
 194 {
 195   for (; use; use = DF_REF_NEXT_LOC (use))
 196     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 197       {
 198         unsigned int uregno = DF_REF_REGNO (use);
 199         if (reg_defs[uregno]
 200             && !bitmap_bit_p (local_md, uregno)
 201             && bitmap_bit_p (local_lr, uregno))
 202           use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
 203       }
 204 }
  205 /* Dominator-tree walker whose callbacks fill in use_def_ref.  */
  206 class single_def_use_dom_walker : public dom_walker
  207 {
  208 public:
  209   single_def_use_dom_walker (cdi_direction direction)
  210     : dom_walker (direction) {}
  211   virtual void before_dom_children (basic_block);
  212   virtual void after_dom_children (basic_block);
  213 };
 214
 215 void
 216 single_def_use_dom_walker::before_dom_children (basic_block bb)
 217 {
 218   int bb_index = bb->index;
 219   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 220   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 221   rtx_insn *insn;
 222
 223   bitmap_copy (local_md, &md_bb_info->in);
 224   bitmap_copy (local_lr, &lr_bb_info->in);
 225
 226   /* Push a marker for the leave_block callback.  */
 227   reg_defs_stack.safe_push (NULL);
 228
 229   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 230   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 231
 232   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 233      the live registers if there are unused artificial defs.  We prefer
 234      liveness to be underestimated.  */
 235
 236   FOR_BB_INSNS (bb, insn)
 237     if (INSN_P (insn))
 238       {
 239         unsigned int uid = INSN_UID (insn);
 240         process_uses (DF_INSN_UID_USES (uid), 0);
 241         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 242         process_defs (DF_INSN_UID_DEFS (uid), 0);
 243         df_simulate_one_insn_forwards (bb, insn, local_lr);
 244       }
 245
 246   process_uses (df_get_artificial_uses (bb_index), 0);
 247   process_defs (df_get_artificial_defs (bb_index), 0);
 248 }
 249
 250 /* Pop the definitions created in this basic block when leaving its
 251    dominated parts.  */
 252
 253 void
 254 single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED)
 255 {
 256   df_ref saved_def;
 257   while ((saved_def = reg_defs_stack.pop ()) != NULL)
 258     {
 259       unsigned int dregno = DF_REF_REGNO (saved_def);
 260
 261       /* See also process_defs.  */
 262       if (saved_def == reg_defs[dregno])
 263         reg_defs[dregno] = NULL;
 264       else
 265         reg_defs[dregno] = saved_def;
 266     }
 267 }
 268
 269
  270 /* Fill use_def_ref so that each use reached by a single dominating
  271    definition is mapped to that definition; other entries stay cleared.  */
  272
  273 static void
  274 build_single_def_use_links (void)
  275 {
  276   /* We use the multiple definitions problem to compute our restricted
  277      use-def chains.  */
  278   df_set_flags (DF_EQ_NOTES);
  279   df_md_add_problem ();
  280   df_note_add_problem ();
  281   df_analyze ();
  282   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
  283   /* One cleared slot per entry of the uses table.  */
  284   use_def_ref.create (DF_USES_TABLE_SIZE ());
  285   use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());
  286   /* One cleared slot per register number.  */
  287   reg_defs.create (max_reg_num ());
  288   reg_defs.safe_grow_cleared (max_reg_num ());
  289   /* Scratch data that is only needed during the walk.  */
  290   reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10);
  291   local_md = BITMAP_ALLOC (NULL);
  292   local_lr = BITMAP_ALLOC (NULL);
  293
  294   /* Walk the dominator tree looking for single reaching definitions
  295      dominating the uses.  This is similar to how SSA form is built.  */
  296   single_def_use_dom_walker (CDI_DOMINATORS)
  297     .walk (cfun->cfg->x_entry_block_ptr);
  298   /* Release the scratch data; use_def_ref is not released here.  */
  299   BITMAP_FREE (local_lr);
  300   BITMAP_FREE (local_md);
  301   reg_defs.release ();
  302   reg_defs_stack.release ();
  303 }
 304
 305 \f
 306 /* Do not try to replace constant addresses or addresses of local and
 307    argument slots.  These MEM expressions are made only once and inserted
 308    in many instructions, as well as being used to control symbol table
 309    output.  It is not safe to clobber them.
 310
 311    There are some uncommon cases where the address is already in a register
 312    for some reason, but we cannot take advantage of that because we have
 313    no easy way to unshare the MEM.  In addition, looking up all stack
 314    addresses is costly.  */
 315
 316 static bool
 317 can_simplify_addr (rtx addr)
 318 {
 319   rtx reg;
 320
 321   if (CONSTANT_ADDRESS_P (addr))
 322     return false;
 323
 324   if (GET_CODE (addr) == PLUS)
 325     reg = XEXP (addr, 0);
 326   else
 327     reg = addr;
 328
 329   return (!REG_P (reg)
 330           || (REGNO (reg) != FRAME_POINTER_REGNUM
 331               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 332               && REGNO (reg) != ARG_POINTER_REGNUM));
 333 }
 334
 335 /* Returns a canonical version of X for the address, from the point of view,
 336    that all multiplications are represented as MULT instead of the multiply
 337    by a power of 2 being represented as ASHIFT.
 338
 339    Every ASHIFT we find has been made by simplify_gen_binary and was not
 340    there before, so it is not shared.  So we can do this in place.  */
 341
 342 static void
 343 canonicalize_address (rtx x)
 344 {
 345   for (;;)
 346     switch (GET_CODE (x))
 347       {
 348       case ASHIFT:
 349         if (CONST_INT_P (XEXP (x, 1))
 350             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 351             && INTVAL (XEXP (x, 1)) >= 0)
 352           {
 353             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 354             PUT_CODE (x, MULT);
 355             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 356                                         GET_MODE (x));
 357           }
 358
 359         x = XEXP (x, 0);
 360         break;
 361
 362       case PLUS:
 363         if (GET_CODE (XEXP (x, 0)) == PLUS
 364             || GET_CODE (XEXP (x, 0)) == ASHIFT
 365             || GET_CODE (XEXP (x, 0)) == CONST)
 366           canonicalize_address (XEXP (x, 0));
 367
 368         x = XEXP (x, 1);
 369         break;
 370
 371       case CONST:
 372         x = XEXP (x, 0);
 373         break;
 374
 375       default:
 376         return;
 377       }
 378 }
 379
 380 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 381    for a memory access in the given MODE.  */
 382
 383 static bool
 384 should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
 385                         addr_space_t as, bool speed)
 386 {
 387   int gain;
 388
 389   if (rtx_equal_p (old_rtx, new_rtx)
 390       || !memory_address_addr_space_p (mode, new_rtx, as))
 391     return false;
 392
 393   /* Copy propagation is always ok.  */
 394   if (REG_P (old_rtx) && REG_P (new_rtx))
 395     return true;
 396
 397   /* Prefer the new address if it is less expensive.  */
 398   gain = (address_cost (old_rtx, mode, as, speed)
 399           - address_cost (new_rtx, mode, as, speed));
 400
 401   /* If the addresses have equivalent cost, prefer the new address
 402      if it has the highest `set_src_cost'.  That has the potential of
 403      eliminating the most insns without additional costs, and it
 404      is the same that cse.c used to do.  */
 405   if (gain == 0)
 406     gain = (set_src_cost (new_rtx, VOIDmode, speed)
 407             - set_src_cost (old_rtx, VOIDmode, speed));
 408
 409   return (gain > 0);
 410 }
 411
 412
  413 /* Flags for the last parameter of propagate_rtx_1; they are ORed.  */
  414
  415 enum {
  416   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
  417      if it is false, propagate_rtx_1 returns false if, for at least
  418      one occurrence OLD, it failed to collapse the result to a constant.
  419      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
  420      collapse to zero if replacing (reg:M B) with (reg:M A).
  421
  422      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
  423      propagate_rtx_1 just tries to make cheaper and valid memory
  424      addresses.  */
  425   PR_CAN_APPEAR = 1,
  426
  427   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 does not look inside
  428      non-read-only MEMs: it replaces each one with a CLOBBER or SCRATCH
  429      and returns false.  This is needed because propagate_rtx_1 does
  430      not do any analysis on memory.
  431
  432      PR_HANDLE_MEM is set when the source of the propagation contains no
  433      non-read-only MEM (see varying_mem_p).  Then, it is safe not to
  434      treat non-read-only MEMs as ``opaque'' objects.  */
  435   PR_HANDLE_MEM = 2,
  436
  437   /* Set when costs should be optimized for speed.  */
  438   PR_OPTIMIZE_FOR_SPEED = 4
  439 };
 440
 441
  442 /* Replace all occurrences of OLD_RTX in *PX with NEW_RTX and try to
  443    simplify the resulting expression.  Replace *PX with a new RTL
  444    expression if an occurrence of OLD_RTX was found.  FLAGS is a mask of
  445    PR_* bits.  Return false if a replacement was not acceptable under
  446    FLAGS (see the PR_* comments), true otherwise.
  447    This is only a wrapper around simplify-rtx.c: do not add any pattern
  448    matching code here, except for LO_SUM (it has no simplify_gen_*).  */
  449
  450 static bool
  451 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
  452 {
  453   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  454   enum rtx_code code = GET_CODE (x);
  455   machine_mode mode = GET_MODE (x);
  456   machine_mode op_mode;
  457   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  458   bool valid_ops = true;
  459
  460   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
  461     {
  462       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
  463          they have side effects or not).  */
  464       *px = (side_effects_p (x)
  465              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
  466              : gen_rtx_SCRATCH (GET_MODE (x)));
  467       return false;
  468     }
  469
  470   /* If X is OLD_RTX, substitute NEW_RTX.  This is reported as valid
  471      only if NEW_RTX is allowed to appear (PR_CAN_APPEAR).  */
  472   if (x == old_rtx)
  473     {
  474       *px = new_rtx;
  475       return can_appear;
  476     }
  477
  478   /* If this is an expression, try recursive substitution.  */
  479   switch (GET_RTX_CLASS (code))
  480     {
  481     case RTX_UNARY:
  482       op0 = XEXP (x, 0);
  483       op_mode = GET_MODE (op0);
  484       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  485       if (op0 == XEXP (x, 0))
  486         return true;
  487       tem = simplify_gen_unary (code, mode, op0, op_mode);
  488       break;
  489
  490     case RTX_BIN_ARITH:
  491     case RTX_COMM_ARITH:
  492       op0 = XEXP (x, 0);
  493       op1 = XEXP (x, 1);
  494       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  495       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  496       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  497         return true;
  498       tem = simplify_gen_binary (code, mode, op0, op1);
  499       break;
  500
  501     case RTX_COMPARE:
  502     case RTX_COMM_COMPARE:
  503       op0 = XEXP (x, 0);
  504       op1 = XEXP (x, 1);
  505       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
  506       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  507       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  508       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  509         return true;
  510       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
  511       break;
  512
  513     case RTX_TERNARY:
  514     case RTX_BITFIELD_OPS:
  515       op0 = XEXP (x, 0);
  516       op1 = XEXP (x, 1);
  517       op2 = XEXP (x, 2);
  518       op_mode = GET_MODE (op0);
  519       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  520       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  521       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
  522       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
  523         return true;
  524       if (op_mode == VOIDmode)
  525         op_mode = GET_MODE (op0);
  526       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
  527       break;
  528
  529     case RTX_EXTRA:
  530       /* The only case we try to handle is a SUBREG.  */
  531       if (code == SUBREG)
  532         {
  533           op0 = XEXP (x, 0);
  534           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  535           if (op0 == XEXP (x, 0))
  536             return true;
  537           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
  538                                      SUBREG_BYTE (x));
  539         }
  540       break;
  541
  542     case RTX_OBJ:
  543       if (code == MEM && x != new_rtx)
  544         {
  545           rtx new_op0;
  546           op0 = XEXP (x, 0);
  547
  548           /* There are some addresses that we cannot work on.  */
  549           if (!can_simplify_addr (op0))
  550             return true;
  551
  552           op0 = new_op0 = targetm.delegitimize_address (op0);
  553           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
  554                                         flags | PR_CAN_APPEAR);
  555
  556           /* Dismiss transformation that we do not want to carry on.  */
  557           if (!valid_ops
  558               || new_op0 == op0
  559               || !(GET_MODE (new_op0) == GET_MODE (op0)
  560                    || GET_MODE (new_op0) == VOIDmode))
  561             return true;
  562
  563           canonicalize_address (new_op0);
  564
  565           /* Copy propagations are always ok.  Otherwise check the costs.  */
  566           if (!(REG_P (old_rtx) && REG_P (new_rtx))
  567               && !should_replace_address (op0, new_op0, GET_MODE (x),
  568                                           MEM_ADDR_SPACE (x),
  569                                           flags & PR_OPTIMIZE_FOR_SPEED))
  570             return true;
  571
  572           tem = replace_equiv_address_nv (x, new_op0);
  573         }
  574
  575       else if (code == LO_SUM)
  576         {
  577           op0 = XEXP (x, 0);
  578           op1 = XEXP (x, 1);
  579
  580           /* The only simplification we do attempts to remove references to op0
  581              or make it constant -- in both cases, op0's invalidity will not
  582              make the result invalid.  */
  583           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
  584           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  585           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  586             return true;
  587
  588           /* (lo_sum (high x) x) -> x  */
  589           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
  590             tem = op1;
  591           else
  592             tem = gen_rtx_LO_SUM (mode, op0, op1);
  593
  594           /* OP1 is likely not a legitimate address, otherwise there would have
  595              been no LO_SUM.  Return false if the result is invalid so that it
  596              disappears.  NOTE(review): TEM is not stored into *PX here.  */
  597           return memory_address_p (mode, tem);
  598         }
  599
  600       else if (code == REG)
  601         {
  602           if (rtx_equal_p (x, old_rtx))
  603             {
  604               *px = new_rtx;
  605               return can_appear;
  606             }
  607         }
  608       break;
  609
  610     default:
  611       break;
  612     }
  613
  614   /* No change, no trouble.  */
  615   if (tem == NULL_RTX)
  616     return true;
  617
  618   *px = tem;
  619
  620   /* The replacement we made so far is valid, if all of the recursive
  621      replacements were valid, or we could simplify everything to
  622      a constant.  */
  623   return valid_ops || can_appear || CONSTANT_P (tem);
  624 }
 625
 626
 627 /* Return true if X constains a non-constant mem.  */
 628
 629 static bool
 630 varying_mem_p (const_rtx x)
 631 {
 632   subrtx_iterator::array_type array;
 633   FOR_EACH_SUBRTX (iter, array, x, NONCONST)
 634     if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
 635       return true;
 636   return false;
 637 }
 638
 639
 640 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 641    resulting expression (in mode MODE).  Return a new expression if it is
 642    a constant, otherwise X.
 643
 644    Simplifications where occurrences of NEW collapse to a constant are always
 645    accepted.  All simplifications are accepted if NEW is a pseudo too.
 646    Otherwise, we accept simplifications that have a lower or equal cost.  */
 647
 648 static rtx
 649 propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
 650                bool speed)
 651 {
 652   rtx tem;
 653   bool collapsed;
 654   int flags;
 655
 656   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 657     return NULL_RTX;
 658
 659   flags = 0;
 660   if (REG_P (new_rtx)
 661       || CONSTANT_P (new_rtx)
 662       || (GET_CODE (new_rtx) == SUBREG
 663           && REG_P (SUBREG_REG (new_rtx))
 664           && (GET_MODE_SIZE (mode)
 665               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 666     flags |= PR_CAN_APPEAR;
 667   if (!varying_mem_p (new_rtx))
 668     flags |= PR_HANDLE_MEM;
 669
 670   if (speed)
 671     flags |= PR_OPTIMIZE_FOR_SPEED;
 672
 673   tem = x;
 674   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 675   if (tem == x || !collapsed)
 676     return NULL_RTX;
 677
 678   /* gen_lowpart_common will not be able to process VOIDmode entities other
 679      than CONST_INTs.  */
 680   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 681     return NULL_RTX;
 682
 683   if (GET_MODE (tem) == VOIDmode)
 684     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 685   else
 686     gcc_assert (GET_MODE (tem) == mode);
 687
 688   return tem;
 689 }
 690
 691
 692 \f
 693
 694 /* Return true if the register from reference REF is killed
 695    between FROM to (but not including) TO.  */
 696
 697 static bool
 698 local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to)
 699 {
 700   rtx_insn *insn;
 701
 702   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 703     {
 704       df_ref def;
 705       if (!INSN_P (insn))
 706         continue;
 707
 708       FOR_EACH_INSN_DEF (def, insn)
 709         if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 710           return true;
 711     }
 712   return false;
 713 }
 714
 715
 716 /* Check if the given DEF is available in INSN.  This would require full
 717    computation of available expressions; we check only restricted conditions:
 718    - if DEF is the sole definition of its register, go ahead;
 719    - in the same basic block, we check for no definitions killing the
 720      definition of DEF_INSN;
 721    - if USE's basic block has DEF's basic block as the sole predecessor,
 722      we check if the definition is killed after DEF_INSN or before
 723      TARGET_INSN insn, in their respective basic blocks.  */
 724 static bool
 725 use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn)
 726 {
 727   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 728   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 729   int regno;
 730   df_ref def;
 731
 732   /* We used to have a def reaching a use that is _before_ the def,
 733      with the def not dominating the use even though the use and def
 734      are in the same basic block, when a register may be used
 735      uninitialized in a loop.  This should not happen anymore since
 736      we do not use reaching definitions, but still we test for such
 737      cases and assume that DEF is not available.  */
 738   if (def_bb == target_bb
 739       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 740       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 741     return true;
 742
 743   /* Check if the reg in USE has only one definition.  We already
 744      know that this definition reaches use, or we wouldn't be here.
 745      However, this is invalid for hard registers because if they are
 746      live at the beginning of the function it does not mean that we
 747      have an uninitialized access.  */
 748   regno = DF_REF_REGNO (use);
 749   def = DF_REG_DEF_CHAIN (regno);
 750   if (def
 751       && DF_REF_NEXT_REG (def) == NULL
 752       && regno >= FIRST_PSEUDO_REGISTER)
 753     return false;
 754
 755   /* Check locally if we are in the same basic block.  */
 756   if (def_bb == target_bb)
 757     return local_ref_killed_between_p (use, def_insn, target_insn);
 758
 759   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 760   if (single_pred_p (target_bb)
 761       && single_pred (target_bb) == def_bb)
 762     {
 763       df_ref x;
 764
 765       /* See if USE is killed between DEF_INSN and the last insn in the
 766          basic block containing DEF_INSN.  */
 767       x = df_bb_regno_last_def_find (def_bb, regno);
 768       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 769         return true;
 770
 771       /* See if USE is killed between TARGET_INSN and the first insn in the
 772          basic block containing TARGET_INSN.  */
 773       x = df_bb_regno_first_def_find (target_bb, regno);
 774       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 775         return true;
 776
 777       return false;
 778     }
 779
 780   /* Otherwise assume the worst case.  */
 781   return true;
 782 }
 783
 784
 785 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 786    would require full computation of available expressions;
 787    we check only restricted conditions, see use_killed_between.  */
 788 static bool
 789 all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn)
 790 {
 791   df_ref use;
 792   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 793   rtx def_set = single_set (def_insn);
 794   rtx_insn *next;
 795
 796   gcc_assert (def_set);
 797
 798   /* If target_insn comes right after def_insn, which is very common
 799      for addresses, we can use a quicker test.  Ignore debug insns
 800      other than target insns for this.  */
 801   next = NEXT_INSN (def_insn);
 802   while (next && next != target_insn && DEBUG_INSN_P (next))
 803     next = NEXT_INSN (next);
 804   if (next == target_insn && REG_P (SET_DEST (def_set)))
 805     {
 806       rtx def_reg = SET_DEST (def_set);
 807
 808       /* If the insn uses the reg that it defines, the substitution is
 809          invalid.  */
 810       FOR_EACH_INSN_INFO_USE (use, insn_info)
 811         if (rtx_equal_p (DF_REF_REG (use), def_reg))
 812           return false;
 813       FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
 814         if (rtx_equal_p (DF_REF_REG (use), def_reg))
 815           return false;
 816     }
 817   else
 818     {
 819       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 820
 821       /* Look at all the uses of DEF_INSN, and see if they are not
 822          killed between DEF_INSN and TARGET_INSN.  */
 823       FOR_EACH_INSN_INFO_USE (use, insn_info)
 824         {
 825           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 826             return false;
 827           if (use_killed_between (use, def_insn, target_insn))
 828             return false;
 829         }
 830       FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
 831         {
 832           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 833             return false;
 834           if (use_killed_between (use, def_insn, target_insn))
 835             return false;
 836         }
 837     }
 838
 839   return true;
 840 }
 841
 842 \f
  843 static df_ref *active_defs;           /* Indexed by register number.  */
  844 static sparseset active_defs_check;   /* Regnos with a valid entry.  */
 845
 846 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 847    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 848    too, for checking purposes.  */
 849
 850 static void
 851 register_active_defs (df_ref use)
 852 {
 853   for (; use; use = DF_REF_NEXT_LOC (use))
 854     {
 855       df_ref def = get_def_for_use (use);
 856       int regno = DF_REF_REGNO (use);
 857
 858       if (flag_checking)
 859         sparseset_set_bit (active_defs_check, regno);
 860       active_defs[regno] = def;
 861     }
 862 }
 863
 864
 865 /* Build the use->def links that we use to update the dataflow info
 866    for new uses.  Note that building the links is very cheap and if
 867    it were done earlier, they could be used to rule out invalid
 868    propagations (in addition to what is done in all_uses_available_at).
 869    I'm not doing this yet, though.  */
 870
 871 static void
 872 update_df_init (rtx_insn *def_insn, rtx_insn *insn)
 873 {
 874   if (flag_checking)
 875     sparseset_clear (active_defs_check);
 876   register_active_defs (DF_INSN_USES (def_insn));
 877   register_active_defs (DF_INSN_USES (insn));
 878   register_active_defs (DF_INSN_EQ_USES (insn));
 879 }
 880
 881
 882 /* Update the USE_DEF_REF array for the given use, using the active definitions
 883    in the ACTIVE_DEFS array to match pseudos to their def. */
 884
 885 static inline void
 886 update_uses (df_ref use)
 887 {
 888   for (; use; use = DF_REF_NEXT_LOC (use))
 889     {
 890       int regno = DF_REF_REGNO (use);
 891
 892       /* Set up the use-def chain.  */
 893       if (DF_REF_ID (use) >= (int) use_def_ref.length ())
 894         use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);
 895
 896       gcc_checking_assert (sparseset_bit_p (active_defs_check, regno));
 897       use_def_ref[DF_REF_ID (use)] = active_defs[regno];
 898     }
 899 }
 900
 901
 902 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 903    uses if NOTES_ONLY is true.  */
 904
 905 static void
 906 update_df (rtx_insn *insn, rtx note)
 907 {
 908   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 909
 910   if (note)
 911     {
 912       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 913       df_notes_rescan (insn);
 914     }
 915   else
 916     {
 917       df_uses_create (&PATTERN (insn), insn, 0);
 918       df_insn_rescan (insn);
 919       update_uses (DF_INSN_INFO_USES (insn_info));
 920     }
 921
 922   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 923 }
 924
 925
 926 /* Try substituting NEW into LOC, which originated from forward propagation
 927    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 928    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 929    new insn is not recognized.  Return whether the substitution was
 930    performed.  */
 931
 932 static bool
 933 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
 934                   bool set_reg_equal)
 935 {
 936   rtx_insn *insn = DF_REF_INSN (use);
 937   rtx set = single_set (insn);
 938   rtx note = NULL_RTX;
 939   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 940   int old_cost = 0;
 941   bool ok;
 942
 943   update_df_init (def_insn, insn);
 944
 945   /* forward_propagate_subreg may be operating on an instruction with
 946      multiple sets.  If so, assume the cost of the new instruction is
 947      not greater than the old one.  */
 948   if (set)
 949     old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed);
 950   if (dump_file)
 951     {
 952       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 953       print_inline_rtx (dump_file, *loc, 2);
 954       fprintf (dump_file, "\n with ");
 955       print_inline_rtx (dump_file, new_rtx, 2);
 956       fprintf (dump_file, "\n");
 957     }
 958
 959   validate_unshare_change (insn, loc, new_rtx, true);
 960   if (!verify_changes (0))
 961     {
 962       if (dump_file)
 963         fprintf (dump_file, "Changes to insn %d not recognized\n",
 964                  INSN_UID (insn));
 965       ok = false;
 966     }
 967
 968   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 969            && set
 970            && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed)
 971                > old_cost))
 972     {
 973       if (dump_file)
 974         fprintf (dump_file, "Changes to insn %d not profitable\n",
 975                  INSN_UID (insn));
 976       ok = false;
 977     }
 978
 979   else
 980     {
 981       if (dump_file)
 982         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
 983       ok = true;
 984     }
 985
 986   if (ok)
 987     {
 988       confirm_change_group ();
 989       num_changes++;
 990     }
 991   else
 992     {
 993       cancel_changes (0);
 994
 995       /* Can also record a simplified value in a REG_EQUAL note,
 996          making a new one if one does not already exist.  */
 997       if (set_reg_equal)
 998         {
 999           if (dump_file)
1000             fprintf (dump_file, " Setting REG_EQUAL note\n");
1001
1002           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1003         }
1004     }
1005
1006   if ((ok || note) && !CONSTANT_P (new_rtx))
1007     update_df (insn, note);
1008
1009   return ok;
1010 }
1011
1012 /* For the given single_set INSN, containing SRC known to be a
1013    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1014    is redundant due to the register being set by a LOAD_EXTEND_OP
1015    load from memory.  */
1016
1017 static bool
1018 free_load_extend (rtx src, rtx_insn *insn)
1019 {
1020   rtx reg;
1021   df_ref def, use;
1022
1023   reg = XEXP (src, 0);
1024 #ifdef LOAD_EXTEND_OP
1025   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1026 #endif
1027     return false;
1028
1029   FOR_EACH_INSN_USE (use, insn)
1030     if (!DF_REF_IS_ARTIFICIAL (use)
1031         && DF_REF_TYPE (use) == DF_REF_REG_USE
1032         && DF_REF_REG (use) == reg)
1033       break;
1034   if (!use)
1035     return false;
1036
1037   def = get_def_for_use (use);
1038   if (!def)
1039     return false;
1040
1041   if (DF_REF_IS_ARTIFICIAL (def))
1042     return false;
1043
1044   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1045     {
1046       rtx patt = PATTERN (DF_REF_INSN (def));
1047
1048       if (GET_CODE (patt) == SET
1049           && GET_CODE (SET_SRC (patt)) == MEM
1050           && rtx_equal_p (SET_DEST (patt), reg))
1051         return true;
1052     }
1053   return false;
1054 }
1055
1056 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1057
1058 static bool
1059 forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set)
1060 {
1061   rtx use_reg = DF_REF_REG (use);
1062   rtx_insn *use_insn;
1063   rtx src;
1064
1065   /* Only consider subregs... */
1066   machine_mode use_mode = GET_MODE (use_reg);
1067   if (GET_CODE (use_reg) != SUBREG
1068       || !REG_P (SET_DEST (def_set)))
1069     return false;
1070
1071   /* If this is a paradoxical SUBREG...  */
1072   if (GET_MODE_SIZE (use_mode)
1073       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1074     {
1075       /* If this is a paradoxical SUBREG, we have no idea what value the
1076          extra bits would have.  However, if the operand is equivalent to
1077          a SUBREG whose operand is the same as our mode, and all the modes
1078          are within a word, we can just use the inner operand because
1079          these SUBREGs just say how to treat the register.  */
1080       use_insn = DF_REF_INSN (use);
1081       src = SET_SRC (def_set);
1082       if (GET_CODE (src) == SUBREG
1083           && REG_P (SUBREG_REG (src))
1084           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1085           && GET_MODE (SUBREG_REG (src)) == use_mode
1086           && subreg_lowpart_p (src)
1087           && all_uses_available_at (def_insn, use_insn))
1088         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1089                                  def_insn, false);
1090     }
1091
1092   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1093      is the low part of the reg being extended then just use the inner
1094      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1095      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1096      or due to the operation being a no-op when applied to registers.
1097      For example, if we have:
1098
1099          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1100          B: (... (subreg:SI (reg:DI X)) ...)
1101
1102      and mode_rep_extended says that Y is already sign-extended,
1103      the backend will typically allow A to be combined with the
1104      definition of Y or, failing that, allow A to be deleted after
1105      reload through register tying.  Introducing more uses of Y
1106      prevents both optimisations.  */
1107   else if (subreg_lowpart_p (use_reg))
1108     {
1109       use_insn = DF_REF_INSN (use);
1110       src = SET_SRC (def_set);
1111       if ((GET_CODE (src) == ZERO_EXTEND
1112            || GET_CODE (src) == SIGN_EXTEND)
1113           && REG_P (XEXP (src, 0))
1114           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1115           && GET_MODE (XEXP (src, 0)) == use_mode
1116           && !free_load_extend (src, def_insn)
1117           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1118               != (int) GET_CODE (src))
1119           && all_uses_available_at (def_insn, use_insn))
1120         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1121                                  def_insn, false);
1122     }
1123
1124   return false;
1125 }
1126
1127 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1128
1129 static bool
1130 forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg)
1131 {
1132   rtx_insn *use_insn = DF_REF_INSN (use);
1133   rtx src, use_pat, asm_operands, new_rtx, *loc;
1134   int speed_p, i;
1135   df_ref uses;
1136
1137   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1138
1139   src = SET_SRC (def_set);
1140   use_pat = PATTERN (use_insn);
1141
1142   /* In __asm don't replace if src might need more registers than
1143      reg, as that could increase register pressure on the __asm.  */
1144   uses = DF_INSN_USES (def_insn);
1145   if (uses && DF_REF_NEXT_LOC (uses))
1146     return false;
1147
1148   update_df_init (def_insn, use_insn);
1149   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1150   asm_operands = NULL_RTX;
1151   switch (GET_CODE (use_pat))
1152     {
1153     case ASM_OPERANDS:
1154       asm_operands = use_pat;
1155       break;
1156     case SET:
1157       if (MEM_P (SET_DEST (use_pat)))
1158         {
1159           loc = &SET_DEST (use_pat);
1160           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1161           if (new_rtx)
1162             validate_unshare_change (use_insn, loc, new_rtx, true);
1163         }
1164       asm_operands = SET_SRC (use_pat);
1165       break;
1166     case PARALLEL:
1167       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1168         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1169           {
1170             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1171               {
1172                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1173                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1174                                          src, speed_p);
1175                 if (new_rtx)
1176                   validate_unshare_change (use_insn, loc, new_rtx, true);
1177               }
1178             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1179           }
1180         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1181           asm_operands = XVECEXP (use_pat, 0, i);
1182       break;
1183     default:
1184       gcc_unreachable ();
1185     }
1186
1187   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1188   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1189     {
1190       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1191       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1192       if (new_rtx)
1193         validate_unshare_change (use_insn, loc, new_rtx, true);
1194     }
1195
1196   if (num_changes_pending () == 0 || !apply_change_group ())
1197     return false;
1198
1199   update_df (use_insn, NULL);
1200   num_changes++;
1201   return true;
1202 }
1203
1204 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1205    result.  */
1206
1207 static bool
1208 forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
1209 {
1210   rtx_insn *use_insn = DF_REF_INSN (use);
1211   rtx use_set = single_set (use_insn);
1212   rtx src, reg, new_rtx, *loc;
1213   bool set_reg_equal;
1214   machine_mode mode;
1215   int asm_use = -1;
1216
1217   if (INSN_CODE (use_insn) < 0)
1218     asm_use = asm_noperands (PATTERN (use_insn));
1219
1220   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1221     return false;
1222
1223   /* Do not propagate into PC, CC0, etc.  */
1224   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1225     return false;
1226
1227   /* If def and use are subreg, check if they match.  */
1228   reg = DF_REF_REG (use);
1229   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1230     {
1231       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1232         return false;
1233     }
1234   /* Check if the def had a subreg, but the use has the whole reg.  */
1235   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1236     return false;
1237   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1238      previous case, the optimization is possible and often useful indeed.  */
1239   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1240     reg = SUBREG_REG (reg);
1241
1242   /* Make sure that we can treat REG as having the same mode as the
1243      source of DEF_SET.  */
1244   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1245     return false;
1246
1247   /* Check if the substitution is valid (last, because it's the most
1248      expensive check!).  */
1249   src = SET_SRC (def_set);
1250   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1251     return false;
1252
1253   /* Check if the def is loading something from the constant pool; in this
1254      case we would undo optimization such as compress_float_constant.
1255      Still, we can set a REG_EQUAL note.  */
1256   if (MEM_P (src) && MEM_READONLY_P (src))
1257     {
1258       rtx x = avoid_constant_pool_reference (src);
1259       if (x != src && use_set)
1260         {
1261           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1262           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1263           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1264           if (old_rtx != new_rtx)
1265             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1266         }
1267       return false;
1268     }
1269
1270   if (asm_use >= 0)
1271     return forward_propagate_asm (use, def_insn, def_set, reg);
1272
1273   /* Else try simplifying.  */
1274
1275   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1276     {
1277       loc = &SET_DEST (use_set);
1278       set_reg_equal = false;
1279     }
1280   else if (!use_set)
1281     {
1282       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1283       set_reg_equal = false;
1284     }
1285   else
1286     {
1287       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1288       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1289         loc = &XEXP (note, 0);
1290       else
1291         loc = &SET_SRC (use_set);
1292
1293       /* Do not replace an existing REG_EQUAL note if the insn is not
1294          recognized.  Either we're already replacing in the note, or we'll
1295          separately try plugging the definition in the note and simplifying.
1296          And only install a REQ_EQUAL note when the destination is a REG
1297          that isn't mentioned in USE_SET, as the note would be invalid
1298          otherwise.  We also don't want to install a note if we are merely
1299          propagating a pseudo since verifying that this pseudo isn't dead
1300          is a pain; moreover such a note won't help anything.  */
1301       set_reg_equal = (note == NULL_RTX
1302                        && REG_P (SET_DEST (use_set))
1303                        && !REG_P (src)
1304                        && !(GET_CODE (src) == SUBREG
1305                             && REG_P (SUBREG_REG (src)))
1306                        && !reg_mentioned_p (SET_DEST (use_set),
1307                                             SET_SRC (use_set)));
1308     }
1309
1310   if (GET_MODE (*loc) == VOIDmode)
1311     mode = GET_MODE (SET_DEST (use_set));
1312   else
1313     mode = GET_MODE (*loc);
1314
1315   new_rtx = propagate_rtx (*loc, mode, reg, src,
1316                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1317
1318   if (!new_rtx)
1319     return false;
1320
1321   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1322 }
1323
1324
1325 /* Given a use USE of an insn, if it has a single reaching
1326    definition, try to forward propagate it into that insn.
1327    Return true if cfg cleanup will be needed.  */
1328
1329 static bool
1330 forward_propagate_into (df_ref use)
1331 {
1332   df_ref def;
1333   rtx_insn *def_insn, *use_insn;
1334   rtx def_set;
1335   rtx parent;
1336
1337   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1338     return false;
1339   if (DF_REF_IS_ARTIFICIAL (use))
1340     return false;
1341
1342   /* Only consider uses that have a single definition.  */
1343   def = get_def_for_use (use);
1344   if (!def)
1345     return false;
1346   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1347     return false;
1348   if (DF_REF_IS_ARTIFICIAL (def))
1349     return false;
1350
1351   /* Do not propagate loop invariant definitions inside the loop.  */
1352   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1353     return false;
1354
1355   /* Check if the use is still present in the insn!  */
1356   use_insn = DF_REF_INSN (use);
1357   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1358     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1359   else
1360     parent = PATTERN (use_insn);
1361
1362   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1363     return false;
1364
1365   def_insn = DF_REF_INSN (def);
1366   if (multiple_sets (def_insn))
1367     return false;
1368   def_set = single_set (def_insn);
1369   if (!def_set)
1370     return false;
1371
1372   /* Only try one kind of propagation.  If two are possible, we'll
1373      do it on the following iterations.  */
1374   if (forward_propagate_and_simplify (use, def_insn, def_set)
1375       || forward_propagate_subreg (use, def_insn, def_set))
1376     {
1377       if (cfun->can_throw_non_call_exceptions
1378           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1379           && purge_dead_edges (DF_REF_BB (use)))
1380         return true;
1381     }
1382   return false;
1383 }
1384
1385 \f
1386 static void
1387 fwprop_init (void)
1388 {
1389   num_changes = 0;
1390   calculate_dominance_info (CDI_DOMINATORS);
1391
1392   /* We do not always want to propagate into loops, so we have to find
1393      loops and be careful about them.  Avoid CFG modifications so that
1394      we don't have to update dominance information afterwards for
1395      build_single_def_use_links.  */
1396   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1397
1398   build_single_def_use_links ();
1399   df_set_flags (DF_DEFER_INSN_RESCAN);
1400
1401   active_defs = XNEWVEC (df_ref, max_reg_num ());
1402   if (flag_checking)
1403     active_defs_check = sparseset_alloc (max_reg_num ());
1404 }
1405
/* Tear down the state set up by fwprop_init: finalize the loop
   optimizer, release USE_DEF_REF, ACTIVE_DEFS and (when checking)
   ACTIVE_DEFS_CHECK, and free the dominance information.  Then clean
   up the CFG, delete trivially dead insns and, if dumping, report the
   number of successful propagations.  */

static void
fwprop_done (void)
{
  loop_optimizer_finalize ();

  use_def_ref.release ();
  free (active_defs);
  /* The sparseset is only allocated when flag_checking is set.  */
  if (flag_checking)
    sparseset_free (active_defs_check);

  free_dominance_info (CDI_DOMINATORS);
  cleanup_cfg (0);
  delete_trivially_dead_insns (get_insns (), max_reg_num ());

  if (dump_file)
    fprintf (dump_file,
             "\nNumber of successful forward propagations: %d\n\n",
             num_changes);
}
1425
1426
1427 /* Main entry point.  */
1428
1429 static bool
1430 gate_fwprop (void)
1431 {
1432   return optimize > 0 && flag_forward_propagate;
1433 }
1434
1435 static unsigned int
1436 fwprop (void)
1437 {
1438   unsigned i;
1439   bool need_cleanup = false;
1440
1441   fwprop_init ();
1442
1443   /* Go through all the uses.  df_uses_create will create new ones at the
1444      end, and we'll go through them as well.
1445
1446      Do not forward propagate addresses into loops until after unrolling.
1447      CSE did so because it was able to fix its own mess, but we are not.  */
1448
1449   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1450     {
1451       df_ref use = DF_USES_GET (i);
1452       if (use)
1453         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1454             || DF_REF_BB (use)->loop_father == NULL
1455             /* The outer most loop is not really a loop.  */
1456             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1457           need_cleanup |= forward_propagate_into (use);
1458     }
1459
1460   fwprop_done ();
1461   if (need_cleanup)
1462     cleanup_cfg (0);
1463   return 0;
1464 }
1465
namespace {

/* Descriptor of the first RTL forward propagation pass, named
   "fwprop1".  It requires, provides and destroys no properties and
   requests TODO_df_finish once the pass has run.  */
const pass_data pass_data_rtl_fwprop =
{
  RTL_PASS, /* type */
  "fwprop1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_FWPROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* Pass object for "fwprop1": gated by gate_fwprop, executed by
   fwprop.  */
class pass_rtl_fwprop : public rtl_opt_pass
{
public:
  pass_rtl_fwprop (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return gate_fwprop (); }
  virtual unsigned int execute (function *) { return fwprop (); }

}; // class pass_rtl_fwprop

} // anon namespace
1495
/* Return a newly allocated instance of the "fwprop1" pass for the
   compiler context CTXT.  */

rtl_opt_pass *
make_pass_rtl_fwprop (gcc::context *ctxt)
{
  return new pass_rtl_fwprop (ctxt);
}
1501
1502 static unsigned int
1503 fwprop_addr (void)
1504 {
1505   unsigned i;
1506   bool need_cleanup = false;
1507
1508   fwprop_init ();
1509
1510   /* Go through all the uses.  df_uses_create will create new ones at the
1511      end, and we'll go through them as well.  */
1512   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1513     {
1514       df_ref use = DF_USES_GET (i);
1515       if (use)
1516         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1517             && DF_REF_BB (use)->loop_father != NULL
1518             /* The outer most loop is not really a loop.  */
1519             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1520           need_cleanup |= forward_propagate_into (use);
1521     }
1522
1523   fwprop_done ();
1524
1525   if (need_cleanup)
1526     cleanup_cfg (0);
1527   return 0;
1528 }
1529
namespace {

/* Descriptor of the second RTL forward propagation pass, named
   "fwprop2".  It requires, provides and destroys no properties and
   requests TODO_df_finish once the pass has run.  */
const pass_data pass_data_rtl_fwprop_addr =
{
  RTL_PASS, /* type */
  "fwprop2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_FWPROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* Pass object for "fwprop2": gated by gate_fwprop, executed by
   fwprop_addr.  */
class pass_rtl_fwprop_addr : public rtl_opt_pass
{
public:
  pass_rtl_fwprop_addr (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return gate_fwprop (); }
  virtual unsigned int execute (function *) { return fwprop_addr (); }

}; // class pass_rtl_fwprop_addr

} // anon namespace
1559
/* Return a newly allocated instance of the "fwprop2" pass for the
   compiler context CTXT.  */

rtl_opt_pass *
make_pass_rtl_fwprop_addr (gcc::context *ctxt)
{
  return new pass_rtl_fwprop_addr (ctxt);
}