gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27
  28 #include "sparseset.h"
  29 #include "timevar.h"
  30 #include "rtl.h"
  31 #include "tm_p.h"
  32 #include "insn-config.h"
  33 #include "recog.h"
  34 #include "flags.h"
  35 #include "obstack.h"
  36 #include "basic-block.h"
  37 #include "output.h"
  38 #include "df.h"
  39 #include "target.h"
  40 #include "cfgloop.h"
  41 #include "tree-pass.h"
  42 #include "domwalk.h"
  43 #include "emit-rtl.h"
  44
  45
  46 /* This pass does simple forward propagation and simplification when an
  47    operand of an insn can only come from a single def.  This pass uses
  48    df.c, so it is global.  However, we only do limited analysis of
  49    available expressions.
  50
  51    1) The pass tries to propagate the source of the def into the use,
  52    and checks if the result is independent of the substituted value.
  53    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  54    zero, independent of the source register.
  55
  56    In particular, we propagate constants into the use site.  Sometimes
  57    RTL expansion did not put the constant in the same insn on purpose,
  58    to satisfy a predicate, and the result will fail to be recognized;
  59    but this happens rarely and in this case we can still create a
  60    REG_EQUAL note.  For multi-word operations, this
  61
  62       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  63       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  64       (set (subreg:SI (reg:DI 122) 0)
  65          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  66       (set (subreg:SI (reg:DI 122) 4)
  67          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  68
  69    can be simplified to the much simpler
  70
  71       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
  72       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  73
  74    This particular propagation is also effective at putting together
  75    complex addressing modes.  We are more aggressive inside MEMs, in
  76    that all definitions are propagated if the use is in a MEM; if the
  77    result is a valid memory address we check address_cost to decide
  78    whether the substitution is worthwhile.
  79
  80    2) The pass propagates register copies.  This is not as effective as
  81    the copy propagation done by CSE's canon_reg, which works by walking
  82    the instruction chain, it can help the other transformations.
  83
  84    We should consider removing this optimization, and instead reorder the
  85    RTL passes, because GCSE does this transformation too.  With some luck,
  86    the CSE pass at the end of rest_of_handle_gcse could also go away.
  87
  88    3) The pass looks for paradoxical subregs that are actually unnecessary.
  89    Things like this:
  90
  91      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  92      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  93      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  94                                 (subreg:SI (reg:QI 121) 0)))
  95
  96    are very common on machines that can only do word-sized operations.
  97    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  98    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  99    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
 100    above will simplify this to
 101
 102      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 103      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 104      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 105
 106    where the first two insns are now dead.
 107
 108    We used to use reaching definitions to find which uses have a
 109    single reaching definition (sounds obvious...), but this is too
 110    complex a problem in nasty testcases like PR33928.  Now we use the
 111    multiple definitions problem in df-problems.c.  The similarity
 112    between that problem and SSA form creation is taken further, in
 113    that fwprop does a dominator walk to create its chains; however,
 114    instead of creating a PHI function where multiple definitions meet
 115    I just punt and record only singleton use-def chains, which is
 116    all that is needed by fwprop.  */
 117
 118
 119 static int num_changes;
 120
 121 DEF_VEC_P(df_ref);
 122 DEF_VEC_ALLOC_P(df_ref,heap);
 123 static VEC(df_ref,heap) *use_def_ref;
 124 static VEC(df_ref,heap) *reg_defs;
 125 static VEC(df_ref,heap) *reg_defs_stack;
 126
 127 /* The MD bitmaps are trimmed to include only live registers to cut
 128    memory usage on testcases like insn-recog.c.  Track live registers
 129    in the basic block and do not perform forward propagation if the
 130    destination is a dead pseudo occurring in a note.  */
 131 static bitmap local_md;
 132 static bitmap local_lr;
 133
 134 /* Return the only def in USE's use-def chain, or NULL if there is
 135    more than one def in the chain.  */
 136
 137 static inline df_ref
 138 get_def_for_use (df_ref use)
 139 {
 140   return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
 141 }
 142
 143
 144 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 145    TOP_FLAG says which artificials uses should be used, when DEF_REC
 146    is an artificial def vector.  LOCAL_MD is modified as after a
 147    df_md_simulate_* function; we do more or less the same processing
 148    done there, so we do not use those functions.  */
 149
 150 #define DF_MD_GEN_FLAGS \
 151         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 152
 153 static void
 154 process_defs (df_ref *def_rec, int top_flag)
 155 {
 156   df_ref def;
 157   while ((def = *def_rec++) != NULL)
 158     {
 159       df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
 160       unsigned int dregno;
 161
 162       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 163         continue;
 164
 165       dregno = DF_REF_REGNO (def);
 166       if (curr_def)
 167         VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);
 168       else
 169         {
 170           /* Do not store anything if "transitioning" from NULL to NULL.  But
 171              otherwise, push a special entry on the stack to tell the
 172              leave_block callback that the entry in reg_defs was NULL.  */
 173           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 174             ;
 175           else
 176             VEC_safe_push (df_ref, heap, reg_defs_stack, def);
 177         }
 178
 179       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 180         {
 181           bitmap_set_bit (local_md, dregno);
 182           VEC_replace (df_ref, reg_defs, dregno, NULL);
 183         }
 184       else
 185         {
 186           bitmap_clear_bit (local_md, dregno);
 187           VEC_replace (df_ref, reg_defs, dregno, def);
 188         }
 189     }
 190 }
 191
 192
 193 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 194    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 195    TOP_FLAG says which artificials uses should be used, when USE_REC
 196    is an artificial use vector.  */
 197
 198 static void
 199 process_uses (df_ref *use_rec, int top_flag)
 200 {
 201   df_ref use;
 202   while ((use = *use_rec++) != NULL)
 203     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 204       {
 205         unsigned int uregno = DF_REF_REGNO (use);
 206         if (VEC_index (df_ref, reg_defs, uregno)
 207             && !bitmap_bit_p (local_md, uregno)
 208             && bitmap_bit_p (local_lr, uregno))
 209           VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
 210                        VEC_index (df_ref, reg_defs, uregno));
 211       }
 212 }
 213
 214
 215 static void
 216 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 217                             basic_block bb)
 218 {
 219   int bb_index = bb->index;
 220   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 221   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 222   rtx insn;
 223
 224   bitmap_copy (local_md, &md_bb_info->in);
 225   bitmap_copy (local_lr, &lr_bb_info->in);
 226
 227   /* Push a marker for the leave_block callback.  */
 228   VEC_safe_push (df_ref, heap, reg_defs_stack, NULL);
 229
 230   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 231   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 232
 233   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 234      the live registers if there are unused artificial defs.  We prefer
 235      liveness to be underestimated.  */
 236
 237   FOR_BB_INSNS (bb, insn)
 238     if (INSN_P (insn))
 239       {
 240         unsigned int uid = INSN_UID (insn);
 241         process_uses (DF_INSN_UID_USES (uid), 0);
 242         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 243         process_defs (DF_INSN_UID_DEFS (uid), 0);
 244         df_simulate_one_insn_forwards (bb, insn, local_lr);
 245       }
 246
 247   process_uses (df_get_artificial_uses (bb_index), 0);
 248   process_defs (df_get_artificial_defs (bb_index), 0);
 249 }
 250
 251 /* Pop the definitions created in this basic block when leaving its
 252    dominated parts.  */
 253
 254 static void
 255 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 256                             basic_block bb ATTRIBUTE_UNUSED)
 257 {
 258   df_ref saved_def;
 259   while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)
 260     {
 261       unsigned int dregno = DF_REF_REGNO (saved_def);
 262
 263       /* See also process_defs.  */
 264       if (saved_def == VEC_index (df_ref, reg_defs, dregno))
 265         VEC_replace (df_ref, reg_defs, dregno, NULL);
 266       else
 267         VEC_replace (df_ref, reg_defs, dregno, saved_def);
 268     }
 269 }
 270
 271
 272 /* Build a vector holding the reaching definitions of uses reached by a
 273    single dominating definition.  */
 274
 275 static void
 276 build_single_def_use_links (void)
 277 {
 278   struct dom_walk_data walk_data;
 279
 280   /* We use the multiple definitions problem to compute our restricted
 281      use-def chains.  */
 282   df_set_flags (DF_EQ_NOTES);
 283   df_md_add_problem ();
 284   df_note_add_problem ();
 285   df_analyze ();
 286   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 287
 288   use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
 289   VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
 290
 291   reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
 292   VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());
 293
 294   reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);
 295   local_md = BITMAP_ALLOC (NULL);
 296   local_lr = BITMAP_ALLOC (NULL);
 297
 298   /* Walk the dominator tree looking for single reaching definitions
 299      dominating the uses.  This is similar to how SSA form is built.  */
 300   walk_data.dom_direction = CDI_DOMINATORS;
 301   walk_data.initialize_block_local_data = NULL;
 302   walk_data.before_dom_children = single_def_use_enter_block;
 303   walk_data.after_dom_children = single_def_use_leave_block;
 304
 305   init_walk_dominator_tree (&walk_data);
 306   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
 307   fini_walk_dominator_tree (&walk_data);
 308
 309   BITMAP_FREE (local_lr);
 310   BITMAP_FREE (local_md);
 311   VEC_free (df_ref, heap, reg_defs);
 312   VEC_free (df_ref, heap, reg_defs_stack);
 313 }
 314
 315 \f
 316 /* Do not try to replace constant addresses or addresses of local and
 317    argument slots.  These MEM expressions are made only once and inserted
 318    in many instructions, as well as being used to control symbol table
 319    output.  It is not safe to clobber them.
 320
 321    There are some uncommon cases where the address is already in a register
 322    for some reason, but we cannot take advantage of that because we have
 323    no easy way to unshare the MEM.  In addition, looking up all stack
 324    addresses is costly.  */
 325
 326 static bool
 327 can_simplify_addr (rtx addr)
 328 {
 329   rtx reg;
 330
 331   if (CONSTANT_ADDRESS_P (addr))
 332     return false;
 333
 334   if (GET_CODE (addr) == PLUS)
 335     reg = XEXP (addr, 0);
 336   else
 337     reg = addr;
 338
 339   return (!REG_P (reg)
 340           || (REGNO (reg) != FRAME_POINTER_REGNUM
 341               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 342               && REGNO (reg) != ARG_POINTER_REGNUM));
 343 }
 344
 345 /* Returns a canonical version of X for the address, from the point of view,
 346    that all multiplications are represented as MULT instead of the multiply
 347    by a power of 2 being represented as ASHIFT.
 348
 349    Every ASHIFT we find has been made by simplify_gen_binary and was not
 350    there before, so it is not shared.  So we can do this in place.  */
 351
 352 static void
 353 canonicalize_address (rtx x)
 354 {
 355   for (;;)
 356     switch (GET_CODE (x))
 357       {
 358       case ASHIFT:
 359         if (CONST_INT_P (XEXP (x, 1))
 360             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 361             && INTVAL (XEXP (x, 1)) >= 0)
 362           {
 363             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 364             PUT_CODE (x, MULT);
 365             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 366                                         GET_MODE (x));
 367           }
 368
 369         x = XEXP (x, 0);
 370         break;
 371
 372       case PLUS:
 373         if (GET_CODE (XEXP (x, 0)) == PLUS
 374             || GET_CODE (XEXP (x, 0)) == ASHIFT
 375             || GET_CODE (XEXP (x, 0)) == CONST)
 376           canonicalize_address (XEXP (x, 0));
 377
 378         x = XEXP (x, 1);
 379         break;
 380
 381       case CONST:
 382         x = XEXP (x, 0);
 383         break;
 384
 385       default:
 386         return;
 387       }
 388 }
 389
 390 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 391    for a memory access in the given MODE.  */
 392
 393 static bool
 394 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 395                         addr_space_t as, bool speed)
 396 {
 397   int gain;
 398
 399   if (rtx_equal_p (old_rtx, new_rtx)
 400       || !memory_address_addr_space_p (mode, new_rtx, as))
 401     return false;
 402
 403   /* Copy propagation is always ok.  */
 404   if (REG_P (old_rtx) && REG_P (new_rtx))
 405     return true;
 406
 407   /* Prefer the new address if it is less expensive.  */
 408   gain = (address_cost (old_rtx, mode, as, speed)
 409           - address_cost (new_rtx, mode, as, speed));
 410
 411   /* If the addresses have equivalent cost, prefer the new address
 412      if it has the highest `set_src_cost'.  That has the potential of
 413      eliminating the most insns without additional costs, and it
 414      is the same that cse.c used to do.  */
 415   if (gain == 0)
 416     gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed);
 417
 418   return (gain > 0);
 419 }
 420
 421
 422 /* Flags for the last parameter of propagate_rtx_1.  */
 423
 424 enum {
 425   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
 426      if it is false, propagate_rtx_1 returns false if, for at least
 427      one occurrence OLD, it failed to collapse the result to a constant.
 428      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
 429      collapse to zero if replacing (reg:M B) with (reg:M A).
 430
 431      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
 432      propagate_rtx_1 just tries to make cheaper and valid memory
 433      addresses.  */
 434   PR_CAN_APPEAR = 1,
 435
 436   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
 437      outside memory addresses.  This is needed because propagate_rtx_1 does
 438      not do any analysis on memory; thus it is very conservative and in general
 439      it will fail if non-read-only MEMs are found in the source expression.
 440
 441      PR_HANDLE_MEM is set when the source of the propagation was not
 442      another MEM.  Then, it is safe not to treat non-read-only MEMs as
 443      ``opaque'' objects.  */
 444   PR_HANDLE_MEM = 2,
 445
 446   /* Set when costs should be optimized for speed.  */
 447   PR_OPTIMIZE_FOR_SPEED = 4
 448 };
 449
 450
 451 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
 452    resulting expression.  Replace *PX with a new RTL expression if an
 453    occurrence of OLD was found.
 454
 455    This is only a wrapper around simplify-rtx.c: do not add any pattern
 456    matching code here.  (The sole exception is the handling of LO_SUM, but
 457    that is because there is no simplify_gen_* function for LO_SUM).  */
 458
 459 static bool
 460 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 461 {
 462   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 463   enum rtx_code code = GET_CODE (x);
 464   enum machine_mode mode = GET_MODE (x);
 465   enum machine_mode op_mode;
 466   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
 467   bool valid_ops = true;
 468
 469   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
 470     {
 471       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
 472          they have side effects or not).  */
 473       *px = (side_effects_p (x)
 474              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
 475              : gen_rtx_SCRATCH (GET_MODE (x)));
 476       return false;
 477     }
 478
 479   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
 480      address, and we are *not* inside one.  */
 481   if (x == old_rtx)
 482     {
 483       *px = new_rtx;
 484       return can_appear;
 485     }
 486
 487   /* If this is an expression, try recursive substitution.  */
 488   switch (GET_RTX_CLASS (code))
 489     {
 490     case RTX_UNARY:
 491       op0 = XEXP (x, 0);
 492       op_mode = GET_MODE (op0);
 493       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 494       if (op0 == XEXP (x, 0))
 495         return true;
 496       tem = simplify_gen_unary (code, mode, op0, op_mode);
 497       break;
 498
 499     case RTX_BIN_ARITH:
 500     case RTX_COMM_ARITH:
 501       op0 = XEXP (x, 0);
 502       op1 = XEXP (x, 1);
 503       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 504       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 505       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 506         return true;
 507       tem = simplify_gen_binary (code, mode, op0, op1);
 508       break;
 509
 510     case RTX_COMPARE:
 511     case RTX_COMM_COMPARE:
 512       op0 = XEXP (x, 0);
 513       op1 = XEXP (x, 1);
 514       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
 515       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 516       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 517       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 518         return true;
 519       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 520       break;
 521
 522     case RTX_TERNARY:
 523     case RTX_BITFIELD_OPS:
 524       op0 = XEXP (x, 0);
 525       op1 = XEXP (x, 1);
 526       op2 = XEXP (x, 2);
 527       op_mode = GET_MODE (op0);
 528       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 529       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 530       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
 531       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 532         return true;
 533       if (op_mode == VOIDmode)
 534         op_mode = GET_MODE (op0);
 535       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 536       break;
 537
 538     case RTX_EXTRA:
 539       /* The only case we try to handle is a SUBREG.  */
 540       if (code == SUBREG)
 541         {
 542           op0 = XEXP (x, 0);
 543           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 544           if (op0 == XEXP (x, 0))
 545             return true;
 546           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 547                                      SUBREG_BYTE (x));
 548         }
 549       break;
 550
 551     case RTX_OBJ:
 552       if (code == MEM && x != new_rtx)
 553         {
 554           rtx new_op0;
 555           op0 = XEXP (x, 0);
 556
 557           /* There are some addresses that we cannot work on.  */
 558           if (!can_simplify_addr (op0))
 559             return true;
 560
 561           op0 = new_op0 = targetm.delegitimize_address (op0);
 562           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
 563                                         flags | PR_CAN_APPEAR);
 564
 565           /* Dismiss transformation that we do not want to carry on.  */
 566           if (!valid_ops
 567               || new_op0 == op0
 568               || !(GET_MODE (new_op0) == GET_MODE (op0)
 569                    || GET_MODE (new_op0) == VOIDmode))
 570             return true;
 571
 572           canonicalize_address (new_op0);
 573
 574           /* Copy propagations are always ok.  Otherwise check the costs.  */
 575           if (!(REG_P (old_rtx) && REG_P (new_rtx))
 576               && !should_replace_address (op0, new_op0, GET_MODE (x),
 577                                           MEM_ADDR_SPACE (x),
 578                                           flags & PR_OPTIMIZE_FOR_SPEED))
 579             return true;
 580
 581           tem = replace_equiv_address_nv (x, new_op0);
 582         }
 583
 584       else if (code == LO_SUM)
 585         {
 586           op0 = XEXP (x, 0);
 587           op1 = XEXP (x, 1);
 588
 589           /* The only simplification we do attempts to remove references to op0
 590              or make it constant -- in both cases, op0's invalidity will not
 591              make the result invalid.  */
 592           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
 593           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 594           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 595             return true;
 596
 597           /* (lo_sum (high x) x) -> x  */
 598           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 599             tem = op1;
 600           else
 601             tem = gen_rtx_LO_SUM (mode, op0, op1);
 602
 603           /* OP1 is likely not a legitimate address, otherwise there would have
 604              been no LO_SUM.  We want it to disappear if it is invalid, return
 605              false in that case.  */
 606           return memory_address_p (mode, tem);
 607         }
 608
 609       else if (code == REG)
 610         {
 611           if (rtx_equal_p (x, old_rtx))
 612             {
 613               *px = new_rtx;
 614               return can_appear;
 615             }
 616         }
 617       break;
 618
 619     default:
 620       break;
 621     }
 622
 623   /* No change, no trouble.  */
 624   if (tem == NULL_RTX)
 625     return true;
 626
 627   *px = tem;
 628
 629   /* The replacement we made so far is valid, if all of the recursive
 630      replacements were valid, or we could simplify everything to
 631      a constant.  */
 632   return valid_ops || can_appear || CONSTANT_P (tem);
 633 }
 634
 635
 636 /* for_each_rtx traversal function that returns 1 if BODY points to
 637    a non-constant mem.  */
 638
 639 static int
 640 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 641 {
 642   rtx x = *body;
 643   return MEM_P (x) && !MEM_READONLY_P (x);
 644 }
 645
 646
 647 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 648    resulting expression (in mode MODE).  Return a new expression if it is
 649    a constant, otherwise X.
 650
 651    Simplifications where occurrences of NEW collapse to a constant are always
 652    accepted.  All simplifications are accepted if NEW is a pseudo too.
 653    Otherwise, we accept simplifications that have a lower or equal cost.  */
 654
 655 static rtx
 656 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 657                bool speed)
 658 {
 659   rtx tem;
 660   bool collapsed;
 661   int flags;
 662
 663   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 664     return NULL_RTX;
 665
 666   flags = 0;
 667   if (REG_P (new_rtx)
 668       || CONSTANT_P (new_rtx)
 669       || (GET_CODE (new_rtx) == SUBREG
 670           && REG_P (SUBREG_REG (new_rtx))
 671           && (GET_MODE_SIZE (mode)
 672               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 673     flags |= PR_CAN_APPEAR;
 674   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 675     flags |= PR_HANDLE_MEM;
 676
 677   if (speed)
 678     flags |= PR_OPTIMIZE_FOR_SPEED;
 679
 680   tem = x;
 681   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 682   if (tem == x || !collapsed)
 683     return NULL_RTX;
 684
 685   /* gen_lowpart_common will not be able to process VOIDmode entities other
 686      than CONST_INTs.  */
 687   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 688     return NULL_RTX;
 689
 690   if (GET_MODE (tem) == VOIDmode)
 691     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 692   else
 693     gcc_assert (GET_MODE (tem) == mode);
 694
 695   return tem;
 696 }
 697
 698
 699 \f
 700
 701 /* Return true if the register from reference REF is killed
 702    between FROM to (but not including) TO.  */
 703
 704 static bool
 705 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 706 {
 707   rtx insn;
 708
 709   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 710     {
 711       df_ref *def_rec;
 712       if (!INSN_P (insn))
 713         continue;
 714
 715       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 716         {
 717           df_ref def = *def_rec;
 718           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 719             return true;
 720         }
 721     }
 722   return false;
 723 }
 724
 725
 726 /* Check if the given DEF is available in INSN.  This would require full
 727    computation of available expressions; we check only restricted conditions:
 728    - if DEF is the sole definition of its register, go ahead;
 729    - in the same basic block, we check for no definitions killing the
 730      definition of DEF_INSN;
 731    - if USE's basic block has DEF's basic block as the sole predecessor,
 732      we check if the definition is killed after DEF_INSN or before
 733      TARGET_INSN insn, in their respective basic blocks.  */
 734 static bool
 735 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 736 {
 737   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 738   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 739   int regno;
 740   df_ref def;
 741
 742   /* We used to have a def reaching a use that is _before_ the def,
 743      with the def not dominating the use even though the use and def
 744      are in the same basic block, when a register may be used
 745      uninitialized in a loop.  This should not happen anymore since
 746      we do not use reaching definitions, but still we test for such
 747      cases and assume that DEF is not available.  */
 748   if (def_bb == target_bb
 749       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 750       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 751     return true;
 752
 753   /* Check if the reg in USE has only one definition.  We already
 754      know that this definition reaches use, or we wouldn't be here.
 755      However, this is invalid for hard registers because if they are
 756      live at the beginning of the function it does not mean that we
 757      have an uninitialized access.  */
 758   regno = DF_REF_REGNO (use);
 759   def = DF_REG_DEF_CHAIN (regno);
 760   if (def
 761       && DF_REF_NEXT_REG (def) == NULL
 762       && regno >= FIRST_PSEUDO_REGISTER)
 763     return false;
 764
 765   /* Check locally if we are in the same basic block.  */
 766   if (def_bb == target_bb)
 767     return local_ref_killed_between_p (use, def_insn, target_insn);
 768
 769   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 770   if (single_pred_p (target_bb)
 771       && single_pred (target_bb) == def_bb)
 772     {
 773       df_ref x;
 774
 775       /* See if USE is killed between DEF_INSN and the last insn in the
 776          basic block containing DEF_INSN.  */
 777       x = df_bb_regno_last_def_find (def_bb, regno);
 778       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 779         return true;
 780
 781       /* See if USE is killed between TARGET_INSN and the first insn in the
 782          basic block containing TARGET_INSN.  */
 783       x = df_bb_regno_first_def_find (target_bb, regno);
 784       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 785         return true;
 786
 787       return false;
 788     }
 789
 790   /* Otherwise assume the worst case.  */
 791   return true;
 792 }
 793
 794
 795 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 796    would require full computation of available expressions;
 797    we check only restricted conditions, see use_killed_between.  */
 798 static bool
 799 all_uses_available_at (rtx def_insn, rtx target_insn)
 800 {
 801   df_ref *use_rec;
 802   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 803   rtx def_set = single_set (def_insn);
 804
 805   gcc_assert (def_set);
 806
 807   /* If target_insn comes right after def_insn, which is very common
 808      for addresses, we can use a quicker test.  */
 809   if (NEXT_INSN (def_insn) == target_insn
 810       && REG_P (SET_DEST (def_set)))
 811     {
 812       rtx def_reg = SET_DEST (def_set);
 813
 814       /* If the insn uses the reg that it defines, the substitution is
 815          invalid.  */
 816       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 817         {
 818           df_ref use = *use_rec;
 819           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 820             return false;
 821         }
 822       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 823         {
 824           df_ref use = *use_rec;
 825           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 826             return false;
 827         }
 828     }
 829   else
 830     {
 831       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 832
 833       /* Look at all the uses of DEF_INSN, and see if they are not
 834          killed between DEF_INSN and TARGET_INSN.  */
 835       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 836         {
 837           df_ref use = *use_rec;
 838           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 839             return false;
 840           if (use_killed_between (use, def_insn, target_insn))
 841             return false;
 842         }
 843       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 844         {
 845           df_ref use = *use_rec;
 846           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 847             return false;
 848           if (use_killed_between (use, def_insn, target_insn))
 849             return false;
 850         }
 851     }
 852
 853   return true;
 854 }
 855
 856 \f
 857 static df_ref *active_defs;
 858 #ifdef ENABLE_CHECKING
 859 static sparseset active_defs_check;
 860 #endif
 861
 862 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 863    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 864    too, for checking purposes.  */
 865
 866 static void
 867 register_active_defs (df_ref *use_rec)
 868 {
 869   while (*use_rec)
 870     {
 871       df_ref use = *use_rec++;
 872       df_ref def = get_def_for_use (use);
 873       int regno = DF_REF_REGNO (use);
 874
 875 #ifdef ENABLE_CHECKING
 876       sparseset_set_bit (active_defs_check, regno);
 877 #endif
 878       active_defs[regno] = def;
 879     }
 880 }
 881
 882
 883 /* Build the use->def links that we use to update the dataflow info
 884    for new uses.  Note that building the links is very cheap and if
 885    it were done earlier, they could be used to rule out invalid
 886    propagations (in addition to what is done in all_uses_available_at).
 887    I'm not doing this yet, though.  */
 888
 889 static void
 890 update_df_init (rtx def_insn, rtx insn)
 891 {
 892 #ifdef ENABLE_CHECKING
 893   sparseset_clear (active_defs_check);
 894 #endif
 895   register_active_defs (DF_INSN_USES (def_insn));
 896   register_active_defs (DF_INSN_USES (insn));
 897   register_active_defs (DF_INSN_EQ_USES (insn));
 898 }
 899
 900
 901 /* Update the USE_DEF_REF array for the given use, using the active definitions
 902    in the ACTIVE_DEFS array to match pseudos to their def. */
 903
 904 static inline void
 905 update_uses (df_ref *use_rec)
 906 {
 907   while (*use_rec)
 908     {
 909       df_ref use = *use_rec++;
 910       int regno = DF_REF_REGNO (use);
 911
 912       /* Set up the use-def chain.  */
 913       if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref))
 914         VEC_safe_grow_cleared (df_ref, heap, use_def_ref,
 915                                DF_REF_ID (use) + 1);
 916
 917 #ifdef ENABLE_CHECKING
 918       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 919 #endif
 920       VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]);
 921     }
 922 }
 923
 924
 925 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 926    uses if NOTES_ONLY is true.  */
 927
 928 static void
 929 update_df (rtx insn, rtx note)
 930 {
 931   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 932
 933   if (note)
 934     {
 935       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 936       df_notes_rescan (insn);
 937     }
 938   else
 939     {
 940       df_uses_create (&PATTERN (insn), insn, 0);
 941       df_insn_rescan (insn);
 942       update_uses (DF_INSN_INFO_USES (insn_info));
 943     }
 944
 945   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 946 }
 947
 948
 949 /* Try substituting NEW into LOC, which originated from forward propagation
 950    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 951    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 952    new insn is not recognized.  Return whether the substitution was
 953    performed.  */
 954
 955 static bool
 956 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 957 {
 958   rtx insn = DF_REF_INSN (use);
 959   rtx set = single_set (insn);
 960   rtx note = NULL_RTX;
 961   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 962   int old_cost = 0;
 963   bool ok;
 964
 965   update_df_init (def_insn, insn);
 966
 967   /* forward_propagate_subreg may be operating on an instruction with
 968      multiple sets.  If so, assume the cost of the new instruction is
 969      not greater than the old one.  */
 970   if (set)
 971     old_cost = set_src_cost (SET_SRC (set), speed);
 972   if (dump_file)
 973     {
 974       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 975       print_inline_rtx (dump_file, *loc, 2);
 976       fprintf (dump_file, "\n with ");
 977       print_inline_rtx (dump_file, new_rtx, 2);
 978       fprintf (dump_file, "\n");
 979     }
 980
 981   validate_unshare_change (insn, loc, new_rtx, true);
 982   if (!verify_changes (0))
 983     {
 984       if (dump_file)
 985         fprintf (dump_file, "Changes to insn %d not recognized\n",
 986                  INSN_UID (insn));
 987       ok = false;
 988     }
 989
 990   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 991            && set
 992            && set_src_cost (SET_SRC (set), speed) > old_cost)
 993     {
 994       if (dump_file)
 995         fprintf (dump_file, "Changes to insn %d not profitable\n",
 996                  INSN_UID (insn));
 997       ok = false;
 998     }
 999
1000   else
1001     {
1002       if (dump_file)
1003         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
1004       ok = true;
1005     }
1006
1007   if (ok)
1008     {
1009       confirm_change_group ();
1010       num_changes++;
1011     }
1012   else
1013     {
1014       cancel_changes (0);
1015
1016       /* Can also record a simplified value in a REG_EQUAL note,
1017          making a new one if one does not already exist.  */
1018       if (set_reg_equal)
1019         {
1020           if (dump_file)
1021             fprintf (dump_file, " Setting REG_EQUAL note\n");
1022
1023           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1024         }
1025     }
1026
1027   if ((ok || note) && !CONSTANT_P (new_rtx))
1028     update_df (insn, note);
1029
1030   return ok;
1031 }
1032
1033 /* For the given single_set INSN, containing SRC known to be a
1034    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1035    is redundant due to the register being set by a LOAD_EXTEND_OP
1036    load from memory.  */
1037
1038 static bool
1039 free_load_extend (rtx src, rtx insn)
1040 {
1041   rtx reg;
1042   df_ref *use_vec;
1043   df_ref use = 0, def;
1044
1045   reg = XEXP (src, 0);
1046 #ifdef LOAD_EXTEND_OP
1047   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1048 #endif
1049     return false;
1050
1051   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1052     {
1053       use = *use_vec;
1054
1055       if (!DF_REF_IS_ARTIFICIAL (use)
1056           && DF_REF_TYPE (use) == DF_REF_REG_USE
1057           && DF_REF_REG (use) == reg)
1058         break;
1059     }
1060   if (!use)
1061     return false;
1062
1063   def = get_def_for_use (use);
1064   if (!def)
1065     return false;
1066
1067   if (DF_REF_IS_ARTIFICIAL (def))
1068     return false;
1069
1070   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1071     {
1072       rtx patt = PATTERN (DF_REF_INSN (def));
1073
1074       if (GET_CODE (patt) == SET
1075           && GET_CODE (SET_SRC (patt)) == MEM
1076           && rtx_equal_p (SET_DEST (patt), reg))
1077         return true;
1078     }
1079   return false;
1080 }
1081
1082 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1083
1084 static bool
1085 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1086 {
1087   rtx use_reg = DF_REF_REG (use);
1088   rtx use_insn, src;
1089
1090   /* Only consider subregs... */
1091   enum machine_mode use_mode = GET_MODE (use_reg);
1092   if (GET_CODE (use_reg) != SUBREG
1093       || !REG_P (SET_DEST (def_set)))
1094     return false;
1095
1096   /* If this is a paradoxical SUBREG...  */
1097   if (GET_MODE_SIZE (use_mode)
1098       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1099     {
1100       /* If this is a paradoxical SUBREG, we have no idea what value the
1101          extra bits would have.  However, if the operand is equivalent to
1102          a SUBREG whose operand is the same as our mode, and all the modes
1103          are within a word, we can just use the inner operand because
1104          these SUBREGs just say how to treat the register.  */
1105       use_insn = DF_REF_INSN (use);
1106       src = SET_SRC (def_set);
1107       if (GET_CODE (src) == SUBREG
1108           && REG_P (SUBREG_REG (src))
1109           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1110           && GET_MODE (SUBREG_REG (src)) == use_mode
1111           && subreg_lowpart_p (src)
1112           && all_uses_available_at (def_insn, use_insn))
1113         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1114                                  def_insn, false);
1115     }
1116
1117   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1118      is the low part of the reg being extended then just use the inner
1119      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1120      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1121      or due to the operation being a no-op when applied to registers.
1122      For example, if we have:
1123
1124          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1125          B: (... (subreg:SI (reg:DI X)) ...)
1126
1127      and mode_rep_extended says that Y is already sign-extended,
1128      the backend will typically allow A to be combined with the
1129      definition of Y or, failing that, allow A to be deleted after
1130      reload through register tying.  Introducing more uses of Y
1131      prevents both optimisations.  */
1132   else if (subreg_lowpart_p (use_reg))
1133     {
1134       use_insn = DF_REF_INSN (use);
1135       src = SET_SRC (def_set);
1136       if ((GET_CODE (src) == ZERO_EXTEND
1137            || GET_CODE (src) == SIGN_EXTEND)
1138           && REG_P (XEXP (src, 0))
1139           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1140           && GET_MODE (XEXP (src, 0)) == use_mode
1141           && !free_load_extend (src, def_insn)
1142           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1143               != (int) GET_CODE (src))
1144           && all_uses_available_at (def_insn, use_insn))
1145         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1146                                  def_insn, false);
1147     }
1148
1149   return false;
1150 }
1151
1152 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1153
1154 static bool
1155 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1156 {
1157   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1158   int speed_p, i;
1159   df_ref *use_vec;
1160
1161   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1162
1163   src = SET_SRC (def_set);
1164   use_pat = PATTERN (use_insn);
1165
1166   /* In __asm don't replace if src might need more registers than
1167      reg, as that could increase register pressure on the __asm.  */
1168   use_vec = DF_INSN_USES (def_insn);
1169   if (use_vec[0] && use_vec[1])
1170     return false;
1171
1172   update_df_init (def_insn, use_insn);
1173   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1174   asm_operands = NULL_RTX;
1175   switch (GET_CODE (use_pat))
1176     {
1177     case ASM_OPERANDS:
1178       asm_operands = use_pat;
1179       break;
1180     case SET:
1181       if (MEM_P (SET_DEST (use_pat)))
1182         {
1183           loc = &SET_DEST (use_pat);
1184           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1185           if (new_rtx)
1186             validate_unshare_change (use_insn, loc, new_rtx, true);
1187         }
1188       asm_operands = SET_SRC (use_pat);
1189       break;
1190     case PARALLEL:
1191       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1192         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1193           {
1194             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1195               {
1196                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1197                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1198                                          src, speed_p);
1199                 if (new_rtx)
1200                   validate_unshare_change (use_insn, loc, new_rtx, true);
1201               }
1202             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1203           }
1204         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1205           asm_operands = XVECEXP (use_pat, 0, i);
1206       break;
1207     default:
1208       gcc_unreachable ();
1209     }
1210
1211   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1212   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1213     {
1214       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1215       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1216       if (new_rtx)
1217         validate_unshare_change (use_insn, loc, new_rtx, true);
1218     }
1219
1220   if (num_changes_pending () == 0 || !apply_change_group ())
1221     return false;
1222
1223   update_df (use_insn, NULL);
1224   num_changes++;
1225   return true;
1226 }
1227
1228 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1229    result.  */
1230
1231 static bool
1232 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1233 {
1234   rtx use_insn = DF_REF_INSN (use);
1235   rtx use_set = single_set (use_insn);
1236   rtx src, reg, new_rtx, *loc;
1237   bool set_reg_equal;
1238   enum machine_mode mode;
1239   int asm_use = -1;
1240
1241   if (INSN_CODE (use_insn) < 0)
1242     asm_use = asm_noperands (PATTERN (use_insn));
1243
1244   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1245     return false;
1246
1247   /* Do not propagate into PC, CC0, etc.  */
1248   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1249     return false;
1250
1251   /* If def and use are subreg, check if they match.  */
1252   reg = DF_REF_REG (use);
1253   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1254     {
1255       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1256         return false;
1257     }
1258   /* Check if the def had a subreg, but the use has the whole reg.  */
1259   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1260     return false;
1261   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1262      previous case, the optimization is possible and often useful indeed.  */
1263   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1264     reg = SUBREG_REG (reg);
1265
1266   /* Make sure that we can treat REG as having the same mode as the
1267      source of DEF_SET.  */
1268   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1269     return false;
1270
1271   /* Check if the substitution is valid (last, because it's the most
1272      expensive check!).  */
1273   src = SET_SRC (def_set);
1274   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1275     return false;
1276
1277   /* Check if the def is loading something from the constant pool; in this
1278      case we would undo optimization such as compress_float_constant.
1279      Still, we can set a REG_EQUAL note.  */
1280   if (MEM_P (src) && MEM_READONLY_P (src))
1281     {
1282       rtx x = avoid_constant_pool_reference (src);
1283       if (x != src && use_set)
1284         {
1285           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1286           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1287           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1288           if (old_rtx != new_rtx)
1289             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1290         }
1291       return false;
1292     }
1293
1294   if (asm_use >= 0)
1295     return forward_propagate_asm (use, def_insn, def_set, reg);
1296
1297   /* Else try simplifying.  */
1298
1299   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1300     {
1301       loc = &SET_DEST (use_set);
1302       set_reg_equal = false;
1303     }
1304   else if (!use_set)
1305     {
1306       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1307       set_reg_equal = false;
1308     }
1309   else
1310     {
1311       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1312       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1313         loc = &XEXP (note, 0);
1314       else
1315         loc = &SET_SRC (use_set);
1316
1317       /* Do not replace an existing REG_EQUAL note if the insn is not
1318          recognized.  Either we're already replacing in the note, or we'll
1319          separately try plugging the definition in the note and simplifying.
1320          And only install a REQ_EQUAL note when the destination is a REG,
1321          as the note would be invalid otherwise.  */
1322       set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set)));
1323     }
1324
1325   if (GET_MODE (*loc) == VOIDmode)
1326     mode = GET_MODE (SET_DEST (use_set));
1327   else
1328     mode = GET_MODE (*loc);
1329
1330   new_rtx = propagate_rtx (*loc, mode, reg, src,
1331                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1332
1333   if (!new_rtx)
1334     return false;
1335
1336   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1337 }
1338
1339
1340 /* Given a use USE of an insn, if it has a single reaching
1341    definition, try to forward propagate it into that insn.
1342    Return true if cfg cleanup will be needed.  */
1343
1344 static bool
1345 forward_propagate_into (df_ref use)
1346 {
1347   df_ref def;
1348   rtx def_insn, def_set, use_insn;
1349   rtx parent;
1350
1351   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1352     return false;
1353   if (DF_REF_IS_ARTIFICIAL (use))
1354     return false;
1355
1356   /* Only consider uses that have a single definition.  */
1357   def = get_def_for_use (use);
1358   if (!def)
1359     return false;
1360   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1361     return false;
1362   if (DF_REF_IS_ARTIFICIAL (def))
1363     return false;
1364
1365   /* Do not propagate loop invariant definitions inside the loop.  */
1366   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1367     return false;
1368
1369   /* Check if the use is still present in the insn!  */
1370   use_insn = DF_REF_INSN (use);
1371   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1372     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1373   else
1374     parent = PATTERN (use_insn);
1375
1376   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1377     return false;
1378
1379   def_insn = DF_REF_INSN (def);
1380   if (multiple_sets (def_insn))
1381     return false;
1382   def_set = single_set (def_insn);
1383   if (!def_set)
1384     return false;
1385
1386   /* Only try one kind of propagation.  If two are possible, we'll
1387      do it on the following iterations.  */
1388   if (forward_propagate_and_simplify (use, def_insn, def_set)
1389       || forward_propagate_subreg (use, def_insn, def_set))
1390     {
1391       if (cfun->can_throw_non_call_exceptions
1392           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1393           && purge_dead_edges (DF_REF_BB (use)))
1394         return true;
1395     }
1396   return false;
1397 }
1398
1399 \f
1400 static void
1401 fwprop_init (void)
1402 {
1403   num_changes = 0;
1404   calculate_dominance_info (CDI_DOMINATORS);
1405
1406   /* We do not always want to propagate into loops, so we have to find
1407      loops and be careful about them.  But we have to call flow_loops_find
1408      before df_analyze, because flow_loops_find may introduce new jump
1409      insns (sadly) if we are not working in cfglayout mode.  */
1410   loop_optimizer_init (0);
1411
1412   build_single_def_use_links ();
1413   df_set_flags (DF_DEFER_INSN_RESCAN);
1414
1415   active_defs = XNEWVEC (df_ref, max_reg_num ());
1416 #ifdef ENABLE_CHECKING
1417   active_defs_check = sparseset_alloc (max_reg_num ());
1418 #endif
1419 }
1420
1421 static void
1422 fwprop_done (void)
1423 {
1424   loop_optimizer_finalize ();
1425
1426   VEC_free (df_ref, heap, use_def_ref);
1427   free (active_defs);
1428 #ifdef ENABLE_CHECKING
1429   sparseset_free (active_defs_check);
1430 #endif
1431
1432   free_dominance_info (CDI_DOMINATORS);
1433   cleanup_cfg (0);
1434   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1435
1436   if (dump_file)
1437     fprintf (dump_file,
1438              "\nNumber of successful forward propagations: %d\n\n",
1439              num_changes);
1440 }
1441
1442
1443 /* Main entry point.  */
1444
1445 static bool
1446 gate_fwprop (void)
1447 {
1448   return optimize > 0 && flag_forward_propagate;
1449 }
1450
1451 static unsigned int
1452 fwprop (void)
1453 {
1454   unsigned i;
1455   bool need_cleanup = false;
1456
1457   fwprop_init ();
1458
1459   /* Go through all the uses.  df_uses_create will create new ones at the
1460      end, and we'll go through them as well.
1461
1462      Do not forward propagate addresses into loops until after unrolling.
1463      CSE did so because it was able to fix its own mess, but we are not.  */
1464
1465   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1466     {
1467       df_ref use = DF_USES_GET (i);
1468       if (use)
1469         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1470             || DF_REF_BB (use)->loop_father == NULL
1471             /* The outer most loop is not really a loop.  */
1472             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1473           need_cleanup |= forward_propagate_into (use);
1474     }
1475
1476   fwprop_done ();
1477   if (need_cleanup)
1478     cleanup_cfg (0);
1479   return 0;
1480 }
1481
1482 struct rtl_opt_pass pass_rtl_fwprop =
1483 {
1484  {
1485   RTL_PASS,
1486   "fwprop1",                            /* name */
1487   gate_fwprop,                          /* gate */
1488   fwprop,                               /* execute */
1489   NULL,                                 /* sub */
1490   NULL,                                 /* next */
1491   0,                                    /* static_pass_number */
1492   TV_FWPROP,                            /* tv_id */
1493   0,                                    /* properties_required */
1494   0,                                    /* properties_provided */
1495   0,                                    /* properties_destroyed */
1496   0,                                    /* todo_flags_start */
1497   TODO_df_finish
1498     | TODO_verify_flow
1499     | TODO_verify_rtl_sharing           /* todo_flags_finish */
1500  }
1501 };
1502
1503 static unsigned int
1504 fwprop_addr (void)
1505 {
1506   unsigned i;
1507   bool need_cleanup = false;
1508
1509   fwprop_init ();
1510
1511   /* Go through all the uses.  df_uses_create will create new ones at the
1512      end, and we'll go through them as well.  */
1513   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1514     {
1515       df_ref use = DF_USES_GET (i);
1516       if (use)
1517         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1518             && DF_REF_BB (use)->loop_father != NULL
1519             /* The outer most loop is not really a loop.  */
1520             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1521           need_cleanup |= forward_propagate_into (use);
1522     }
1523
1524   fwprop_done ();
1525
1526   if (need_cleanup)
1527     cleanup_cfg (0);
1528   return 0;
1529 }
1530
1531 struct rtl_opt_pass pass_rtl_fwprop_addr =
1532 {
1533  {
1534   RTL_PASS,
1535   "fwprop2",                            /* name */
1536   gate_fwprop,                          /* gate */
1537   fwprop_addr,                          /* execute */
1538   NULL,                                 /* sub */
1539   NULL,                                 /* next */
1540   0,                                    /* static_pass_number */
1541   TV_FWPROP,                            /* tv_id */
1542   0,                                    /* properties_required */
1543   0,                                    /* properties_provided */
1544   0,                                    /* properties_destroyed */
1545   0,                                    /* todo_flags_start */
1546   TODO_df_finish | TODO_verify_rtl_sharing  /* todo_flags_finish */
1547  }
1548 };