gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27
  28 #include "sparseset.h"
  29 #include "rtl.h"
  30 #include "tm_p.h"
  31 #include "insn-config.h"
  32 #include "recog.h"
  33 #include "flags.h"
  34 #include "obstack.h"
  35 #include "basic-block.h"
  36 #include "df.h"
  37 #include "target.h"
  38 #include "cfgloop.h"
  39 #include "tree-pass.h"
  40 #include "domwalk.h"
  41 #include "emit-rtl.h"
  42
  43
  44 /* This pass does simple forward propagation and simplification when an
  45    operand of an insn can only come from a single def.  This pass uses
  46    df.c, so it is global.  However, we only do limited analysis of
  47    available expressions.
  48
  49    1) The pass tries to propagate the source of the def into the use,
  50    and checks if the result is independent of the substituted value.
  51    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  52    zero, independent of the source register.
  53
  54    In particular, we propagate constants into the use site.  Sometimes
  55    RTL expansion did not put the constant in the same insn on purpose,
  56    to satisfy a predicate, and the result will fail to be recognized;
  57    but this happens rarely and in this case we can still create a
  58    REG_EQUAL note.  For multi-word operations, this
  59
  60       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  61       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  62       (set (subreg:SI (reg:DI 122) 0)
  63          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  64       (set (subreg:SI (reg:DI 122) 4)
  65          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  66
  67    can be simplified to the much simpler
  68
      (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  70       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  71
  72    This particular propagation is also effective at putting together
  73    complex addressing modes.  We are more aggressive inside MEMs, in
  74    that all definitions are propagated if the use is in a MEM; if the
  75    result is a valid memory address we check address_cost to decide
  76    whether the substitution is worthwhile.
  77
   2) The pass propagates register copies.  Although this is not as
   effective as the copy propagation done by CSE's canon_reg, which works
   by walking the instruction chain, it can help the other transformations.
  81
  82    We should consider removing this optimization, and instead reorder the
  83    RTL passes, because GCSE does this transformation too.  With some luck,
  84    the CSE pass at the end of rest_of_handle_gcse could also go away.
  85
  86    3) The pass looks for paradoxical subregs that are actually unnecessary.
  87    Things like this:
  88
  89      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  90      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  91      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  92                                 (subreg:SI (reg:QI 121) 0)))
  93
  94    are very common on machines that can only do word-sized operations.
  95    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  96    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  97    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  98    above will simplify this to
  99
 100      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 101      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 102      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 103
 104    where the first two insns are now dead.
 105
 106    We used to use reaching definitions to find which uses have a
 107    single reaching definition (sounds obvious...), but this is too
 108    complex a problem in nasty testcases like PR33928.  Now we use the
 109    multiple definitions problem in df-problems.c.  The similarity
 110    between that problem and SSA form creation is taken further, in
 111    that fwprop does a dominator walk to create its chains; however,
 112    instead of creating a PHI function where multiple definitions meet
 113    I just punt and record only singleton use-def chains, which is
 114    all that is needed by fwprop.  */
 115
 116
/* Counter of changes.  NOTE(review): presumably the number of successful
   propagations; it is not updated in this part of the file, so confirm
   against its users.  */
static int num_changes;

/* Vectors of df_ref used while building the use-def links:
   - use_def_ref is indexed by DF_REF_ID of a use and holds the def
     recorded for that use, or NULL (see process_uses and
     get_def_for_use);
   - reg_defs is indexed by register number and holds the def currently
     recorded for that register, or NULL (see process_defs);
   - reg_defs_stack saves information about overwritten reg_defs entries
     so that single_def_use_leave_block can undo them; a NULL entry marks
     the start of a basic block.  */
DEF_VEC_P(df_ref);
DEF_VEC_ALLOC_P(df_ref,heap);
static VEC(df_ref,heap) *use_def_ref;
static VEC(df_ref,heap) *reg_defs;
static VEC(df_ref,heap) *reg_defs_stack;

/* The MD bitmaps are trimmed to include only live registers to cut
   memory usage on testcases like insn-recog.c.  Track live registers
   in the basic block and do not perform forward propagation if the
   destination is a dead pseudo occurring in a note.

   LOCAL_MD and LOCAL_LR are working copies of the MD and LR "in" sets of
   the block being walked; they are updated insn by insn in
   single_def_use_enter_block.  */
static bitmap local_md;
static bitmap local_lr;
 131
 132 /* Return the only def in USE's use-def chain, or NULL if there is
 133    more than one def in the chain.  */
 134
 135 static inline df_ref
 136 get_def_for_use (df_ref use)
 137 {
 138   return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
 139 }
 140
 141
 142 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 143    TOP_FLAG says which artificials uses should be used, when DEF_REC
 144    is an artificial def vector.  LOCAL_MD is modified as after a
 145    df_md_simulate_* function; we do more or less the same processing
 146    done there, so we do not use those functions.  */
 147
 148 #define DF_MD_GEN_FLAGS \
 149         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 150
 151 static void
 152 process_defs (df_ref *def_rec, int top_flag)
 153 {
 154   df_ref def;
 155   while ((def = *def_rec++) != NULL)
 156     {
 157       df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
 158       unsigned int dregno;
 159
 160       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 161         continue;
 162
 163       dregno = DF_REF_REGNO (def);
 164       if (curr_def)
 165         VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);
 166       else
 167         {
 168           /* Do not store anything if "transitioning" from NULL to NULL.  But
 169              otherwise, push a special entry on the stack to tell the
 170              leave_block callback that the entry in reg_defs was NULL.  */
 171           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 172             ;
 173           else
 174             VEC_safe_push (df_ref, heap, reg_defs_stack, def);
 175         }
 176
 177       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 178         {
 179           bitmap_set_bit (local_md, dregno);
 180           VEC_replace (df_ref, reg_defs, dregno, NULL);
 181         }
 182       else
 183         {
 184           bitmap_clear_bit (local_md, dregno);
 185           VEC_replace (df_ref, reg_defs, dregno, def);
 186         }
 187     }
 188 }
 189
 190
 191 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 192    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 193    TOP_FLAG says which artificials uses should be used, when USE_REC
 194    is an artificial use vector.  */
 195
 196 static void
 197 process_uses (df_ref *use_rec, int top_flag)
 198 {
 199   df_ref use;
 200   while ((use = *use_rec++) != NULL)
 201     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 202       {
 203         unsigned int uregno = DF_REF_REGNO (use);
 204         if (VEC_index (df_ref, reg_defs, uregno)
 205             && !bitmap_bit_p (local_md, uregno)
 206             && bitmap_bit_p (local_lr, uregno))
 207           VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
 208                        VEC_index (df_ref, reg_defs, uregno));
 209       }
 210 }
 211
 212
 213 static void
 214 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 215                             basic_block bb)
 216 {
 217   int bb_index = bb->index;
 218   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 219   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 220   rtx insn;
 221
 222   bitmap_copy (local_md, &md_bb_info->in);
 223   bitmap_copy (local_lr, &lr_bb_info->in);
 224
 225   /* Push a marker for the leave_block callback.  */
 226   VEC_safe_push (df_ref, heap, reg_defs_stack, NULL);
 227
 228   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 229   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 230
 231   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 232      the live registers if there are unused artificial defs.  We prefer
 233      liveness to be underestimated.  */
 234
 235   FOR_BB_INSNS (bb, insn)
 236     if (INSN_P (insn))
 237       {
 238         unsigned int uid = INSN_UID (insn);
 239         process_uses (DF_INSN_UID_USES (uid), 0);
 240         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 241         process_defs (DF_INSN_UID_DEFS (uid), 0);
 242         df_simulate_one_insn_forwards (bb, insn, local_lr);
 243       }
 244
 245   process_uses (df_get_artificial_uses (bb_index), 0);
 246   process_defs (df_get_artificial_defs (bb_index), 0);
 247 }
 248
 249 /* Pop the definitions created in this basic block when leaving its
 250    dominated parts.  */
 251
 252 static void
 253 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 254                             basic_block bb ATTRIBUTE_UNUSED)
 255 {
 256   df_ref saved_def;
 257   while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)
 258     {
 259       unsigned int dregno = DF_REF_REGNO (saved_def);
 260
 261       /* See also process_defs.  */
 262       if (saved_def == VEC_index (df_ref, reg_defs, dregno))
 263         VEC_replace (df_ref, reg_defs, dregno, NULL);
 264       else
 265         VEC_replace (df_ref, reg_defs, dregno, saved_def);
 266     }
 267 }
 268
 269
 270 /* Build a vector holding the reaching definitions of uses reached by a
 271    single dominating definition.  */
 272
 273 static void
 274 build_single_def_use_links (void)
 275 {
 276   struct dom_walk_data walk_data;
 277
 278   /* We use the multiple definitions problem to compute our restricted
 279      use-def chains.  */
 280   df_set_flags (DF_EQ_NOTES);
 281   df_md_add_problem ();
 282   df_note_add_problem ();
 283   df_analyze ();
 284   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 285
 286   use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
 287   VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
 288
 289   reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
 290   VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());
 291
 292   reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);
 293   local_md = BITMAP_ALLOC (NULL);
 294   local_lr = BITMAP_ALLOC (NULL);
 295
 296   /* Walk the dominator tree looking for single reaching definitions
 297      dominating the uses.  This is similar to how SSA form is built.  */
 298   walk_data.dom_direction = CDI_DOMINATORS;
 299   walk_data.initialize_block_local_data = NULL;
 300   walk_data.before_dom_children = single_def_use_enter_block;
 301   walk_data.after_dom_children = single_def_use_leave_block;
 302
 303   init_walk_dominator_tree (&walk_data);
 304   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
 305   fini_walk_dominator_tree (&walk_data);
 306
 307   BITMAP_FREE (local_lr);
 308   BITMAP_FREE (local_md);
 309   VEC_free (df_ref, heap, reg_defs);
 310   VEC_free (df_ref, heap, reg_defs_stack);
 311 }
 312
 313 \f
 314 /* Do not try to replace constant addresses or addresses of local and
 315    argument slots.  These MEM expressions are made only once and inserted
 316    in many instructions, as well as being used to control symbol table
 317    output.  It is not safe to clobber them.
 318
 319    There are some uncommon cases where the address is already in a register
 320    for some reason, but we cannot take advantage of that because we have
 321    no easy way to unshare the MEM.  In addition, looking up all stack
 322    addresses is costly.  */
 323
 324 static bool
 325 can_simplify_addr (rtx addr)
 326 {
 327   rtx reg;
 328
 329   if (CONSTANT_ADDRESS_P (addr))
 330     return false;
 331
 332   if (GET_CODE (addr) == PLUS)
 333     reg = XEXP (addr, 0);
 334   else
 335     reg = addr;
 336
 337   return (!REG_P (reg)
 338           || (REGNO (reg) != FRAME_POINTER_REGNUM
 339               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 340               && REGNO (reg) != ARG_POINTER_REGNUM));
 341 }
 342
 343 /* Returns a canonical version of X for the address, from the point of view,
 344    that all multiplications are represented as MULT instead of the multiply
 345    by a power of 2 being represented as ASHIFT.
 346
 347    Every ASHIFT we find has been made by simplify_gen_binary and was not
 348    there before, so it is not shared.  So we can do this in place.  */
 349
 350 static void
 351 canonicalize_address (rtx x)
 352 {
 353   for (;;)
 354     switch (GET_CODE (x))
 355       {
 356       case ASHIFT:
 357         if (CONST_INT_P (XEXP (x, 1))
 358             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 359             && INTVAL (XEXP (x, 1)) >= 0)
 360           {
 361             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 362             PUT_CODE (x, MULT);
 363             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 364                                         GET_MODE (x));
 365           }
 366
 367         x = XEXP (x, 0);
 368         break;
 369
 370       case PLUS:
 371         if (GET_CODE (XEXP (x, 0)) == PLUS
 372             || GET_CODE (XEXP (x, 0)) == ASHIFT
 373             || GET_CODE (XEXP (x, 0)) == CONST)
 374           canonicalize_address (XEXP (x, 0));
 375
 376         x = XEXP (x, 1);
 377         break;
 378
 379       case CONST:
 380         x = XEXP (x, 0);
 381         break;
 382
 383       default:
 384         return;
 385       }
 386 }
 387
 388 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 389    for a memory access in the given MODE.  */
 390
 391 static bool
 392 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 393                         addr_space_t as, bool speed)
 394 {
 395   int gain;
 396
 397   if (rtx_equal_p (old_rtx, new_rtx)
 398       || !memory_address_addr_space_p (mode, new_rtx, as))
 399     return false;
 400
 401   /* Copy propagation is always ok.  */
 402   if (REG_P (old_rtx) && REG_P (new_rtx))
 403     return true;
 404
 405   /* Prefer the new address if it is less expensive.  */
 406   gain = (address_cost (old_rtx, mode, as, speed)
 407           - address_cost (new_rtx, mode, as, speed));
 408
 409   /* If the addresses have equivalent cost, prefer the new address
 410      if it has the highest `set_src_cost'.  That has the potential of
 411      eliminating the most insns without additional costs, and it
 412      is the same that cse.c used to do.  */
 413   if (gain == 0)
 414     gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed);
 415
 416   return (gain > 0);
 417 }
 418
 419
/* Flags for the last parameter of propagate_rtx_1.  The values are
   distinct bits, combined with bitwise or.  */

enum {
  /* Set when NEW is allowed to appear in the result.  If it is set,
     propagate_rtx_1 normally returns true; if it is not,
     propagate_rtx_1 returns false if, for at least one occurrence of
     OLD, it failed to collapse the result to a constant.
     For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
     collapse to zero if replacing (reg:M B) with (reg:M A).

     PR_CAN_APPEAR is disregarded inside MEMs: propagate_rtx_1 forces it
     on for the address, and just tries to make cheaper and valid memory
     addresses.  */
  PR_CAN_APPEAR = 1,

  /* If PR_HANDLE_MEM is not set, propagate_rtx_1 treats every
     non-read-only MEM it finds as an ``opaque'' object: the MEM is
     replaced with a CLOBBER or a SCRATCH and false is returned.  This is
     needed because propagate_rtx_1 does not do any analysis on memory;
     thus it is very conservative.

     propagate_rtx sets PR_HANDLE_MEM when the expression being
     propagated contains no non-read-only MEM.  Then, it is safe not to
     treat non-read-only MEMs as opaque.  */
  PR_HANDLE_MEM = 2,

  /* Set when costs should be optimized for speed.  */
  PR_OPTIMIZE_FOR_SPEED = 4
};
 447
 448
/* Replace all occurrences of OLD_RTX in *PX with NEW_RTX and try to
   simplify the resulting expression.  Replace *PX with a new RTL
   expression if an occurrence of OLD_RTX was found.

   FLAGS is a combination of the PR_* flags.  The return value says
   whether the result is acceptable: it is true when nothing was changed,
   and otherwise when all the recursive replacements were acceptable,
   when PR_CAN_APPEAR is set, or when the result is a constant.  Note
   that *PX may have been modified even when false is returned.

   This is only a wrapper around simplify-rtx.c: do not add any pattern
   matching code here.  (The sole exception is the handling of LO_SUM, but
   that is because there is no simplify_gen_* function for LO_SUM).  */

static bool
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
{
  rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  enum rtx_code code = GET_CODE (x);
  enum machine_mode mode = GET_MODE (x);
  enum machine_mode op_mode;
  bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  bool valid_ops = true;

  if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
    {
      /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
         they have side effects or not).  */
      *px = (side_effects_p (x)
             ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
             : gen_rtx_SCRATCH (GET_MODE (x)));
      return false;
    }

  /* If X is OLD_RTX itself, substitute NEW_RTX.  The substitution is
     acceptable only if NEW_RTX is allowed to appear in the result.  */
  if (x == old_rtx)
    {
      *px = new_rtx;
      return can_appear;
    }

  /* If this is an expression, try recursive substitution.  In each case,
     an early "return true" means that no operand changed.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_UNARY:
      op0 = XEXP (x, 0);
      /* Remember the operand's mode before substituting: the replacement
         may be a different rtx.  */
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0))
        return true;
      tem = simplify_gen_unary (code, mode, op0, op_mode);
      break;

    case RTX_BIN_ARITH:
    case RTX_COMM_ARITH:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
        return true;
      tem = simplify_gen_binary (code, mode, op0, op1);
      break;

    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      /* The mode of the comparison operands, taken from whichever
         original operand has a mode.  */
      op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
        return true;
      tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
      break;

    case RTX_TERNARY:
    case RTX_BITFIELD_OPS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
        return true;
      /* If the original first operand had no mode, use the mode of its
         replacement.  */
      if (op_mode == VOIDmode)
        op_mode = GET_MODE (op0);
      tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
      break;

    case RTX_EXTRA:
      /* The only case we try to handle is a SUBREG.  */
      if (code == SUBREG)
        {
          op0 = XEXP (x, 0);
          valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0))
            return true;
          tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
                                     SUBREG_BYTE (x));
        }
      break;

    case RTX_OBJ:
      if (code == MEM && x != new_rtx)
        {
          rtx new_op0;
          op0 = XEXP (x, 0);

          /* There are some addresses that we cannot work on.  */
          if (!can_simplify_addr (op0))
            return true;

          /* Work on the delegitimized address; inside the address NEW_RTX
             is always allowed to appear.  */
          op0 = new_op0 = targetm.delegitimize_address (op0);
          valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
                                        flags | PR_CAN_APPEAR);

          /* Dismiss transformation that we do not want to carry on.  */
          if (!valid_ops
              || new_op0 == op0
              || !(GET_MODE (new_op0) == GET_MODE (op0)
                   || GET_MODE (new_op0) == VOIDmode))
            return true;

          canonicalize_address (new_op0);

          /* Copy propagations are always ok.  Otherwise check the costs.  */
          if (!(REG_P (old_rtx) && REG_P (new_rtx))
              && !should_replace_address (op0, new_op0, GET_MODE (x),
                                          MEM_ADDR_SPACE (x),
                                          flags & PR_OPTIMIZE_FOR_SPEED))
            return true;

          tem = replace_equiv_address_nv (x, new_op0);
        }

      else if (code == LO_SUM)
        {
          op0 = XEXP (x, 0);
          op1 = XEXP (x, 1);

          /* The only simplification we do attempts to remove references to op0
             or make it constant -- in both cases, op0's invalidity will not
             make the result invalid.  */
          propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
          valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
            return true;

          /* (lo_sum (high x) x) -> x  */
          if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
            tem = op1;
          else
            tem = gen_rtx_LO_SUM (mode, op0, op1);

          /* OP1 is likely not a legitimate address, otherwise there would have
             been no LO_SUM.  We want it to disappear if it is invalid, return
             false in that case.

             NOTE(review): TEM is never stored into *PX on this path, so the
             caller still sees the original LO_SUM and the rtx built above is
             dropped.  This looks unintended -- confirm before relying on the
             LO_SUM simplification.  */
          return memory_address_p (mode, tem);
        }

      else if (code == REG)
        {
          /* A register that is equal to OLD_RTX without being the same
             rtx object.  */
          if (rtx_equal_p (x, old_rtx))
            {
              *px = new_rtx;
              return can_appear;
            }
        }
      break;

    default:
      break;
    }

  /* No change, no trouble.  */
  if (tem == NULL_RTX)
    return true;

  *px = tem;

  /* The replacement we made so far is valid, if all of the recursive
     replacements were valid, or we could simplify everything to
     a constant.  */
  return valid_ops || can_appear || CONSTANT_P (tem);
}
 632
 633
 634 /* for_each_rtx traversal function that returns 1 if BODY points to
 635    a non-constant mem.  */
 636
 637 static int
 638 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 639 {
 640   rtx x = *body;
 641   return MEM_P (x) && !MEM_READONLY_P (x);
 642 }
 643
 644
 645 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 646    resulting expression (in mode MODE).  Return a new expression if it is
 647    a constant, otherwise X.
 648
 649    Simplifications where occurrences of NEW collapse to a constant are always
 650    accepted.  All simplifications are accepted if NEW is a pseudo too.
 651    Otherwise, we accept simplifications that have a lower or equal cost.  */
 652
 653 static rtx
 654 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 655                bool speed)
 656 {
 657   rtx tem;
 658   bool collapsed;
 659   int flags;
 660
 661   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 662     return NULL_RTX;
 663
 664   flags = 0;
 665   if (REG_P (new_rtx)
 666       || CONSTANT_P (new_rtx)
 667       || (GET_CODE (new_rtx) == SUBREG
 668           && REG_P (SUBREG_REG (new_rtx))
 669           && (GET_MODE_SIZE (mode)
 670               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 671     flags |= PR_CAN_APPEAR;
 672   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 673     flags |= PR_HANDLE_MEM;
 674
 675   if (speed)
 676     flags |= PR_OPTIMIZE_FOR_SPEED;
 677
 678   tem = x;
 679   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 680   if (tem == x || !collapsed)
 681     return NULL_RTX;
 682
 683   /* gen_lowpart_common will not be able to process VOIDmode entities other
 684      than CONST_INTs.  */
 685   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 686     return NULL_RTX;
 687
 688   if (GET_MODE (tem) == VOIDmode)
 689     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 690   else
 691     gcc_assert (GET_MODE (tem) == mode);
 692
 693   return tem;
 694 }
 695
 696
 697 \f
 698
 699 /* Return true if the register from reference REF is killed
 700    between FROM to (but not including) TO.  */
 701
 702 static bool
 703 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 704 {
 705   rtx insn;
 706
 707   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 708     {
 709       df_ref *def_rec;
 710       if (!INSN_P (insn))
 711         continue;
 712
 713       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 714         {
 715           df_ref def = *def_rec;
 716           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 717             return true;
 718         }
 719     }
 720   return false;
 721 }
 722
 723
 724 /* Check if the given DEF is available in INSN.  This would require full
 725    computation of available expressions; we check only restricted conditions:
 726    - if DEF is the sole definition of its register, go ahead;
 727    - in the same basic block, we check for no definitions killing the
 728      definition of DEF_INSN;
 729    - if USE's basic block has DEF's basic block as the sole predecessor,
 730      we check if the definition is killed after DEF_INSN or before
 731      TARGET_INSN insn, in their respective basic blocks.  */
 732 static bool
 733 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 734 {
 735   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 736   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 737   int regno;
 738   df_ref def;
 739
 740   /* We used to have a def reaching a use that is _before_ the def,
 741      with the def not dominating the use even though the use and def
 742      are in the same basic block, when a register may be used
 743      uninitialized in a loop.  This should not happen anymore since
 744      we do not use reaching definitions, but still we test for such
 745      cases and assume that DEF is not available.  */
 746   if (def_bb == target_bb
 747       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 748       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 749     return true;
 750
 751   /* Check if the reg in USE has only one definition.  We already
 752      know that this definition reaches use, or we wouldn't be here.
 753      However, this is invalid for hard registers because if they are
 754      live at the beginning of the function it does not mean that we
 755      have an uninitialized access.  */
 756   regno = DF_REF_REGNO (use);
 757   def = DF_REG_DEF_CHAIN (regno);
 758   if (def
 759       && DF_REF_NEXT_REG (def) == NULL
 760       && regno >= FIRST_PSEUDO_REGISTER)
 761     return false;
 762
 763   /* Check locally if we are in the same basic block.  */
 764   if (def_bb == target_bb)
 765     return local_ref_killed_between_p (use, def_insn, target_insn);
 766
 767   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 768   if (single_pred_p (target_bb)
 769       && single_pred (target_bb) == def_bb)
 770     {
 771       df_ref x;
 772
 773       /* See if USE is killed between DEF_INSN and the last insn in the
 774          basic block containing DEF_INSN.  */
 775       x = df_bb_regno_last_def_find (def_bb, regno);
 776       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 777         return true;
 778
 779       /* See if USE is killed between TARGET_INSN and the first insn in the
 780          basic block containing TARGET_INSN.  */
 781       x = df_bb_regno_first_def_find (target_bb, regno);
 782       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 783         return true;
 784
 785       return false;
 786     }
 787
 788   /* Otherwise assume the worst case.  */
 789   return true;
 790 }
 791
 792
 793 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 794    would require full computation of available expressions;
 795    we check only restricted conditions, see use_killed_between.  */
 796 static bool
 797 all_uses_available_at (rtx def_insn, rtx target_insn)
 798 {
 799   df_ref *use_rec;
 800   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 801   rtx def_set = single_set (def_insn);
 802   rtx next;
 803
 804   gcc_assert (def_set);
 805
 806   /* If target_insn comes right after def_insn, which is very common
 807      for addresses, we can use a quicker test.  Ignore debug insns
 808      other than target insns for this.  */
 809   next = NEXT_INSN (def_insn);
 810   while (next && next != target_insn && DEBUG_INSN_P (next))
 811     next = NEXT_INSN (next);
 812   if (next == target_insn && REG_P (SET_DEST (def_set)))
 813     {
 814       rtx def_reg = SET_DEST (def_set);
 815
 816       /* If the insn uses the reg that it defines, the substitution is
 817          invalid.  */
 818       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 819         {
 820           df_ref use = *use_rec;
 821           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 822             return false;
 823         }
 824       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 825         {
 826           df_ref use = *use_rec;
 827           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 828             return false;
 829         }
 830     }
 831   else
 832     {
 833       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 834
 835       /* Look at all the uses of DEF_INSN, and see if they are not
 836          killed between DEF_INSN and TARGET_INSN.  */
 837       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 838         {
 839           df_ref use = *use_rec;
 840           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 841             return false;
 842           if (use_killed_between (use, def_insn, target_insn))
 843             return false;
 844         }
 845       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 846         {
 847           df_ref use = *use_rec;
 848           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 849             return false;
 850           if (use_killed_between (use, def_insn, target_insn))
 851             return false;
 852         }
 853     }
 854
 855   return true;
 856 }
 857
 858 \f
 859 static df_ref *active_defs;
 860 #ifdef ENABLE_CHECKING
 861 static sparseset active_defs_check;
 862 #endif
 863
 864 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 865    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 866    too, for checking purposes.  */
 867
 868 static void
 869 register_active_defs (df_ref *use_rec)
 870 {
 871   while (*use_rec)
 872     {
 873       df_ref use = *use_rec++;
 874       df_ref def = get_def_for_use (use);
 875       int regno = DF_REF_REGNO (use);
 876
 877 #ifdef ENABLE_CHECKING
 878       sparseset_set_bit (active_defs_check, regno);
 879 #endif
 880       active_defs[regno] = def;
 881     }
 882 }
 883
 884
 885 /* Build the use->def links that we use to update the dataflow info
 886    for new uses.  Note that building the links is very cheap and if
 887    it were done earlier, they could be used to rule out invalid
 888    propagations (in addition to what is done in all_uses_available_at).
 889    I'm not doing this yet, though.  */
 890
 891 static void
 892 update_df_init (rtx def_insn, rtx insn)
 893 {
 894 #ifdef ENABLE_CHECKING
 895   sparseset_clear (active_defs_check);
 896 #endif
 897   register_active_defs (DF_INSN_USES (def_insn));
 898   register_active_defs (DF_INSN_USES (insn));
 899   register_active_defs (DF_INSN_EQ_USES (insn));
 900 }
 901
 902
 903 /* Update the USE_DEF_REF array for the given use, using the active definitions
 904    in the ACTIVE_DEFS array to match pseudos to their def. */
 905
 906 static inline void
 907 update_uses (df_ref *use_rec)
 908 {
 909   while (*use_rec)
 910     {
 911       df_ref use = *use_rec++;
 912       int regno = DF_REF_REGNO (use);
 913
 914       /* Set up the use-def chain.  */
 915       if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref))
 916         VEC_safe_grow_cleared (df_ref, heap, use_def_ref,
 917                                DF_REF_ID (use) + 1);
 918
 919 #ifdef ENABLE_CHECKING
 920       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 921 #endif
 922       VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]);
 923     }
 924 }
 925
 926
 927 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 928    uses if NOTES_ONLY is true.  */
 929
 930 static void
 931 update_df (rtx insn, rtx note)
 932 {
 933   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 934
 935   if (note)
 936     {
 937       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 938       df_notes_rescan (insn);
 939     }
 940   else
 941     {
 942       df_uses_create (&PATTERN (insn), insn, 0);
 943       df_insn_rescan (insn);
 944       update_uses (DF_INSN_INFO_USES (insn_info));
 945     }
 946
 947   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 948 }
 949
 950
 951 /* Try substituting NEW into LOC, which originated from forward propagation
 952    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 953    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 954    new insn is not recognized.  Return whether the substitution was
 955    performed.  */
 956
 957 static bool
 958 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 959 {
 960   rtx insn = DF_REF_INSN (use);
 961   rtx set = single_set (insn);
 962   rtx note = NULL_RTX;
 963   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 964   int old_cost = 0;
 965   bool ok;
 966
 967   update_df_init (def_insn, insn);
 968
 969   /* forward_propagate_subreg may be operating on an instruction with
 970      multiple sets.  If so, assume the cost of the new instruction is
 971      not greater than the old one.  */
 972   if (set)
 973     old_cost = set_src_cost (SET_SRC (set), speed);
 974   if (dump_file)
 975     {
 976       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 977       print_inline_rtx (dump_file, *loc, 2);
 978       fprintf (dump_file, "\n with ");
 979       print_inline_rtx (dump_file, new_rtx, 2);
 980       fprintf (dump_file, "\n");
 981     }
 982
 983   validate_unshare_change (insn, loc, new_rtx, true);
 984   if (!verify_changes (0))
 985     {
 986       if (dump_file)
 987         fprintf (dump_file, "Changes to insn %d not recognized\n",
 988                  INSN_UID (insn));
 989       ok = false;
 990     }
 991
 992   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 993            && set
 994            && set_src_cost (SET_SRC (set), speed) > old_cost)
 995     {
 996       if (dump_file)
 997         fprintf (dump_file, "Changes to insn %d not profitable\n",
 998                  INSN_UID (insn));
 999       ok = false;
1000     }
1001
1002   else
1003     {
1004       if (dump_file)
1005         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
1006       ok = true;
1007     }
1008
1009   if (ok)
1010     {
1011       confirm_change_group ();
1012       num_changes++;
1013     }
1014   else
1015     {
1016       cancel_changes (0);
1017
1018       /* Can also record a simplified value in a REG_EQUAL note,
1019          making a new one if one does not already exist.  */
1020       if (set_reg_equal)
1021         {
1022           if (dump_file)
1023             fprintf (dump_file, " Setting REG_EQUAL note\n");
1024
1025           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1026         }
1027     }
1028
1029   if ((ok || note) && !CONSTANT_P (new_rtx))
1030     update_df (insn, note);
1031
1032   return ok;
1033 }
1034
1035 /* For the given single_set INSN, containing SRC known to be a
1036    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1037    is redundant due to the register being set by a LOAD_EXTEND_OP
1038    load from memory.  */
1039
1040 static bool
1041 free_load_extend (rtx src, rtx insn)
1042 {
1043   rtx reg;
1044   df_ref *use_vec;
1045   df_ref use = 0, def;
1046
1047   reg = XEXP (src, 0);
1048 #ifdef LOAD_EXTEND_OP
1049   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1050 #endif
1051     return false;
1052
1053   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1054     {
1055       use = *use_vec;
1056
1057       if (!DF_REF_IS_ARTIFICIAL (use)
1058           && DF_REF_TYPE (use) == DF_REF_REG_USE
1059           && DF_REF_REG (use) == reg)
1060         break;
1061     }
1062   if (!use)
1063     return false;
1064
1065   def = get_def_for_use (use);
1066   if (!def)
1067     return false;
1068
1069   if (DF_REF_IS_ARTIFICIAL (def))
1070     return false;
1071
1072   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1073     {
1074       rtx patt = PATTERN (DF_REF_INSN (def));
1075
1076       if (GET_CODE (patt) == SET
1077           && GET_CODE (SET_SRC (patt)) == MEM
1078           && rtx_equal_p (SET_DEST (patt), reg))
1079         return true;
1080     }
1081   return false;
1082 }
1083
1084 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1085
1086 static bool
1087 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1088 {
1089   rtx use_reg = DF_REF_REG (use);
1090   rtx use_insn, src;
1091
1092   /* Only consider subregs... */
1093   enum machine_mode use_mode = GET_MODE (use_reg);
1094   if (GET_CODE (use_reg) != SUBREG
1095       || !REG_P (SET_DEST (def_set)))
1096     return false;
1097
1098   /* If this is a paradoxical SUBREG...  */
1099   if (GET_MODE_SIZE (use_mode)
1100       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1101     {
1102       /* If this is a paradoxical SUBREG, we have no idea what value the
1103          extra bits would have.  However, if the operand is equivalent to
1104          a SUBREG whose operand is the same as our mode, and all the modes
1105          are within a word, we can just use the inner operand because
1106          these SUBREGs just say how to treat the register.  */
1107       use_insn = DF_REF_INSN (use);
1108       src = SET_SRC (def_set);
1109       if (GET_CODE (src) == SUBREG
1110           && REG_P (SUBREG_REG (src))
1111           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1112           && GET_MODE (SUBREG_REG (src)) == use_mode
1113           && subreg_lowpart_p (src)
1114           && all_uses_available_at (def_insn, use_insn))
1115         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1116                                  def_insn, false);
1117     }
1118
1119   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1120      is the low part of the reg being extended then just use the inner
1121      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1122      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1123      or due to the operation being a no-op when applied to registers.
1124      For example, if we have:
1125
1126          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1127          B: (... (subreg:SI (reg:DI X)) ...)
1128
1129      and mode_rep_extended says that Y is already sign-extended,
1130      the backend will typically allow A to be combined with the
1131      definition of Y or, failing that, allow A to be deleted after
1132      reload through register tying.  Introducing more uses of Y
1133      prevents both optimisations.  */
1134   else if (subreg_lowpart_p (use_reg))
1135     {
1136       use_insn = DF_REF_INSN (use);
1137       src = SET_SRC (def_set);
1138       if ((GET_CODE (src) == ZERO_EXTEND
1139            || GET_CODE (src) == SIGN_EXTEND)
1140           && REG_P (XEXP (src, 0))
1141           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1142           && GET_MODE (XEXP (src, 0)) == use_mode
1143           && !free_load_extend (src, def_insn)
1144           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1145               != (int) GET_CODE (src))
1146           && all_uses_available_at (def_insn, use_insn))
1147         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1148                                  def_insn, false);
1149     }
1150
1151   return false;
1152 }
1153
1154 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1155
1156 static bool
1157 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1158 {
1159   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1160   int speed_p, i;
1161   df_ref *use_vec;
1162
1163   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1164
1165   src = SET_SRC (def_set);
1166   use_pat = PATTERN (use_insn);
1167
1168   /* In __asm don't replace if src might need more registers than
1169      reg, as that could increase register pressure on the __asm.  */
1170   use_vec = DF_INSN_USES (def_insn);
1171   if (use_vec[0] && use_vec[1])
1172     return false;
1173
1174   update_df_init (def_insn, use_insn);
1175   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1176   asm_operands = NULL_RTX;
1177   switch (GET_CODE (use_pat))
1178     {
1179     case ASM_OPERANDS:
1180       asm_operands = use_pat;
1181       break;
1182     case SET:
1183       if (MEM_P (SET_DEST (use_pat)))
1184         {
1185           loc = &SET_DEST (use_pat);
1186           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1187           if (new_rtx)
1188             validate_unshare_change (use_insn, loc, new_rtx, true);
1189         }
1190       asm_operands = SET_SRC (use_pat);
1191       break;
1192     case PARALLEL:
1193       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1194         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1195           {
1196             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1197               {
1198                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1199                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1200                                          src, speed_p);
1201                 if (new_rtx)
1202                   validate_unshare_change (use_insn, loc, new_rtx, true);
1203               }
1204             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1205           }
1206         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1207           asm_operands = XVECEXP (use_pat, 0, i);
1208       break;
1209     default:
1210       gcc_unreachable ();
1211     }
1212
1213   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1214   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1215     {
1216       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1217       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1218       if (new_rtx)
1219         validate_unshare_change (use_insn, loc, new_rtx, true);
1220     }
1221
1222   if (num_changes_pending () == 0 || !apply_change_group ())
1223     return false;
1224
1225   update_df (use_insn, NULL);
1226   num_changes++;
1227   return true;
1228 }
1229
1230 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1231    result.  */
1232
1233 static bool
1234 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1235 {
1236   rtx use_insn = DF_REF_INSN (use);
1237   rtx use_set = single_set (use_insn);
1238   rtx src, reg, new_rtx, *loc;
1239   bool set_reg_equal;
1240   enum machine_mode mode;
1241   int asm_use = -1;
1242
1243   if (INSN_CODE (use_insn) < 0)
1244     asm_use = asm_noperands (PATTERN (use_insn));
1245
1246   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1247     return false;
1248
1249   /* Do not propagate into PC, CC0, etc.  */
1250   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1251     return false;
1252
1253   /* If def and use are subreg, check if they match.  */
1254   reg = DF_REF_REG (use);
1255   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1256     {
1257       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1258         return false;
1259     }
1260   /* Check if the def had a subreg, but the use has the whole reg.  */
1261   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1262     return false;
1263   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1264      previous case, the optimization is possible and often useful indeed.  */
1265   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1266     reg = SUBREG_REG (reg);
1267
1268   /* Make sure that we can treat REG as having the same mode as the
1269      source of DEF_SET.  */
1270   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1271     return false;
1272
1273   /* Check if the substitution is valid (last, because it's the most
1274      expensive check!).  */
1275   src = SET_SRC (def_set);
1276   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1277     return false;
1278
1279   /* Check if the def is loading something from the constant pool; in this
1280      case we would undo optimization such as compress_float_constant.
1281      Still, we can set a REG_EQUAL note.  */
1282   if (MEM_P (src) && MEM_READONLY_P (src))
1283     {
1284       rtx x = avoid_constant_pool_reference (src);
1285       if (x != src && use_set)
1286         {
1287           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1288           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1289           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1290           if (old_rtx != new_rtx)
1291             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1292         }
1293       return false;
1294     }
1295
1296   if (asm_use >= 0)
1297     return forward_propagate_asm (use, def_insn, def_set, reg);
1298
1299   /* Else try simplifying.  */
1300
1301   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1302     {
1303       loc = &SET_DEST (use_set);
1304       set_reg_equal = false;
1305     }
1306   else if (!use_set)
1307     {
1308       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1309       set_reg_equal = false;
1310     }
1311   else
1312     {
1313       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1314       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1315         loc = &XEXP (note, 0);
1316       else
1317         loc = &SET_SRC (use_set);
1318
1319       /* Do not replace an existing REG_EQUAL note if the insn is not
1320          recognized.  Either we're already replacing in the note, or we'll
1321          separately try plugging the definition in the note and simplifying.
1322          And only install a REQ_EQUAL note when the destination is a REG,
1323          as the note would be invalid otherwise.  */
1324       set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set)));
1325     }
1326
1327   if (GET_MODE (*loc) == VOIDmode)
1328     mode = GET_MODE (SET_DEST (use_set));
1329   else
1330     mode = GET_MODE (*loc);
1331
1332   new_rtx = propagate_rtx (*loc, mode, reg, src,
1333                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1334
1335   if (!new_rtx)
1336     return false;
1337
1338   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1339 }
1340
1341
1342 /* Given a use USE of an insn, if it has a single reaching
1343    definition, try to forward propagate it into that insn.
1344    Return true if cfg cleanup will be needed.  */
1345
1346 static bool
1347 forward_propagate_into (df_ref use)
1348 {
1349   df_ref def;
1350   rtx def_insn, def_set, use_insn;
1351   rtx parent;
1352
1353   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1354     return false;
1355   if (DF_REF_IS_ARTIFICIAL (use))
1356     return false;
1357
1358   /* Only consider uses that have a single definition.  */
1359   def = get_def_for_use (use);
1360   if (!def)
1361     return false;
1362   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1363     return false;
1364   if (DF_REF_IS_ARTIFICIAL (def))
1365     return false;
1366
1367   /* Do not propagate loop invariant definitions inside the loop.  */
1368   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1369     return false;
1370
1371   /* Check if the use is still present in the insn!  */
1372   use_insn = DF_REF_INSN (use);
1373   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1374     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1375   else
1376     parent = PATTERN (use_insn);
1377
1378   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1379     return false;
1380
1381   def_insn = DF_REF_INSN (def);
1382   if (multiple_sets (def_insn))
1383     return false;
1384   def_set = single_set (def_insn);
1385   if (!def_set)
1386     return false;
1387
1388   /* Only try one kind of propagation.  If two are possible, we'll
1389      do it on the following iterations.  */
1390   if (forward_propagate_and_simplify (use, def_insn, def_set)
1391       || forward_propagate_subreg (use, def_insn, def_set))
1392     {
1393       if (cfun->can_throw_non_call_exceptions
1394           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1395           && purge_dead_edges (DF_REF_BB (use)))
1396         return true;
1397     }
1398   return false;
1399 }
1400
1401 \f
1402 static void
1403 fwprop_init (void)
1404 {
1405   num_changes = 0;
1406   calculate_dominance_info (CDI_DOMINATORS);
1407
1408   /* We do not always want to propagate into loops, so we have to find
1409      loops and be careful about them.  But we have to call flow_loops_find
1410      before df_analyze, because flow_loops_find may introduce new jump
1411      insns (sadly) if we are not working in cfglayout mode.  */
1412   loop_optimizer_init (0);
1413
1414   build_single_def_use_links ();
1415   df_set_flags (DF_DEFER_INSN_RESCAN);
1416
1417   active_defs = XNEWVEC (df_ref, max_reg_num ());
1418 #ifdef ENABLE_CHECKING
1419   active_defs_check = sparseset_alloc (max_reg_num ());
1420 #endif
1421 }
1422
1423 static void
1424 fwprop_done (void)
1425 {
1426   loop_optimizer_finalize ();
1427
1428   VEC_free (df_ref, heap, use_def_ref);
1429   free (active_defs);
1430 #ifdef ENABLE_CHECKING
1431   sparseset_free (active_defs_check);
1432 #endif
1433
1434   free_dominance_info (CDI_DOMINATORS);
1435   cleanup_cfg (0);
1436   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1437
1438   if (dump_file)
1439     fprintf (dump_file,
1440              "\nNumber of successful forward propagations: %d\n\n",
1441              num_changes);
1442 }
1443
1444
1445 /* Main entry point.  */
1446
1447 static bool
1448 gate_fwprop (void)
1449 {
1450   return optimize > 0 && flag_forward_propagate;
1451 }
1452
1453 static unsigned int
1454 fwprop (void)
1455 {
1456   unsigned i;
1457   bool need_cleanup = false;
1458
1459   fwprop_init ();
1460
1461   /* Go through all the uses.  df_uses_create will create new ones at the
1462      end, and we'll go through them as well.
1463
1464      Do not forward propagate addresses into loops until after unrolling.
1465      CSE did so because it was able to fix its own mess, but we are not.  */
1466
1467   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1468     {
1469       df_ref use = DF_USES_GET (i);
1470       if (use)
1471         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1472             || DF_REF_BB (use)->loop_father == NULL
1473             /* The outer most loop is not really a loop.  */
1474             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1475           need_cleanup |= forward_propagate_into (use);
1476     }
1477
1478   fwprop_done ();
1479   if (need_cleanup)
1480     cleanup_cfg (0);
1481   return 0;
1482 }
1483
1484 struct rtl_opt_pass pass_rtl_fwprop =
1485 {
1486  {
1487   RTL_PASS,
1488   "fwprop1",                            /* name */
1489   gate_fwprop,                          /* gate */
1490   fwprop,                               /* execute */
1491   NULL,                                 /* sub */
1492   NULL,                                 /* next */
1493   0,                                    /* static_pass_number */
1494   TV_FWPROP,                            /* tv_id */
1495   0,                                    /* properties_required */
1496   0,                                    /* properties_provided */
1497   0,                                    /* properties_destroyed */
1498   0,                                    /* todo_flags_start */
1499   TODO_df_finish
1500     | TODO_verify_flow
1501     | TODO_verify_rtl_sharing           /* todo_flags_finish */
1502  }
1503 };
1504
1505 static unsigned int
1506 fwprop_addr (void)
1507 {
1508   unsigned i;
1509   bool need_cleanup = false;
1510
1511   fwprop_init ();
1512
1513   /* Go through all the uses.  df_uses_create will create new ones at the
1514      end, and we'll go through them as well.  */
1515   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1516     {
1517       df_ref use = DF_USES_GET (i);
1518       if (use)
1519         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1520             && DF_REF_BB (use)->loop_father != NULL
1521             /* The outer most loop is not really a loop.  */
1522             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1523           need_cleanup |= forward_propagate_into (use);
1524     }
1525
1526   fwprop_done ();
1527
1528   if (need_cleanup)
1529     cleanup_cfg (0);
1530   return 0;
1531 }
1532
1533 struct rtl_opt_pass pass_rtl_fwprop_addr =
1534 {
1535  {
1536   RTL_PASS,
1537   "fwprop2",                            /* name */
1538   gate_fwprop,                          /* gate */
1539   fwprop_addr,                          /* execute */
1540   NULL,                                 /* sub */
1541   NULL,                                 /* next */
1542   0,                                    /* static_pass_number */
1543   TV_FWPROP,                            /* tv_id */
1544   0,                                    /* properties_required */
1545   0,                                    /* properties_provided */
1546   0,                                    /* properties_destroyed */
1547   0,                                    /* todo_flags_start */
1548   TODO_df_finish | TODO_verify_rtl_sharing  /* todo_flags_finish */
1549  }
1550 };