gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27
  28 #include "sparseset.h"
  29 #include "timevar.h"
  30 #include "rtl.h"
  31 #include "tm_p.h"
  32 #include "insn-config.h"
  33 #include "recog.h"
  34 #include "flags.h"
  35 #include "obstack.h"
  36 #include "basic-block.h"
  37 #include "df.h"
  38 #include "target.h"
  39 #include "cfgloop.h"
  40 #include "tree-pass.h"
  41 #include "domwalk.h"
  42 #include "emit-rtl.h"
  43
  44
  45 /* This pass does simple forward propagation and simplification when an
  46    operand of an insn can only come from a single def.  This pass uses
  47    df.c, so it is global.  However, we only do limited analysis of
  48    available expressions.
  49
  50    1) The pass tries to propagate the source of the def into the use,
  51    and checks if the result is independent of the substituted value.
  52    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  53    zero, independent of the source register.
  54
  55    In particular, we propagate constants into the use site.  Sometimes
  56    RTL expansion did not put the constant in the same insn on purpose,
  57    to satisfy a predicate, and the result will fail to be recognized;
  58    but this happens rarely and in this case we can still create a
  59    REG_EQUAL note.  For multi-word operations, this
  60
  61       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  62       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  63       (set (subreg:SI (reg:DI 122) 0)
  64          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  65       (set (subreg:SI (reg:DI 122) 4)
  66          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  67
  68    can be simplified to the much simpler
  69
  70       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  71       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  72
  73    This particular propagation is also effective at putting together
  74    complex addressing modes.  We are more aggressive inside MEMs, in
  75    that all definitions are propagated if the use is in a MEM; if the
  76    result is a valid memory address we check address_cost to decide
  77    whether the substitution is worthwhile.
  78
  79    2) The pass propagates register copies.  Although this is not as effective
  80    as the copy propagation done by CSE's canon_reg, which works by walking
  81    the instruction chain, it can help the other transformations.
  82
  83    We should consider removing this optimization, and instead reorder the
  84    RTL passes, because GCSE does this transformation too.  With some luck,
  85    the CSE pass at the end of rest_of_handle_gcse could also go away.
  86
  87    3) The pass looks for paradoxical subregs that are actually unnecessary.
  88    Things like this:
  89
  90      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  91      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  92      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  93                                 (subreg:SI (reg:QI 121) 0)))
  94
  95    are very common on machines that can only do word-sized operations.
  96    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  97    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  98    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  99    above will simplify this to
 100
 101      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 102      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 103      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 104
 105    where the first two insns are now dead.
 106
 107    We used to use reaching definitions to find which uses have a
 108    single reaching definition (sounds obvious...), but this is too
 109    complex a problem in nasty testcases like PR33928.  Now we use the
 110    multiple definitions problem in df-problems.c.  The similarity
 111    between that problem and SSA form creation is taken further, in
 112    that fwprop does a dominator walk to create its chains; however,
 113    instead of creating a PHI function where multiple definitions meet
 114    I just punt and record only singleton use-def chains, which is
 115    all that is needed by fwprop.  */
 116
 117
 118 static int num_changes;  /* NOTE(review): not referenced in this part of the file; presumably a count of changes made -- confirm.  */
 119
 120 DEF_VEC_P(df_ref);
 121 DEF_VEC_ALLOC_P(df_ref,heap);
 122 static VEC(df_ref,heap) *use_def_ref;     /* Indexed by DF_REF_ID of a use: the single def recorded for it, or NULL.  */
 123 static VEC(df_ref,heap) *reg_defs;        /* Indexed by register number: the def currently tracked for it, or NULL.  */
 124 static VEC(df_ref,heap) *reg_defs_stack;  /* Saved reg_defs entries; a NULL entry marks the start of a basic block.  */
 125
 126 /* The MD bitmaps are trimmed to include only live registers to cut
 127    memory usage on testcases like insn-recog.c.  Track live registers
 128    in the basic block and do not perform forward propagation if the
 129    destination is a dead pseudo occurring in a note.  */
 130 static bitmap local_md;  /* Working copy of the MD "in" set of the block being scanned.  */
 131 static bitmap local_lr;  /* Working copy of the LR "in" set, simulated forwards insn by insn.  */
 132
 133 /* Return the def recorded for USE in use_def_ref, or NULL if no single
 134    def was recorded (entries start out cleared; see process_uses).  */
 135
 136 static inline df_ref
 137 get_def_for_use (df_ref use)
 138 {
 139   return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));  /* The vector is indexed by the use's ref id.  */
 140 }
 141
 142
 143 /* Update the reg_defs vector with non-partial definitions in DEF_REC,
 144    a NULL-terminated array.  TOP_FLAG says which artificial defs should be
 145    used, when DEF_REC is an artificial def vector.  Overwritten entries are
 146    saved on reg_defs_stack.  LOCAL_MD is modified as after a df_md_simulate_*
 147    function; we do similar processing, so we do not use those functions.  */
 148 /* Flags of defs that set the LOCAL_MD bit and reset reg_defs to NULL.  */
 149 #define DF_MD_GEN_FLAGS \
 150         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 151
 152 static void
 153 process_defs (df_ref *def_rec, int top_flag)
 154 {
 155   df_ref def;
 156   while ((def = *def_rec++) != NULL)
 157     {
 158       df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
 159       unsigned int dregno;
 160
 161       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 162         continue;  /* Not the kind of def (top / not top) requested by the caller.  */
 163
 164       dregno = DF_REF_REGNO (def);
 165       if (curr_def)
 166         VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);  /* Save the entry about to be overwritten.  */
 167       else
 168         {
 169           /* Do not store anything if "transitioning" from NULL to NULL.  But
 170              otherwise, push a special entry on the stack to tell the
 171              leave_block callback that the entry in reg_defs was NULL.  */
 172           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 173             ;
 174           else
 175             VEC_safe_push (df_ref, heap, reg_defs_stack, def);  /* DEF itself is the "was NULL" entry.  */
 176         }
 177
 178       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 179         {
 180           bitmap_set_bit (local_md, dregno);
 181           VEC_replace (df_ref, reg_defs, dregno, NULL);
 182         }
 183       else
 184         {
 185           bitmap_clear_bit (local_md, dregno);
 186           VEC_replace (df_ref, reg_defs, dregno, def);  /* DEF becomes the tracked definition.  */
 187         }
 188     }
 189 }
 190
 191
 192 /* Fill the use_def_ref vector for the uses in USE_REC (NULL-terminated).
 193    A use is linked to REG_DEFS' entry for its register only if that entry
 194    is set, the register is not in LOCAL_MD and it is live in LOCAL_LR.
 195    TOP_FLAG selects which artificial uses to process (DF_REF_AT_TOP or 0).  */
 196
 197 static void
 198 process_uses (df_ref *use_rec, int top_flag)
 199 {
 200   df_ref use;
 201   while ((use = *use_rec++) != NULL)
 202     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 203       {
 204         unsigned int uregno = DF_REF_REGNO (use);
 205         if (VEC_index (df_ref, reg_defs, uregno)
 206             && !bitmap_bit_p (local_md, uregno)
 207             && bitmap_bit_p (local_lr, uregno))
 208           VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
 209                        VEC_index (df_ref, reg_defs, uregno));  /* Entries not written here stay NULL.  */
 210       }
 211 }
 212 /* Enter-block callback of the dominator walk: record use-def links for BB.  */
 213 /* Pushes a NULL marker on reg_defs_stack, then scans BB's refs in order.  */
 214 static void
 215 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 216                             basic_block bb)
 217 {
 218   int bb_index = bb->index;
 219   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 220   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 221   rtx insn;
 222
 223   bitmap_copy (local_md, &md_bb_info->in);  /* Start from the block-entry MD and LR sets.  */
 224   bitmap_copy (local_lr, &lr_bb_info->in);
 225
 226   /* Push a marker for the leave_block callback.  */
 227   VEC_safe_push (df_ref, heap, reg_defs_stack, NULL);
 228
 229   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);  /* Artificial refs at the top of BB first.  */
 230   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 231
 232   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 233      the live registers if there are unused artificial defs.  We prefer
 234      liveness to be underestimated.  */
 235
 236   FOR_BB_INSNS (bb, insn)
 237     if (INSN_P (insn))
 238       {
 239         unsigned int uid = INSN_UID (insn);
 240         process_uses (DF_INSN_UID_USES (uid), 0);  /* Uses are linked before this insn's own defs are recorded.  */
 241         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 242         process_defs (DF_INSN_UID_DEFS (uid), 0);
 243         df_simulate_one_insn_forwards (bb, insn, local_lr);  /* Advance LOCAL_LR past INSN.  */
 244       }
 245
 246   process_uses (df_get_artificial_uses (bb_index), 0);  /* Finally the artificial refs not at the top.  */
 247   process_defs (df_get_artificial_defs (bb_index), 0);
 248 }
 249
 250 /* Leave-block callback of the dominator walk: pop the definitions created in
 251    this basic block, restoring reg_defs, when leaving its dominated parts.  */
 252
 253 static void
 254 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 255                             basic_block bb ATTRIBUTE_UNUSED)
 256 {
 257   df_ref saved_def;
 258   while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)  /* Stop at the NULL marker pushed on entry.  */
 259     {
 260       unsigned int dregno = DF_REF_REGNO (saved_def);
 261
 262       /* An entry equal to the current def means the slot was NULL before (see process_defs).  */
 263       if (saved_def == VEC_index (df_ref, reg_defs, dregno))
 264         VEC_replace (df_ref, reg_defs, dregno, NULL);
 265       else
 266         VEC_replace (df_ref, reg_defs, dregno, saved_def);
 267     }
 268 }
 269
 270
 271 /* Build use_def_ref: for each use reached by a single dominating definition,
 272    record that definition.  Scratch data is freed here; use_def_ref is kept.  */
 273
 274 static void
 275 build_single_def_use_links (void)
 276 {
 277   struct dom_walk_data walk_data;
 278
 279   /* We use the multiple definitions problem to compute our restricted
 280      use-def chains.  */
 281   df_set_flags (DF_EQ_NOTES);
 282   df_md_add_problem ();
 283   df_note_add_problem ();
 284   df_analyze ();
 285   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 286
 287   use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
 288   VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());  /* One cleared (NULL) slot per use.  */
 289
 290   reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
 291   VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());  /* One cleared (NULL) slot per register.  */
 292
 293   reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);  /* Initial capacity only; entries are added with VEC_safe_push.  */
 294   local_md = BITMAP_ALLOC (NULL);
 295   local_lr = BITMAP_ALLOC (NULL);
 296
 297   /* Walk the dominator tree looking for single reaching definitions
 298      dominating the uses.  This is similar to how SSA form is built.  */
 299   walk_data.dom_direction = CDI_DOMINATORS;
 300   walk_data.initialize_block_local_data = NULL;
 301   walk_data.before_dom_children = single_def_use_enter_block;
 302   walk_data.after_dom_children = single_def_use_leave_block;
 303
 304   init_walk_dominator_tree (&walk_data);
 305   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
 306   fini_walk_dominator_tree (&walk_data);
 307
 308   BITMAP_FREE (local_lr);
 309   BITMAP_FREE (local_md);
 310   VEC_free (df_ref, heap, reg_defs);
 311   VEC_free (df_ref, heap, reg_defs_stack);  /* use_def_ref is deliberately not freed here: get_def_for_use reads it.  */
 312 }
 313
 314 \f
 315 /* Return true if ADDR is an address we may try to replace.  Do not try to
 316    replace constant addresses or addresses of local and argument slots
 317    (a frame, hard frame or argument pointer, alone or as first operand of a
 318    PLUS).  These MEM expressions are made only once and inserted in many
 319    instructions, as well as being used to control symbol table output.  It
 320    is not safe to clobber them.  There are some uncommon cases where the
 321    address is already in a register for some reason, but we cannot take
 322    advantage of that because we have no easy way to unshare the MEM.  In
 323    addition, looking up all stack addresses is costly.  */
 324
 325 static bool
 326 can_simplify_addr (rtx addr)
 327 {
 328   rtx reg;
 329
 330   if (CONSTANT_ADDRESS_P (addr))
 331     return false;
 332
 333   if (GET_CODE (addr) == PLUS)
 334     reg = XEXP (addr, 0);  /* Only the first operand of a PLUS is inspected.  */
 335   else
 336     reg = addr;
 337
 338   return (!REG_P (reg)
 339           || (REGNO (reg) != FRAME_POINTER_REGNUM
 340               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 341               && REGNO (reg) != ARG_POINTER_REGNUM));
 342 }
 343
 344 /* Rewrite X, an address, in place so that every multiplication by a power
 345    of 2 is represented as MULT rather than ASHIFT.  Only ASHIFTs by a
 346    CONST_INT in [0, mode bitsize) are changed.  Walks PLUS, ASHIFT and CONST.
 347
 348    Every ASHIFT we find has been made by simplify_gen_binary and was not
 349    there before, so it is not shared.  So we can do this in place.  */
 350
 351 static void
 352 canonicalize_address (rtx x)
 353 {
 354   for (;;)
 355     switch (GET_CODE (x))
 356       {
 357       case ASHIFT:
 358         if (CONST_INT_P (XEXP (x, 1))
 359             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 360             && INTVAL (XEXP (x, 1)) >= 0)
 361           {
 362             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 363             PUT_CODE (x, MULT);  /* (ashift A N) becomes (mult A 2**N).  */
 364             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 365                                         GET_MODE (x));
 366           }
 367
 368         x = XEXP (x, 0);  /* Continue with the shifted / multiplied operand.  */
 369         break;
 370
 371       case PLUS:
 372         if (GET_CODE (XEXP (x, 0)) == PLUS
 373             || GET_CODE (XEXP (x, 0)) == ASHIFT
 374             || GET_CODE (XEXP (x, 0)) == CONST)
 375           canonicalize_address (XEXP (x, 0));  /* Recurse on the first operand ...  */
 376
 377         x = XEXP (x, 1);  /* ... and iterate on the second.  */
 378         break;
 379
 380       case CONST:
 381         x = XEXP (x, 0);
 382         break;
 383
 384       default:
 385         return;  /* Any other code ends the walk.  */
 386       }
 387 }
 388
 389 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 390    for a MODE access in address space AS.  SPEED is passed to the costs.  */
 391
 392 static bool
 393 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 394                         addr_space_t as, bool speed)
 395 {
 396   int gain;
 397
 398   if (rtx_equal_p (old_rtx, new_rtx)
 399       || !memory_address_addr_space_p (mode, new_rtx, as))
 400     return false;  /* Nothing to gain, or NEW is not a valid address.  */
 401
 402   /* Copy propagation is always ok.  */
 403   if (REG_P (old_rtx) && REG_P (new_rtx))
 404     return true;
 405
 406   /* Prefer the new address if it is less expensive.  */
 407   gain = (address_cost (old_rtx, mode, as, speed)
 408           - address_cost (new_rtx, mode, as, speed));
 409
 410   /* If the addresses have equivalent cost, prefer the new address
 411      if it has the highest `set_src_cost'.  That has the potential of
 412      eliminating the most insns without additional costs, and it
 413      is the same that cse.c used to do.  */
 414   if (gain == 0)
 415     gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed);
 416
 417   return (gain > 0);  /* A tie on both measures keeps the old address.  */
 418 }
 419
 420
 421 /* Flags for the last parameter of propagate_rtx_1; they may be or'ed.  */
 422
 423 enum {
 424   /* If PR_CAN_APPEAR is set, propagate_rtx_1 always returns true;
 425      if it is clear, propagate_rtx_1 returns false if, for at least
 426      one occurrence OLD, it failed to collapse the result to a constant.
 427      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
 428      collapse to zero if replacing (reg:M B) with (reg:M A).
 429
 430      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
 431      propagate_rtx_1 just tries to make cheaper and valid memory
 432      addresses.  */
 433   PR_CAN_APPEAR = 1,
 434
 435   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
 436      outside memory addresses.  This is needed because propagate_rtx_1 does
 437      not do any analysis on memory; thus it is very conservative and in general
 438      it will fail if non-read-only MEMs are found in the source expression.
 439
 440      PR_HANDLE_MEM is set when the source of the propagation was not
 441      another MEM.  Then, it is safe not to treat non-read-only MEMs as
 442      ``opaque'' objects.  */
 443   PR_HANDLE_MEM = 2,
 444
 445   /* Set when costs should be optimized for speed.  */
 446   PR_OPTIMIZE_FOR_SPEED = 4
 447 };
 448
 449
 450 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
 451    resulting expression.  Replace *PX with a new RTL expression if an
 452    occurrence of OLD was found.  FLAGS is a mask of the PR_* bits above.
 453    Return false if the caller should reject the result, true otherwise.
 454    This is only a wrapper around simplify-rtx.c: do not add any pattern
 455    matching code here.  (The sole exception is the handling of LO_SUM, but
 456    that is because there is no simplify_gen_* function for LO_SUM).  */
 457
 458 static bool
 459 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 460 {
 461   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 462   enum rtx_code code = GET_CODE (x);
 463   enum machine_mode mode = GET_MODE (x);
 464   enum machine_mode op_mode;
 465   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
 466   bool valid_ops = true;  /* Cleared when a recursive call on an operand returns false.  */
 467
 468   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
 469     {
 470       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
 471          they have side effects or not).  */
 472       *px = (side_effects_p (x)
 473              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
 474              : gen_rtx_SCRATCH (GET_MODE (x)));
 475       return false;
 476     }
 477
 478   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
 479      address, and we are *not* inside one.  */
 480   if (x == old_rtx)
 481     {
 482       *px = new_rtx;
 483       return can_appear;
 484     }
 485
 486   /* If this is an expression, try recursive substitution.  */
 487   switch (GET_RTX_CLASS (code))
 488     {
 489     case RTX_UNARY:
 490       op0 = XEXP (x, 0);
 491       op_mode = GET_MODE (op0);  /* Taken before substitution changes OP0.  */
 492       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 493       if (op0 == XEXP (x, 0))
 494         return true;  /* Operand unchanged: nothing to do.  */
 495       tem = simplify_gen_unary (code, mode, op0, op_mode);
 496       break;
 497
 498     case RTX_BIN_ARITH:
 499     case RTX_COMM_ARITH:
 500       op0 = XEXP (x, 0);
 501       op1 = XEXP (x, 1);
 502       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 503       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 504       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 505         return true;
 506       tem = simplify_gen_binary (code, mode, op0, op1);
 507       break;
 508
 509     case RTX_COMPARE:
 510     case RTX_COMM_COMPARE:
 511       op0 = XEXP (x, 0);
 512       op1 = XEXP (x, 1);
 513       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);  /* Mode of the compared operands.  */
 514       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 515       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 516       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 517         return true;
 518       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 519       break;
 520
 521     case RTX_TERNARY:
 522     case RTX_BITFIELD_OPS:
 523       op0 = XEXP (x, 0);
 524       op1 = XEXP (x, 1);
 525       op2 = XEXP (x, 2);
 526       op_mode = GET_MODE (op0);
 527       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 528       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 529       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
 530       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 531         return true;
 532       if (op_mode == VOIDmode)
 533         op_mode = GET_MODE (op0);  /* Fall back to the mode of the substituted operand.  */
 534       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 535       break;
 536
 537     case RTX_EXTRA:
 538       /* The only case we try to handle is a SUBREG.  */
 539       if (code == SUBREG)
 540         {
 541           op0 = XEXP (x, 0);
 542           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 543           if (op0 == XEXP (x, 0))
 544             return true;
 545           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 546                                      SUBREG_BYTE (x));
 547         }
 548       break;
 549
 550     case RTX_OBJ:
 551       if (code == MEM && x != new_rtx)
 552         {
 553           rtx new_op0;
 554           op0 = XEXP (x, 0);
 555
 556           /* There are some addresses that we cannot work on.  */
 557           if (!can_simplify_addr (op0))
 558             return true;
 559
 560           op0 = new_op0 = targetm.delegitimize_address (op0);
 561           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
 562                                         flags | PR_CAN_APPEAR);  /* Inside an address NEW may always appear.  */
 563
 564           /* Dismiss transformation that we do not want to carry on.  */
 565           if (!valid_ops
 566               || new_op0 == op0
 567               || !(GET_MODE (new_op0) == GET_MODE (op0)
 568                    || GET_MODE (new_op0) == VOIDmode))
 569             return true;
 570
 571           canonicalize_address (new_op0);
 572
 573           /* Copy propagations are always ok.  Otherwise check the costs.  */
 574           if (!(REG_P (old_rtx) && REG_P (new_rtx))
 575               && !should_replace_address (op0, new_op0, GET_MODE (x),
 576                                           MEM_ADDR_SPACE (x),
 577                                           flags & PR_OPTIMIZE_FOR_SPEED))
 578             return true;
 579
 580           tem = replace_equiv_address_nv (x, new_op0);
 581         }
 582
 583       else if (code == LO_SUM)
 584         {
 585           op0 = XEXP (x, 0);
 586           op1 = XEXP (x, 1);
 587
 588           /* The only simplification we do attempts to remove references to op0
 589              or make it constant -- in both cases, op0's invalidity will not
 590              make the result invalid.  */
 591           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
 592           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 593           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 594             return true;
 595
 596           /* (lo_sum (high x) x) -> x  */
 597           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 598             tem = op1;
 599           else
 600             tem = gen_rtx_LO_SUM (mode, op0, op1);
 601
 602           /* OP1 is likely not a legitimate address, otherwise there would have
 603              been no LO_SUM.  We want it to disappear if it is invalid, return
 604              false in that case.  */
 605           return memory_address_p (mode, tem);  /* NOTE(review): returns without storing TEM in *PX -- confirm intended.  */
 606         }
 607
 608       else if (code == REG)
 609         {
 610           if (rtx_equal_p (x, old_rtx))  /* Catches a REG equal to OLD but not pointer-identical to it.  */
 611             {
 612               *px = new_rtx;
 613               return can_appear;
 614             }
 615         }
 616       break;
 617
 618     default:
 619       break;
 620     }
 621
 622   /* No change, no trouble.  */
 623   if (tem == NULL_RTX)
 624     return true;
 625
 626   *px = tem;
 627
 628   /* The replacement we made so far is valid, if all of the recursive
 629      replacements were valid, or we could simplify everything to
 630      a constant.  */
 631   return valid_ops || can_appear || CONSTANT_P (tem);
 632 }
 633
 634
 635 /* for_each_rtx traversal function that returns 1 if BODY points to
 636    a MEM that is not MEM_READONLY_P, and 0 otherwise.  DATA is unused.  */
 637
 638 static int
 639 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 640 {
 641   rtx x = *body;
 642   return MEM_P (x) && !MEM_READONLY_P (x);
 643 }
 644
 645
 646 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 647    resulting expression (in mode MODE).  Return the new expression, or
 648    NULL_RTX if nothing changed or the replacement was not acceptable.
 649    SPEED selects whether costs are optimized for speed.
 650    Simplifications where occurrences of NEW collapse to a constant are always
 651    accepted.  NEW itself may appear in the result if it is a pseudo, a constant,
 652    or a SUBREG of a register at least as wide as MODE; hard regs are refused.  */
 653
 654 static rtx
 655 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 656                bool speed)
 657 {
 658   rtx tem;
 659   bool collapsed;
 660   int flags;
 661
 662   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 663     return NULL_RTX;  /* Never propagate a hard register.  */
 664
 665   flags = 0;
 666   if (REG_P (new_rtx)
 667       || CONSTANT_P (new_rtx)
 668       || (GET_CODE (new_rtx) == SUBREG
 669           && REG_P (SUBREG_REG (new_rtx))
 670           && (GET_MODE_SIZE (mode)
 671               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 672     flags |= PR_CAN_APPEAR;
 673   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 674     flags |= PR_HANDLE_MEM;  /* NEW contains no non-read-only MEM.  */
 675
 676   if (speed)
 677     flags |= PR_OPTIMIZE_FOR_SPEED;
 678
 679   tem = x;
 680   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);  /* Works on a copy of NEW.  */
 681   if (tem == x || !collapsed)
 682     return NULL_RTX;
 683
 684   /* gen_lowpart_common will not be able to process VOIDmode entities other
 685      than CONST_INTs.  */
 686   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 687     return NULL_RTX;
 688
 689   if (GET_MODE (tem) == VOIDmode)
 690     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);  /* Give the VOIDmode CONST_INT the requested MODE.  */
 691   else
 692     gcc_assert (GET_MODE (tem) == mode);
 693
 694   return tem;
 695 }
 696
 697
 698 \f
 699
 700 /* Return true if the register from reference REF is defined by any insn
 701    from FROM (inclusive) up to, but not including, TO.  */
 702
 703 static bool
 704 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 705 {
 706   rtx insn;
 707
 708   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 709     {
 710       df_ref *def_rec;
 711       if (!INSN_P (insn))
 712         continue;  /* Skip anything that is not a real insn.  */
 713
 714       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 715         {
 716           df_ref def = *def_rec;
 717           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 718             return true;  /* Any def of the same register number counts as a kill.  */
 719         }
 720     }
 721   return false;
 722 }
 723
 724
 725 /* Return true if the register in USE (a use in DEF_INSN) may be killed before
 726    TARGET_INSN.  This would require full computation of available expressions;
 727    we check only restricted conditions:
 728    - if the register has a single definition and is a pseudo, it is not killed;
 729    - in the same basic block, we check for no definitions between DEF_INSN
 730      and TARGET_INSN;
 731    - if TARGET_INSN's basic block has DEF_INSN's block as the sole predecessor,
 732      we check for kills after DEF_INSN or before TARGET_INSN in their blocks.  */
 733 static bool
 734 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 735 {
 736   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 737   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 738   int regno;
 739   df_ref def;
 740
 741   /* We used to have a def reaching a use that is _before_ the def,
 742      with the def not dominating the use even though the use and def
 743      are in the same basic block, when a register may be used
 744      uninitialized in a loop.  This should not happen anymore since
 745      we do not use reaching definitions, but still we test for such
 746      cases and assume that DEF is not available.  */
 747   if (def_bb == target_bb
 748       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 749       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 750     return true;
 751
 752   /* Check if the reg in USE has only one definition.  We already
 753      know that this definition reaches use, or we wouldn't be here.
 754      However, this is invalid for hard registers because if they are
 755      live at the beginning of the function it does not mean that we
 756      have an uninitialized access.  */
 757   regno = DF_REF_REGNO (use);
 758   def = DF_REG_DEF_CHAIN (regno);
 759   if (def
 760       && DF_REF_NEXT_REG (def) == NULL
 761       && regno >= FIRST_PSEUDO_REGISTER)
 762     return false;
 763
 764   /* Check locally if we are in the same basic block.  */
 765   if (def_bb == target_bb)
 766     return local_ref_killed_between_p (use, def_insn, target_insn);
 767
 768   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 769   if (single_pred_p (target_bb)
 770       && single_pred (target_bb) == def_bb)
 771     {
 772       df_ref x;
 773
 774       /* See if USE is killed between DEF_INSN and the last insn in the
 775          basic block containing DEF_INSN.  */
 776       x = df_bb_regno_last_def_find (def_bb, regno);
 777       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))  /* A def in DEF_INSN itself counts too.  */
 778         return true;
 779
 780       /* See if USE is killed between TARGET_INSN and the first insn in the
 781          basic block containing TARGET_INSN.  */
 782       x = df_bb_regno_first_def_find (target_bb, regno);
 783       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))  /* A def in TARGET_INSN itself does not count.  */
 784         return true;
 785
 786       return false;
 787     }
 788
 789   /* Otherwise assume the worst case.  */
 790   return true;
 791 }
 792
 793
 794 /* Return true if all uses in DEF_INSN (a single_set insn; EQ uses included)
 795    can be used in TARGET_INSN.  This would require full computation of available
 796    expressions; we check only restricted conditions, see use_killed_between.  */
 797 static bool
 798 all_uses_available_at (rtx def_insn, rtx target_insn)
 799 {
 800   df_ref *use_rec;
 801   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 802   rtx def_set = single_set (def_insn);
 803
 804   gcc_assert (def_set);
 805
 806   /* If target_insn comes right after def_insn, which is very common
 807      for addresses, we can use a quicker test.  */
 808   if (NEXT_INSN (def_insn) == target_insn
 809       && REG_P (SET_DEST (def_set)))
 810     {
 811       rtx def_reg = SET_DEST (def_set);
 812
 813       /* If the insn uses the reg that it defines, the substitution is
 814          invalid.  */
 815       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 816         {
 817           df_ref use = *use_rec;
 818           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 819             return false;
 820         }
 821       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)  /* Same test on the EQ uses.  */
 822         {
 823           df_ref use = *use_rec;
 824           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 825             return false;
 826         }
 827     }
 828   else
 829     {
 830       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;  /* NULL_RTX when the destination is not a REG.  */
 831
 832       /* Look at all the uses of DEF_INSN, and see if they are not
 833          killed between DEF_INSN and TARGET_INSN.  */
 834       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 835         {
 836           df_ref use = *use_rec;
 837           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 838             return false;
 839           if (use_killed_between (use, def_insn, target_insn))
 840             return false;
 841         }
 842       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)  /* Same tests on the EQ uses.  */
 843         {
 844           df_ref use = *use_rec;
 845           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 846             return false;
 847           if (use_killed_between (use, def_insn, target_insn))
 848             return false;
 849         }
 850     }
 851
 852   return true;
 853 }
 854
 855 \f
 856 static df_ref *active_defs;
 857 #ifdef ENABLE_CHECKING
 858 static sparseset active_defs_check;
 859 #endif
 860
 861 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 862    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 863    too, for checking purposes.  */
 864
 865 static void
 866 register_active_defs (df_ref *use_rec)
 867 {
 868   while (*use_rec)
 869     {
 870       df_ref use = *use_rec++;
 871       df_ref def = get_def_for_use (use);
 872       int regno = DF_REF_REGNO (use);
 873
 874 #ifdef ENABLE_CHECKING
 875       sparseset_set_bit (active_defs_check, regno);
 876 #endif
 877       active_defs[regno] = def;
 878     }
 879 }
 880
 881
 882 /* Build the use->def links that we use to update the dataflow info
 883    for new uses.  Note that building the links is very cheap and if
 884    it were done earlier, they could be used to rule out invalid
 885    propagations (in addition to what is done in all_uses_available_at).
 886    I'm not doing this yet, though.  */
 887
 888 static void
 889 update_df_init (rtx def_insn, rtx insn)
 890 {
 891 #ifdef ENABLE_CHECKING
 892   sparseset_clear (active_defs_check);
 893 #endif
 894   register_active_defs (DF_INSN_USES (def_insn));
 895   register_active_defs (DF_INSN_USES (insn));
 896   register_active_defs (DF_INSN_EQ_USES (insn));
 897 }
 898
 899
 900 /* Update the USE_DEF_REF array for the given use, using the active definitions
 901    in the ACTIVE_DEFS array to match pseudos to their def. */
 902
 903 static inline void
 904 update_uses (df_ref *use_rec)
 905 {
 906   while (*use_rec)
 907     {
 908       df_ref use = *use_rec++;
 909       int regno = DF_REF_REGNO (use);
 910
 911       /* Set up the use-def chain.  */
 912       if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref))
 913         VEC_safe_grow_cleared (df_ref, heap, use_def_ref,
 914                                DF_REF_ID (use) + 1);
 915
 916 #ifdef ENABLE_CHECKING
 917       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 918 #endif
 919       VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]);
 920     }
 921 }
 922
 923
 924 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 925    uses if NOTES_ONLY is true.  */
 926
 927 static void
 928 update_df (rtx insn, rtx note)
 929 {
 930   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 931
 932   if (note)
 933     {
 934       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 935       df_notes_rescan (insn);
 936     }
 937   else
 938     {
 939       df_uses_create (&PATTERN (insn), insn, 0);
 940       df_insn_rescan (insn);
 941       update_uses (DF_INSN_INFO_USES (insn_info));
 942     }
 943
 944   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 945 }
 946
 947
 948 /* Try substituting NEW into LOC, which originated from forward propagation
 949    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 950    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 951    new insn is not recognized.  Return whether the substitution was
 952    performed.  */
 953
 954 static bool
 955 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 956 {
 957   rtx insn = DF_REF_INSN (use);
 958   rtx set = single_set (insn);
 959   rtx note = NULL_RTX;
 960   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 961   int old_cost = 0;
 962   bool ok;
 963
 964   update_df_init (def_insn, insn);
 965
 966   /* forward_propagate_subreg may be operating on an instruction with
 967      multiple sets.  If so, assume the cost of the new instruction is
 968      not greater than the old one.  */
 969   if (set)
 970     old_cost = set_src_cost (SET_SRC (set), speed);
 971   if (dump_file)
 972     {
 973       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 974       print_inline_rtx (dump_file, *loc, 2);
 975       fprintf (dump_file, "\n with ");
 976       print_inline_rtx (dump_file, new_rtx, 2);
 977       fprintf (dump_file, "\n");
 978     }
 979
 980   validate_unshare_change (insn, loc, new_rtx, true);
 981   if (!verify_changes (0))
 982     {
 983       if (dump_file)
 984         fprintf (dump_file, "Changes to insn %d not recognized\n",
 985                  INSN_UID (insn));
 986       ok = false;
 987     }
 988
 989   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 990            && set
 991            && set_src_cost (SET_SRC (set), speed) > old_cost)
 992     {
 993       if (dump_file)
 994         fprintf (dump_file, "Changes to insn %d not profitable\n",
 995                  INSN_UID (insn));
 996       ok = false;
 997     }
 998
 999   else
1000     {
1001       if (dump_file)
1002         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
1003       ok = true;
1004     }
1005
1006   if (ok)
1007     {
1008       confirm_change_group ();
1009       num_changes++;
1010     }
1011   else
1012     {
1013       cancel_changes (0);
1014
1015       /* Can also record a simplified value in a REG_EQUAL note,
1016          making a new one if one does not already exist.  */
1017       if (set_reg_equal)
1018         {
1019           if (dump_file)
1020             fprintf (dump_file, " Setting REG_EQUAL note\n");
1021
1022           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1023         }
1024     }
1025
1026   if ((ok || note) && !CONSTANT_P (new_rtx))
1027     update_df (insn, note);
1028
1029   return ok;
1030 }
1031
1032 /* For the given single_set INSN, containing SRC known to be a
1033    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1034    is redundant due to the register being set by a LOAD_EXTEND_OP
1035    load from memory.  */
1036
1037 static bool
1038 free_load_extend (rtx src, rtx insn)
1039 {
1040   rtx reg;
1041   df_ref *use_vec;
1042   df_ref use = 0, def;
1043
1044   reg = XEXP (src, 0);
1045 #ifdef LOAD_EXTEND_OP
1046   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1047 #endif
1048     return false;
1049
1050   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1051     {
1052       use = *use_vec;
1053
1054       if (!DF_REF_IS_ARTIFICIAL (use)
1055           && DF_REF_TYPE (use) == DF_REF_REG_USE
1056           && DF_REF_REG (use) == reg)
1057         break;
1058     }
1059   if (!use)
1060     return false;
1061
1062   def = get_def_for_use (use);
1063   if (!def)
1064     return false;
1065
1066   if (DF_REF_IS_ARTIFICIAL (def))
1067     return false;
1068
1069   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1070     {
1071       rtx patt = PATTERN (DF_REF_INSN (def));
1072
1073       if (GET_CODE (patt) == SET
1074           && GET_CODE (SET_SRC (patt)) == MEM
1075           && rtx_equal_p (SET_DEST (patt), reg))
1076         return true;
1077     }
1078   return false;
1079 }
1080
1081 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1082
1083 static bool
1084 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1085 {
1086   rtx use_reg = DF_REF_REG (use);
1087   rtx use_insn, src;
1088
1089   /* Only consider subregs... */
1090   enum machine_mode use_mode = GET_MODE (use_reg);
1091   if (GET_CODE (use_reg) != SUBREG
1092       || !REG_P (SET_DEST (def_set)))
1093     return false;
1094
1095   /* If this is a paradoxical SUBREG...  */
1096   if (GET_MODE_SIZE (use_mode)
1097       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1098     {
1099       /* If this is a paradoxical SUBREG, we have no idea what value the
1100          extra bits would have.  However, if the operand is equivalent to
1101          a SUBREG whose operand is the same as our mode, and all the modes
1102          are within a word, we can just use the inner operand because
1103          these SUBREGs just say how to treat the register.  */
1104       use_insn = DF_REF_INSN (use);
1105       src = SET_SRC (def_set);
1106       if (GET_CODE (src) == SUBREG
1107           && REG_P (SUBREG_REG (src))
1108           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1109           && GET_MODE (SUBREG_REG (src)) == use_mode
1110           && subreg_lowpart_p (src)
1111           && all_uses_available_at (def_insn, use_insn))
1112         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1113                                  def_insn, false);
1114     }
1115
1116   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1117      is the low part of the reg being extended then just use the inner
1118      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1119      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1120      or due to the operation being a no-op when applied to registers.
1121      For example, if we have:
1122
1123          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1124          B: (... (subreg:SI (reg:DI X)) ...)
1125
1126      and mode_rep_extended says that Y is already sign-extended,
1127      the backend will typically allow A to be combined with the
1128      definition of Y or, failing that, allow A to be deleted after
1129      reload through register tying.  Introducing more uses of Y
1130      prevents both optimisations.  */
1131   else if (subreg_lowpart_p (use_reg))
1132     {
1133       use_insn = DF_REF_INSN (use);
1134       src = SET_SRC (def_set);
1135       if ((GET_CODE (src) == ZERO_EXTEND
1136            || GET_CODE (src) == SIGN_EXTEND)
1137           && REG_P (XEXP (src, 0))
1138           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1139           && GET_MODE (XEXP (src, 0)) == use_mode
1140           && !free_load_extend (src, def_insn)
1141           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1142               != (int) GET_CODE (src))
1143           && all_uses_available_at (def_insn, use_insn))
1144         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1145                                  def_insn, false);
1146     }
1147
1148   return false;
1149 }
1150
1151 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1152
1153 static bool
1154 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1155 {
1156   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1157   int speed_p, i;
1158   df_ref *use_vec;
1159
1160   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1161
1162   src = SET_SRC (def_set);
1163   use_pat = PATTERN (use_insn);
1164
1165   /* In __asm don't replace if src might need more registers than
1166      reg, as that could increase register pressure on the __asm.  */
1167   use_vec = DF_INSN_USES (def_insn);
1168   if (use_vec[0] && use_vec[1])
1169     return false;
1170
1171   update_df_init (def_insn, use_insn);
1172   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1173   asm_operands = NULL_RTX;
1174   switch (GET_CODE (use_pat))
1175     {
1176     case ASM_OPERANDS:
1177       asm_operands = use_pat;
1178       break;
1179     case SET:
1180       if (MEM_P (SET_DEST (use_pat)))
1181         {
1182           loc = &SET_DEST (use_pat);
1183           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1184           if (new_rtx)
1185             validate_unshare_change (use_insn, loc, new_rtx, true);
1186         }
1187       asm_operands = SET_SRC (use_pat);
1188       break;
1189     case PARALLEL:
1190       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1191         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1192           {
1193             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1194               {
1195                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1196                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1197                                          src, speed_p);
1198                 if (new_rtx)
1199                   validate_unshare_change (use_insn, loc, new_rtx, true);
1200               }
1201             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1202           }
1203         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1204           asm_operands = XVECEXP (use_pat, 0, i);
1205       break;
1206     default:
1207       gcc_unreachable ();
1208     }
1209
1210   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1211   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1212     {
1213       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1214       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1215       if (new_rtx)
1216         validate_unshare_change (use_insn, loc, new_rtx, true);
1217     }
1218
1219   if (num_changes_pending () == 0 || !apply_change_group ())
1220     return false;
1221
1222   update_df (use_insn, NULL);
1223   num_changes++;
1224   return true;
1225 }
1226
1227 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1228    result.  */
1229
1230 static bool
1231 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1232 {
1233   rtx use_insn = DF_REF_INSN (use);
1234   rtx use_set = single_set (use_insn);
1235   rtx src, reg, new_rtx, *loc;
1236   bool set_reg_equal;
1237   enum machine_mode mode;
1238   int asm_use = -1;
1239
1240   if (INSN_CODE (use_insn) < 0)
1241     asm_use = asm_noperands (PATTERN (use_insn));
1242
1243   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1244     return false;
1245
1246   /* Do not propagate into PC, CC0, etc.  */
1247   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1248     return false;
1249
1250   /* If def and use are subreg, check if they match.  */
1251   reg = DF_REF_REG (use);
1252   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1253     {
1254       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1255         return false;
1256     }
1257   /* Check if the def had a subreg, but the use has the whole reg.  */
1258   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1259     return false;
1260   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1261      previous case, the optimization is possible and often useful indeed.  */
1262   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1263     reg = SUBREG_REG (reg);
1264
1265   /* Make sure that we can treat REG as having the same mode as the
1266      source of DEF_SET.  */
1267   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1268     return false;
1269
1270   /* Check if the substitution is valid (last, because it's the most
1271      expensive check!).  */
1272   src = SET_SRC (def_set);
1273   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1274     return false;
1275
1276   /* Check if the def is loading something from the constant pool; in this
1277      case we would undo optimization such as compress_float_constant.
1278      Still, we can set a REG_EQUAL note.  */
1279   if (MEM_P (src) && MEM_READONLY_P (src))
1280     {
1281       rtx x = avoid_constant_pool_reference (src);
1282       if (x != src && use_set)
1283         {
1284           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1285           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1286           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1287           if (old_rtx != new_rtx)
1288             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1289         }
1290       return false;
1291     }
1292
1293   if (asm_use >= 0)
1294     return forward_propagate_asm (use, def_insn, def_set, reg);
1295
1296   /* Else try simplifying.  */
1297
1298   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1299     {
1300       loc = &SET_DEST (use_set);
1301       set_reg_equal = false;
1302     }
1303   else if (!use_set)
1304     {
1305       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1306       set_reg_equal = false;
1307     }
1308   else
1309     {
1310       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1311       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1312         loc = &XEXP (note, 0);
1313       else
1314         loc = &SET_SRC (use_set);
1315
1316       /* Do not replace an existing REG_EQUAL note if the insn is not
1317          recognized.  Either we're already replacing in the note, or we'll
1318          separately try plugging the definition in the note and simplifying.
1319          And only install a REQ_EQUAL note when the destination is a REG,
1320          as the note would be invalid otherwise.  */
1321       set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set)));
1322     }
1323
1324   if (GET_MODE (*loc) == VOIDmode)
1325     mode = GET_MODE (SET_DEST (use_set));
1326   else
1327     mode = GET_MODE (*loc);
1328
1329   new_rtx = propagate_rtx (*loc, mode, reg, src,
1330                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1331
1332   if (!new_rtx)
1333     return false;
1334
1335   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1336 }
1337
1338
1339 /* Given a use USE of an insn, if it has a single reaching
1340    definition, try to forward propagate it into that insn.
1341    Return true if cfg cleanup will be needed.  */
1342
1343 static bool
1344 forward_propagate_into (df_ref use)
1345 {
1346   df_ref def;
1347   rtx def_insn, def_set, use_insn;
1348   rtx parent;
1349
1350   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1351     return false;
1352   if (DF_REF_IS_ARTIFICIAL (use))
1353     return false;
1354
1355   /* Only consider uses that have a single definition.  */
1356   def = get_def_for_use (use);
1357   if (!def)
1358     return false;
1359   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1360     return false;
1361   if (DF_REF_IS_ARTIFICIAL (def))
1362     return false;
1363
1364   /* Do not propagate loop invariant definitions inside the loop.  */
1365   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1366     return false;
1367
1368   /* Check if the use is still present in the insn!  */
1369   use_insn = DF_REF_INSN (use);
1370   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1371     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1372   else
1373     parent = PATTERN (use_insn);
1374
1375   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1376     return false;
1377
1378   def_insn = DF_REF_INSN (def);
1379   if (multiple_sets (def_insn))
1380     return false;
1381   def_set = single_set (def_insn);
1382   if (!def_set)
1383     return false;
1384
1385   /* Only try one kind of propagation.  If two are possible, we'll
1386      do it on the following iterations.  */
1387   if (forward_propagate_and_simplify (use, def_insn, def_set)
1388       || forward_propagate_subreg (use, def_insn, def_set))
1389     {
1390       if (cfun->can_throw_non_call_exceptions
1391           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1392           && purge_dead_edges (DF_REF_BB (use)))
1393         return true;
1394     }
1395   return false;
1396 }
1397
1398 \f
1399 static void
1400 fwprop_init (void)
1401 {
1402   num_changes = 0;
1403   calculate_dominance_info (CDI_DOMINATORS);
1404
1405   /* We do not always want to propagate into loops, so we have to find
1406      loops and be careful about them.  But we have to call flow_loops_find
1407      before df_analyze, because flow_loops_find may introduce new jump
1408      insns (sadly) if we are not working in cfglayout mode.  */
1409   loop_optimizer_init (0);
1410
1411   build_single_def_use_links ();
1412   df_set_flags (DF_DEFER_INSN_RESCAN);
1413
1414   active_defs = XNEWVEC (df_ref, max_reg_num ());
1415 #ifdef ENABLE_CHECKING
1416   active_defs_check = sparseset_alloc (max_reg_num ());
1417 #endif
1418 }
1419
1420 static void
1421 fwprop_done (void)
1422 {
1423   loop_optimizer_finalize ();
1424
1425   VEC_free (df_ref, heap, use_def_ref);
1426   free (active_defs);
1427 #ifdef ENABLE_CHECKING
1428   sparseset_free (active_defs_check);
1429 #endif
1430
1431   free_dominance_info (CDI_DOMINATORS);
1432   cleanup_cfg (0);
1433   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1434
1435   if (dump_file)
1436     fprintf (dump_file,
1437              "\nNumber of successful forward propagations: %d\n\n",
1438              num_changes);
1439 }
1440
1441
1442 /* Main entry point.  */
1443
1444 static bool
1445 gate_fwprop (void)
1446 {
1447   return optimize > 0 && flag_forward_propagate;
1448 }
1449
1450 static unsigned int
1451 fwprop (void)
1452 {
1453   unsigned i;
1454   bool need_cleanup = false;
1455
1456   fwprop_init ();
1457
1458   /* Go through all the uses.  df_uses_create will create new ones at the
1459      end, and we'll go through them as well.
1460
1461      Do not forward propagate addresses into loops until after unrolling.
1462      CSE did so because it was able to fix its own mess, but we are not.  */
1463
1464   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1465     {
1466       df_ref use = DF_USES_GET (i);
1467       if (use)
1468         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1469             || DF_REF_BB (use)->loop_father == NULL
1470             /* The outer most loop is not really a loop.  */
1471             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1472           need_cleanup |= forward_propagate_into (use);
1473     }
1474
1475   fwprop_done ();
1476   if (need_cleanup)
1477     cleanup_cfg (0);
1478   return 0;
1479 }
1480
1481 struct rtl_opt_pass pass_rtl_fwprop =
1482 {
1483  {
1484   RTL_PASS,
1485   "fwprop1",                            /* name */
1486   gate_fwprop,                          /* gate */
1487   fwprop,                               /* execute */
1488   NULL,                                 /* sub */
1489   NULL,                                 /* next */
1490   0,                                    /* static_pass_number */
1491   TV_FWPROP,                            /* tv_id */
1492   0,                                    /* properties_required */
1493   0,                                    /* properties_provided */
1494   0,                                    /* properties_destroyed */
1495   0,                                    /* todo_flags_start */
1496   TODO_df_finish
1497     | TODO_verify_flow
1498     | TODO_verify_rtl_sharing           /* todo_flags_finish */
1499  }
1500 };
1501
1502 static unsigned int
1503 fwprop_addr (void)
1504 {
1505   unsigned i;
1506   bool need_cleanup = false;
1507
1508   fwprop_init ();
1509
1510   /* Go through all the uses.  df_uses_create will create new ones at the
1511      end, and we'll go through them as well.  */
1512   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1513     {
1514       df_ref use = DF_USES_GET (i);
1515       if (use)
1516         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1517             && DF_REF_BB (use)->loop_father != NULL
1518             /* The outer most loop is not really a loop.  */
1519             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1520           need_cleanup |= forward_propagate_into (use);
1521     }
1522
1523   fwprop_done ();
1524
1525   if (need_cleanup)
1526     cleanup_cfg (0);
1527   return 0;
1528 }
1529
1530 struct rtl_opt_pass pass_rtl_fwprop_addr =
1531 {
1532  {
1533   RTL_PASS,
1534   "fwprop2",                            /* name */
1535   gate_fwprop,                          /* gate */
1536   fwprop_addr,                          /* execute */
1537   NULL,                                 /* sub */
1538   NULL,                                 /* next */
1539   0,                                    /* static_pass_number */
1540   TV_FWPROP,                            /* tv_id */
1541   0,                                    /* properties_required */
1542   0,                                    /* properties_provided */
1543   0,                                    /* properties_destroyed */
1544   0,                                    /* todo_flags_start */
1545   TODO_df_finish | TODO_verify_rtl_sharing  /* todo_flags_finish */
1546  }
1547 };