gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27
  28 #include "sparseset.h"
  29 #include "rtl.h"
  30 #include "tm_p.h"
  31 #include "insn-config.h"
  32 #include "recog.h"
  33 #include "flags.h"
  34 #include "obstack.h"
  35 #include "basic-block.h"
  36 #include "df.h"
  37 #include "target.h"
  38 #include "cfgloop.h"
  39 #include "tree-pass.h"
  40 #include "domwalk.h"
  41 #include "emit-rtl.h"
  42
  43
  44 /* This pass does simple forward propagation and simplification when an
  45    operand of an insn can only come from a single def.  This pass uses
  46    df.c, so it is global.  However, we only do limited analysis of
  47    available expressions.
  48
  49    1) The pass tries to propagate the source of the def into the use,
  50    and checks if the result is independent of the substituted value.
  51    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  52    zero, independent of the source register.
  53
  54    In particular, we propagate constants into the use site.  Sometimes
  55    RTL expansion did not put the constant in the same insn on purpose,
  56    to satisfy a predicate, and the result will fail to be recognized;
  57    but this happens rarely and in this case we can still create a
  58    REG_EQUAL note.  For multi-word operations, this
  59
  60       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  61       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  62       (set (subreg:SI (reg:DI 122) 0)
  63          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  64       (set (subreg:SI (reg:DI 122) 4)
  65          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  66
  67    can be simplified to the much simpler
  68
   69       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  70       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  71
  72    This particular propagation is also effective at putting together
  73    complex addressing modes.  We are more aggressive inside MEMs, in
  74    that all definitions are propagated if the use is in a MEM; if the
  75    result is a valid memory address we check address_cost to decide
  76    whether the substitution is worthwhile.
  77
   78    2) The pass propagates register copies.  This is not as effective as
   79    the copy propagation done by CSE's canon_reg, which works by walking
   80    the instruction chain, but it can help the other transformations.
  81
  82    We should consider removing this optimization, and instead reorder the
  83    RTL passes, because GCSE does this transformation too.  With some luck,
  84    the CSE pass at the end of rest_of_handle_gcse could also go away.
  85
  86    3) The pass looks for paradoxical subregs that are actually unnecessary.
  87    Things like this:
  88
  89      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  90      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  91      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  92                                 (subreg:SI (reg:QI 121) 0)))
  93
  94    are very common on machines that can only do word-sized operations.
  95    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  96    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  97    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  98    above will simplify this to
  99
 100      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 101      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 102      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 103
 104    where the first two insns are now dead.
 105
 106    We used to use reaching definitions to find which uses have a
 107    single reaching definition (sounds obvious...), but this is too
 108    complex a problem in nasty testcases like PR33928.  Now we use the
 109    multiple definitions problem in df-problems.c.  The similarity
 110    between that problem and SSA form creation is taken further, in
 111    that fwprop does a dominator walk to create its chains; however,
 112    instead of creating a PHI function where multiple definitions meet
 113    I just punt and record only singleton use-def chains, which is
 114    all that is needed by fwprop.  */
 115
 116
  117 static int num_changes;  /* Not referenced in this part of the file; presumably counts successful changes -- TODO confirm.  */
  118
  119 static vec<df_ref> use_def_ref;     /* Indexed by DF_REF_ID of a use; holds the single def recorded for it, or NULL.  */
  120 static vec<df_ref> reg_defs;        /* Indexed by register number; the def currently recorded for it, or NULL.  */
  121 static vec<df_ref> reg_defs_stack;  /* Saved reg_defs entries; a NULL entry marks where a basic block started.  */
  122
  123 /* The MD bitmaps are trimmed to include only live registers to cut
  124    memory usage on testcases like insn-recog.c.  Track live registers
  125    in the basic block and do not perform forward propagation if the
  126    destination is a dead pseudo occurring in a note.  */
  127 static bitmap local_md;  /* Copied from the block's MD "in" set on block entry, then updated by process_defs.  */
  128 static bitmap local_lr;  /* Copied from the block's LR "in" set on block entry, then simulated forwards insn by insn.  */
 129
 130 /* Return the only def in USE's use-def chain, or NULL if there is
 131    more than one def in the chain.  */
 132
 133 static inline df_ref
 134 get_def_for_use (df_ref use)
 135 {
 136   return use_def_ref[DF_REF_ID (use)];
 137 }
 138
 139
 140 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 141    TOP_FLAG says which artificials uses should be used, when DEF_REC
 142    is an artificial def vector.  LOCAL_MD is modified as after a
 143    df_md_simulate_* function; we do more or less the same processing
 144    done there, so we do not use those functions.  */
 145
 146 #define DF_MD_GEN_FLAGS \
 147         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 148
 149 static void
 150 process_defs (df_ref *def_rec, int top_flag)
 151 {
 152   df_ref def;
 153   while ((def = *def_rec++) != NULL)
 154     {
 155       df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
 156       unsigned int dregno;
 157
 158       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 159         continue;
 160
 161       dregno = DF_REF_REGNO (def);
 162       if (curr_def)
 163         reg_defs_stack.safe_push (curr_def);
 164       else
 165         {
 166           /* Do not store anything if "transitioning" from NULL to NULL.  But
 167              otherwise, push a special entry on the stack to tell the
 168              leave_block callback that the entry in reg_defs was NULL.  */
 169           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 170             ;
 171           else
 172             reg_defs_stack.safe_push (def);
 173         }
 174
 175       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 176         {
 177           bitmap_set_bit (local_md, dregno);
 178           reg_defs[dregno] = NULL;
 179         }
 180       else
 181         {
 182           bitmap_clear_bit (local_md, dregno);
 183           reg_defs[dregno] = def;
 184         }
 185     }
 186 }
 187
 188
 189 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 190    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 191    TOP_FLAG says which artificials uses should be used, when USE_REC
 192    is an artificial use vector.  */
 193
 194 static void
 195 process_uses (df_ref *use_rec, int top_flag)
 196 {
 197   df_ref use;
 198   while ((use = *use_rec++) != NULL)
 199     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 200       {
 201         unsigned int uregno = DF_REF_REGNO (use);
 202         if (reg_defs[uregno]
 203             && !bitmap_bit_p (local_md, uregno)
 204             && bitmap_bit_p (local_lr, uregno))
 205           use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
 206       }
 207 }
  208
  209 /* Dominator-walk callback (before_dom_children): record use->def links for the refs of BB, in insn order.  */
  210 static void
  211 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
  212                             basic_block bb)
  213 {
  214   int bb_index = bb->index;
  215   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
  216   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
  217   rtx insn;
  218
  219   bitmap_copy (local_md, &md_bb_info->in);  /* Start from the block's MD "in" set...  */
  220   bitmap_copy (local_lr, &lr_bb_info->in);  /* ... and from its LR "in" set.  */
  221
  222   /* Push a marker for the leave_block callback, which pops entries until it meets this NULL.  */
  223   reg_defs_stack.safe_push (NULL);
  224
  225   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);  /* Artificial refs flagged DF_REF_AT_TOP go first.  */
  226   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
  227
  228   /* We don't call df_simulate_initialize_forwards, as it may overestimate
  229      the live registers if there are unused artificial defs.  We prefer
  230      liveness to be underestimated.  */
  231
  232   FOR_BB_INSNS (bb, insn)
  233     if (INSN_P (insn))
  234       {
  235         unsigned int uid = INSN_UID (insn);
  236         process_uses (DF_INSN_UID_USES (uid), 0);     /* Uses are linked before this insn's own defs are recorded.  */
  237         process_uses (DF_INSN_UID_EQ_USES (uid), 0);  /* Same for the uses listed in DF_INSN_UID_EQ_USES.  */
  238         process_defs (DF_INSN_UID_DEFS (uid), 0);
  239         df_simulate_one_insn_forwards (bb, insn, local_lr);  /* Advance LOCAL_LR past this insn.  */
  240       }
  241
  242   process_uses (df_get_artificial_uses (bb_index), 0);  /* Finally the artificial refs without DF_REF_AT_TOP.  */
  243   process_defs (df_get_artificial_defs (bb_index), 0);
  244 }
 245
 246 /* Pop the definitions created in this basic block when leaving its
 247    dominated parts.  */
 248
 249 static void
 250 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 251                             basic_block bb ATTRIBUTE_UNUSED)
 252 {
 253   df_ref saved_def;
 254   while ((saved_def = reg_defs_stack.pop ()) != NULL)
 255     {
 256       unsigned int dregno = DF_REF_REGNO (saved_def);
 257
 258       /* See also process_defs.  */
 259       if (saved_def == reg_defs[dregno])
 260         reg_defs[dregno] = NULL;
 261       else
 262         reg_defs[dregno] = saved_def;
 263     }
 264 }
 265
 266
  267 /* Build a vector holding the reaching definitions of uses reached by a
  268    single dominating definition.  The result is left in use_def_ref; the scratch vectors and bitmaps are released here.  */
  269
  270 static void
  271 build_single_def_use_links (void)
  272 {
  273   struct dom_walk_data walk_data;
  274
  275   /* We use the multiple definitions problem to compute our restricted
  276      use-def chains.  */
  277   df_set_flags (DF_EQ_NOTES);
  278   df_md_add_problem ();
  279   df_note_add_problem ();
  280   df_analyze ();
  281   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
  282
  283   use_def_ref.create (DF_USES_TABLE_SIZE ());
  284   use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());  /* One slot per entry of the uses table.  */
  285
  286   reg_defs.create (max_reg_num ());
  287   reg_defs.safe_grow_cleared (max_reg_num ());  /* One slot per register number.  */
  288
  289   reg_defs_stack.create (n_basic_blocks * 10);  /* Initial capacity only; every push goes through safe_push.  */
  290   local_md = BITMAP_ALLOC (NULL);
  291   local_lr = BITMAP_ALLOC (NULL);
  292
  293   /* Walk the dominator tree looking for single reaching definitions
  294      dominating the uses.  This is similar to how SSA form is built.  */
  295   walk_data.dom_direction = CDI_DOMINATORS;
  296   walk_data.initialize_block_local_data = NULL;
  297   walk_data.before_dom_children = single_def_use_enter_block;
  298   walk_data.after_dom_children = single_def_use_leave_block;
  299
  300   init_walk_dominator_tree (&walk_data);
  301   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  302   fini_walk_dominator_tree (&walk_data);
  303
  304   BITMAP_FREE (local_lr);
  305   BITMAP_FREE (local_md);
  306   reg_defs.release ();
  307   reg_defs_stack.release ();  /* use_def_ref is deliberately not released here: it is the result.  */
  308 }
 309
 310 \f
 311 /* Do not try to replace constant addresses or addresses of local and
 312    argument slots.  These MEM expressions are made only once and inserted
 313    in many instructions, as well as being used to control symbol table
 314    output.  It is not safe to clobber them.
 315
 316    There are some uncommon cases where the address is already in a register
 317    for some reason, but we cannot take advantage of that because we have
 318    no easy way to unshare the MEM.  In addition, looking up all stack
 319    addresses is costly.  */
 320
 321 static bool
 322 can_simplify_addr (rtx addr)
 323 {
 324   rtx reg;
 325
 326   if (CONSTANT_ADDRESS_P (addr))
 327     return false;
 328
 329   if (GET_CODE (addr) == PLUS)
 330     reg = XEXP (addr, 0);
 331   else
 332     reg = addr;
 333
 334   return (!REG_P (reg)
 335           || (REGNO (reg) != FRAME_POINTER_REGNUM
 336               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 337               && REGNO (reg) != ARG_POINTER_REGNUM));
 338 }
 339
 340 /* Returns a canonical version of X for the address, from the point of view,
 341    that all multiplications are represented as MULT instead of the multiply
 342    by a power of 2 being represented as ASHIFT.
 343
 344    Every ASHIFT we find has been made by simplify_gen_binary and was not
 345    there before, so it is not shared.  So we can do this in place.  */
 346
 347 static void
 348 canonicalize_address (rtx x)
 349 {
 350   for (;;)
 351     switch (GET_CODE (x))
 352       {
 353       case ASHIFT:
 354         if (CONST_INT_P (XEXP (x, 1))
 355             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 356             && INTVAL (XEXP (x, 1)) >= 0)
 357           {
 358             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 359             PUT_CODE (x, MULT);
 360             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 361                                         GET_MODE (x));
 362           }
 363
 364         x = XEXP (x, 0);
 365         break;
 366
 367       case PLUS:
 368         if (GET_CODE (XEXP (x, 0)) == PLUS
 369             || GET_CODE (XEXP (x, 0)) == ASHIFT
 370             || GET_CODE (XEXP (x, 0)) == CONST)
 371           canonicalize_address (XEXP (x, 0));
 372
 373         x = XEXP (x, 1);
 374         break;
 375
 376       case CONST:
 377         x = XEXP (x, 0);
 378         break;
 379
 380       default:
 381         return;
 382       }
 383 }
 384
 385 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 386    for a memory access in the given MODE.  */
 387
 388 static bool
 389 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 390                         addr_space_t as, bool speed)
 391 {
 392   int gain;
 393
 394   if (rtx_equal_p (old_rtx, new_rtx)
 395       || !memory_address_addr_space_p (mode, new_rtx, as))
 396     return false;
 397
 398   /* Copy propagation is always ok.  */
 399   if (REG_P (old_rtx) && REG_P (new_rtx))
 400     return true;
 401
 402   /* Prefer the new address if it is less expensive.  */
 403   gain = (address_cost (old_rtx, mode, as, speed)
 404           - address_cost (new_rtx, mode, as, speed));
 405
 406   /* If the addresses have equivalent cost, prefer the new address
 407      if it has the highest `set_src_cost'.  That has the potential of
 408      eliminating the most insns without additional costs, and it
 409      is the same that cse.c used to do.  */
 410   if (gain == 0)
 411     gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed);
 412
 413   return (gain > 0);
 414 }
 415
 416
  417 /* Flags for the last parameter of propagate_rtx_1.  The values are distinct bits and are ORed together.  */
  418
  419 enum {
  420   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
  421      if it is false, propagate_rtx_1 returns false if, for at least
  422      one occurrence OLD, it failed to collapse the result to a constant.
  423      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
  424      collapse to zero if replacing (reg:M B) with (reg:M A).
  425
  426      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
  427      propagate_rtx_1 just tries to make cheaper and valid memory
  428      addresses.  */
  429   PR_CAN_APPEAR = 1,
  430
  431   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 turns every non-read-only
  432      MEM it meets into a CLOBBER or SCRATCH and reports failure for it.  This
  433      is needed because propagate_rtx_1 does not do any analysis on memory; thus
  434      it is very conservative whenever the propagated source contains such a MEM.
  435
  436      PR_HANDLE_MEM is set when the source of the propagation was not
  437      another MEM.  Then, it is safe not to treat non-read-only MEMs as
  438      ``opaque'' objects.  */
  439   PR_HANDLE_MEM = 2,
  440
  441   /* Set when costs should be optimized for speed.  */
  442   PR_OPTIMIZE_FOR_SPEED = 4
  443 };
 444
 445
  446 /* Replace all occurrences of OLD_RTX in *PX with NEW_RTX and try to
  447    simplify the resulting expression.  Replace *PX with a new RTL expression
  448    if an occurrence of OLD_RTX was found.  FLAGS is a mask of PR_* bits.
  449    Return true if nothing changed or the replacement is considered valid.
  450    This is only a wrapper around simplify-rtx.c: do not add any pattern
  451    matching code here.  (The sole exception is the handling of LO_SUM, but
  452    that is because there is no simplify_gen_* function for LO_SUM).  */
  453
  454 static bool
  455 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
  456 {
  457   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  458   enum rtx_code code = GET_CODE (x);
  459   enum machine_mode mode = GET_MODE (x);
  460   enum machine_mode op_mode;
  461   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  462   bool valid_ops = true;
  463
  464   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
  465     {
  466       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
  467          they have side effects or not).  */
  468       *px = (side_effects_p (x)
  469              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
  470              : gen_rtx_SCRATCH (GET_MODE (x)));
  471       return false;
  472     }
  473
  474   /* If X is OLD_RTX itself (same pointer), substitute NEW_RTX.  The
  475      substitution counts as valid only when PR_CAN_APPEAR is set.  */
  476   if (x == old_rtx)
  477     {
  478       *px = new_rtx;
  479       return can_appear;
  480     }
  481
  482   /* If this is an expression, try recursive substitution.  */
  483   switch (GET_RTX_CLASS (code))
  484     {
  485     case RTX_UNARY:
  486       op0 = XEXP (x, 0);
  487       op_mode = GET_MODE (op0);
  488       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  489       if (op0 == XEXP (x, 0))  /* Nothing was replaced below X.  */
  490         return true;
  491       tem = simplify_gen_unary (code, mode, op0, op_mode);
  492       break;
  493
  494     case RTX_BIN_ARITH:
  495     case RTX_COMM_ARITH:
  496       op0 = XEXP (x, 0);
  497       op1 = XEXP (x, 1);
  498       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  499       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  500       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  501         return true;
  502       tem = simplify_gen_binary (code, mode, op0, op1);
  503       break;
  504
  505     case RTX_COMPARE:
  506     case RTX_COMM_COMPARE:
  507       op0 = XEXP (x, 0);
  508       op1 = XEXP (x, 1);
  509       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);  /* Taken before substitution.  */
  510       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  511       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  512       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  513         return true;
  514       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
  515       break;
  516
  517     case RTX_TERNARY:
  518     case RTX_BITFIELD_OPS:
  519       op0 = XEXP (x, 0);
  520       op1 = XEXP (x, 1);
  521       op2 = XEXP (x, 2);
  522       op_mode = GET_MODE (op0);
  523       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  524       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  525       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
  526       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
  527         return true;
  528       if (op_mode == VOIDmode)  /* Fall back to the mode of the substituted operand.  */
  529         op_mode = GET_MODE (op0);
  530       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
  531       break;
  532
  533     case RTX_EXTRA:
  534       /* The only case we try to handle is a SUBREG.  */
  535       if (code == SUBREG)
  536         {
  537           op0 = XEXP (x, 0);
  538           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  539           if (op0 == XEXP (x, 0))
  540             return true;
  541           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
  542                                      SUBREG_BYTE (x));
  543         }
  544       break;
  545
  546     case RTX_OBJ:
  547       if (code == MEM && x != new_rtx)
  548         {
  549           rtx new_op0;
  550           op0 = XEXP (x, 0);
  551
  552           /* There are some addresses that we cannot work on.  */
  553           if (!can_simplify_addr (op0))
  554             return true;
  555
  556           op0 = new_op0 = targetm.delegitimize_address (op0);
  557           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
  558                                         flags | PR_CAN_APPEAR);
  559
  560           /* Dismiss transformation that we do not want to carry on.  */
  561           if (!valid_ops
  562               || new_op0 == op0
  563               || !(GET_MODE (new_op0) == GET_MODE (op0)
  564                    || GET_MODE (new_op0) == VOIDmode))
  565             return true;
  566
  567           canonicalize_address (new_op0);
  568
  569           /* Copy propagations are always ok.  Otherwise check the costs.  */
  570           if (!(REG_P (old_rtx) && REG_P (new_rtx))
  571               && !should_replace_address (op0, new_op0, GET_MODE (x),
  572                                           MEM_ADDR_SPACE (x),
  573                                           flags & PR_OPTIMIZE_FOR_SPEED))
  574             return true;
  575
  576           tem = replace_equiv_address_nv (x, new_op0);
  577         }
  578
  579       else if (code == LO_SUM)
  580         {
  581           op0 = XEXP (x, 0);
  582           op1 = XEXP (x, 1);
  583
  584           /* The only simplification we do attempts to remove references to op0
  585              or make it constant -- in both cases, op0's invalidity will not
  586              make the result invalid.  */
  587           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
  588           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  589           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  590             return true;
  591
  592           /* (lo_sum (high x) x) -> x  */
  593           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
  594             tem = op1;
  595           else
  596             tem = gen_rtx_LO_SUM (mode, op0, op1);
  597
  598           /* OP1 is likely not a legitimate address, otherwise there would have
  599              been no LO_SUM.  We want it to disappear if it is invalid, return
  600              false in that case.  NOTE(review): TEM is not stored into *PX on this path -- confirm that is intended.  */
  601           return memory_address_p (mode, tem);
  602         }
  603
  604       else if (code == REG)
  605         {
  606           if (rtx_equal_p (x, old_rtx))  /* Equal to OLD_RTX without being the same pointer.  */
  607             {
  608               *px = new_rtx;
  609               return can_appear;
  610             }
  611         }
  612       break;
  613
  614     default:
  615       break;
  616     }
  617
  618   /* No change, no trouble.  */
  619   if (tem == NULL_RTX)
  620     return true;
  621
  622   *px = tem;
  623
  624   /* The replacement we made so far is valid, if all of the recursive
  625      replacements were valid, or we could simplify everything to
  626      a constant.  */
  627   return valid_ops || can_appear || CONSTANT_P (tem);
  628 }
 629
 630
 631 /* for_each_rtx traversal function that returns 1 if BODY points to
 632    a non-constant mem.  */
 633
 634 static int
 635 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 636 {
 637   rtx x = *body;
 638   return MEM_P (x) && !MEM_READONLY_P (x);
 639 }
 640
 641
 642 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 643    resulting expression (in mode MODE).  Return a new expression if it is
 644    a constant, otherwise X.
 645
 646    Simplifications where occurrences of NEW collapse to a constant are always
 647    accepted.  All simplifications are accepted if NEW is a pseudo too.
 648    Otherwise, we accept simplifications that have a lower or equal cost.  */
 649
 650 static rtx
 651 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 652                bool speed)
 653 {
 654   rtx tem;
 655   bool collapsed;
 656   int flags;
 657
 658   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 659     return NULL_RTX;
 660
 661   flags = 0;
 662   if (REG_P (new_rtx)
 663       || CONSTANT_P (new_rtx)
 664       || (GET_CODE (new_rtx) == SUBREG
 665           && REG_P (SUBREG_REG (new_rtx))
 666           && (GET_MODE_SIZE (mode)
 667               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 668     flags |= PR_CAN_APPEAR;
 669   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 670     flags |= PR_HANDLE_MEM;
 671
 672   if (speed)
 673     flags |= PR_OPTIMIZE_FOR_SPEED;
 674
 675   tem = x;
 676   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 677   if (tem == x || !collapsed)
 678     return NULL_RTX;
 679
 680   /* gen_lowpart_common will not be able to process VOIDmode entities other
 681      than CONST_INTs.  */
 682   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 683     return NULL_RTX;
 684
 685   if (GET_MODE (tem) == VOIDmode)
 686     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 687   else
 688     gcc_assert (GET_MODE (tem) == mode);
 689
 690   return tem;
 691 }
 692
 693
 694 \f
 695
 696 /* Return true if the register from reference REF is killed
 697    between FROM to (but not including) TO.  */
 698
 699 static bool
 700 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 701 {
 702   rtx insn;
 703
 704   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 705     {
 706       df_ref *def_rec;
 707       if (!INSN_P (insn))
 708         continue;
 709
 710       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 711         {
 712           df_ref def = *def_rec;
 713           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 714             return true;
 715         }
 716     }
 717   return false;
 718 }
 719
 720
  721 /* Check if the given DEF is available in INSN.  This would require full
  722    computation of available expressions; we check only restricted conditions:
  723    - if DEF is the sole definition of its register, go ahead;
  724    - in the same basic block, we check for no definitions killing the
  725      definition of DEF_INSN;
  726    - if USE's basic block has DEF's basic block as the sole predecessor,
  727      we check if the definition is killed after DEF_INSN or before
  728      TARGET_INSN insn, in their respective basic blocks.
         Return true if USE's register may be redefined between DEF_INSN and
         TARGET_INSN, or if that cannot be ruled out; false otherwise.  */
  729 static bool
  730 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
  731 {
  732   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
  733   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
  734   int regno;
  735   df_ref def;
  736
  737   /* We used to have a def reaching a use that is _before_ the def,
  738      with the def not dominating the use even though the use and def
  739      are in the same basic block, when a register may be used
  740      uninitialized in a loop.  This should not happen anymore since
  741      we do not use reaching definitions, but still we test for such
  742      cases and assume that DEF is not available.  */
  743   if (def_bb == target_bb
  744       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
  745       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
  746     return true;
  747
  748   /* Check if the reg in USE has only one definition.  We already
  749      know that this definition reaches use, or we wouldn't be here.
  750      However, this is invalid for hard registers because if they are
  751      live at the beginning of the function it does not mean that we
  752      have an uninitialized access.  */
  753   regno = DF_REF_REGNO (use);
  754   def = DF_REG_DEF_CHAIN (regno);
  755   if (def
  756       && DF_REF_NEXT_REG (def) == NULL
  757       && regno >= FIRST_PSEUDO_REGISTER)
  758     return false;
  759
  760   /* Check locally if we are in the same basic block.  */
  761   if (def_bb == target_bb)
  762     return local_ref_killed_between_p (use, def_insn, target_insn);
  763
  764   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
  765   if (single_pred_p (target_bb)
  766       && single_pred (target_bb) == def_bb)
  767     {
  768       df_ref x;
  769
  770       /* See if USE is killed between DEF_INSN and the last insn in the
  771          basic block containing DEF_INSN.  */
  772       x = df_bb_regno_last_def_find (def_bb, regno);
  773       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
  774         return true;
  775
  776       /* See if USE is killed between TARGET_INSN and the first insn in the
  777          basic block containing TARGET_INSN.  */
  778       x = df_bb_regno_first_def_find (target_bb, regno);
  779       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
  780         return true;
  781
  782       return false;
  783     }
  784
  785   /* Otherwise assume the worst case.  */
  786   return true;
  787 }
 788
 789
  790 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
  791    would require full computation of available expressions;
  792    we check only restricted conditions, see use_killed_between.
         DEF_INSN must be a single_set insn (asserted below).  Both the uses
         and the EQ uses of DEF_INSN are examined.  */
  793 static bool
  794 all_uses_available_at (rtx def_insn, rtx target_insn)
  795 {
  796   df_ref *use_rec;
  797   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
  798   rtx def_set = single_set (def_insn);
  799   rtx next;
  800
  801   gcc_assert (def_set);
  802
  803   /* If target_insn comes right after def_insn, which is very common
  804      for addresses, we can use a quicker test.  Ignore debug insns
  805      other than target insns for this.  */
  806   next = NEXT_INSN (def_insn);
  807   while (next && next != target_insn && DEBUG_INSN_P (next))
  808     next = NEXT_INSN (next);
  809   if (next == target_insn && REG_P (SET_DEST (def_set)))
  810     {
  811       rtx def_reg = SET_DEST (def_set);
  812
  813       /* If the insn uses the reg that it defines, the substitution is
  814          invalid.  */
  815       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
  816         {
  817           df_ref use = *use_rec;
  818           if (rtx_equal_p (DF_REF_REG (use), def_reg))
  819             return false;
  820         }
  821       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
  822         {
  823           df_ref use = *use_rec;
  824           if (rtx_equal_p (DF_REF_REG (use), def_reg))
  825             return false;
  826         }
  827     }
  828   else
  829     {
  830       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;  /* NULL_RTX when the destination is not a REG.  */
  831
  832       /* Look at all the uses of DEF_INSN, and see if they are not
  833          killed between DEF_INSN and TARGET_INSN.  */
  834       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
  835         {
  836           df_ref use = *use_rec;
  837           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
  838             return false;
  839           if (use_killed_between (use, def_insn, target_insn))
  840             return false;
  841         }
  842       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
  843         {
  844           df_ref use = *use_rec;
  845           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
  846             return false;
  847           if (use_killed_between (use, def_insn, target_insn))
  848             return false;
  849         }
  850     }
  851
  852   return true;
  853 }
 854
 855 \f
 856 static df_ref *active_defs;
 857 #ifdef ENABLE_CHECKING
 858 static sparseset active_defs_check;
 859 #endif
 860
 861 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 862    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 863    too, for checking purposes.  */
 864
 865 static void
 866 register_active_defs (df_ref *use_rec)
 867 {
 868   while (*use_rec)
 869     {
 870       df_ref use = *use_rec++;
 871       df_ref def = get_def_for_use (use);
 872       int regno = DF_REF_REGNO (use);
 873
 874 #ifdef ENABLE_CHECKING
 875       sparseset_set_bit (active_defs_check, regno);
 876 #endif
 877       active_defs[regno] = def;
 878     }
 879 }
 880
 881
 882 /* Build the use->def links that we use to update the dataflow info
 883    for new uses.  Note that building the links is very cheap and if
 884    it were done earlier, they could be used to rule out invalid
 885    propagations (in addition to what is done in all_uses_available_at).
 886    I'm not doing this yet, though.  */
 887
 888 static void
 889 update_df_init (rtx def_insn, rtx insn)
 890 {
 891 #ifdef ENABLE_CHECKING
 892   sparseset_clear (active_defs_check);
 893 #endif
 894   register_active_defs (DF_INSN_USES (def_insn));
 895   register_active_defs (DF_INSN_USES (insn));
 896   register_active_defs (DF_INSN_EQ_USES (insn));
 897 }
 898
 899
 900 /* Update the USE_DEF_REF array for the given use, using the active definitions
 901    in the ACTIVE_DEFS array to match pseudos to their def. */
 902
 903 static inline void
 904 update_uses (df_ref *use_rec)
 905 {
 906   while (*use_rec)
 907     {
 908       df_ref use = *use_rec++;
 909       int regno = DF_REF_REGNO (use);
 910
 911       /* Set up the use-def chain.  */
 912       if (DF_REF_ID (use) >= (int) use_def_ref.length ())
 913         use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);
 914
 915 #ifdef ENABLE_CHECKING
 916       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 917 #endif
 918       use_def_ref[DF_REF_ID (use)] = active_defs[regno];
 919     }
 920 }
 921
 922
 923 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 924    uses if NOTES_ONLY is true.  */
 925
 926 static void
 927 update_df (rtx insn, rtx note)
 928 {
 929   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 930
 931   if (note)
 932     {
 933       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 934       df_notes_rescan (insn);
 935     }
 936   else
 937     {
 938       df_uses_create (&PATTERN (insn), insn, 0);
 939       df_insn_rescan (insn);
 940       update_uses (DF_INSN_INFO_USES (insn_info));
 941     }
 942
 943   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 944 }
 945
 946
 947 /* Try substituting NEW into LOC, which originated from forward propagation
 948    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 949    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 950    new insn is not recognized.  Return whether the substitution was
 951    performed.  */
 952
 953 static bool
 954 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 955 {
 956   rtx insn = DF_REF_INSN (use);
 957   rtx set = single_set (insn);
 958   rtx note = NULL_RTX;
 959   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 960   int old_cost = 0;
 961   bool ok;
 962
 963   update_df_init (def_insn, insn);
 964
 965   /* forward_propagate_subreg may be operating on an instruction with
 966      multiple sets.  If so, assume the cost of the new instruction is
 967      not greater than the old one.  */
 968   if (set)
 969     old_cost = set_src_cost (SET_SRC (set), speed);
 970   if (dump_file)
 971     {
 972       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 973       print_inline_rtx (dump_file, *loc, 2);
 974       fprintf (dump_file, "\n with ");
 975       print_inline_rtx (dump_file, new_rtx, 2);
 976       fprintf (dump_file, "\n");
 977     }
 978
 979   validate_unshare_change (insn, loc, new_rtx, true);
 980   if (!verify_changes (0))
 981     {
 982       if (dump_file)
 983         fprintf (dump_file, "Changes to insn %d not recognized\n",
 984                  INSN_UID (insn));
 985       ok = false;
 986     }
 987
 988   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 989            && set
 990            && set_src_cost (SET_SRC (set), speed) > old_cost)
 991     {
 992       if (dump_file)
 993         fprintf (dump_file, "Changes to insn %d not profitable\n",
 994                  INSN_UID (insn));
 995       ok = false;
 996     }
 997
 998   else
 999     {
1000       if (dump_file)
1001         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
1002       ok = true;
1003     }
1004
1005   if (ok)
1006     {
1007       confirm_change_group ();
1008       num_changes++;
1009     }
1010   else
1011     {
1012       cancel_changes (0);
1013
1014       /* Can also record a simplified value in a REG_EQUAL note,
1015          making a new one if one does not already exist.  */
1016       if (set_reg_equal)
1017         {
1018           if (dump_file)
1019             fprintf (dump_file, " Setting REG_EQUAL note\n");
1020
1021           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1022         }
1023     }
1024
1025   if ((ok || note) && !CONSTANT_P (new_rtx))
1026     update_df (insn, note);
1027
1028   return ok;
1029 }
1030
1031 /* For the given single_set INSN, containing SRC known to be a
1032    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1033    is redundant due to the register being set by a LOAD_EXTEND_OP
1034    load from memory.  */
1035
1036 static bool
1037 free_load_extend (rtx src, rtx insn)
1038 {
1039   rtx reg;
1040   df_ref *use_vec;
1041   df_ref use = 0, def;
1042
1043   reg = XEXP (src, 0);
1044 #ifdef LOAD_EXTEND_OP
1045   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1046 #endif
1047     return false;
1048
1049   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1050     {
1051       use = *use_vec;
1052
1053       if (!DF_REF_IS_ARTIFICIAL (use)
1054           && DF_REF_TYPE (use) == DF_REF_REG_USE
1055           && DF_REF_REG (use) == reg)
1056         break;
1057     }
1058   if (!use)
1059     return false;
1060
1061   def = get_def_for_use (use);
1062   if (!def)
1063     return false;
1064
1065   if (DF_REF_IS_ARTIFICIAL (def))
1066     return false;
1067
1068   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1069     {
1070       rtx patt = PATTERN (DF_REF_INSN (def));
1071
1072       if (GET_CODE (patt) == SET
1073           && GET_CODE (SET_SRC (patt)) == MEM
1074           && rtx_equal_p (SET_DEST (patt), reg))
1075         return true;
1076     }
1077   return false;
1078 }
1079
1080 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1081
1082 static bool
1083 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1084 {
1085   rtx use_reg = DF_REF_REG (use);
1086   rtx use_insn, src;
1087
1088   /* Only consider subregs... */
1089   enum machine_mode use_mode = GET_MODE (use_reg);
1090   if (GET_CODE (use_reg) != SUBREG
1091       || !REG_P (SET_DEST (def_set)))
1092     return false;
1093
1094   /* If this is a paradoxical SUBREG...  */
1095   if (GET_MODE_SIZE (use_mode)
1096       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1097     {
1098       /* If this is a paradoxical SUBREG, we have no idea what value the
1099          extra bits would have.  However, if the operand is equivalent to
1100          a SUBREG whose operand is the same as our mode, and all the modes
1101          are within a word, we can just use the inner operand because
1102          these SUBREGs just say how to treat the register.  */
1103       use_insn = DF_REF_INSN (use);
1104       src = SET_SRC (def_set);
1105       if (GET_CODE (src) == SUBREG
1106           && REG_P (SUBREG_REG (src))
1107           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1108           && GET_MODE (SUBREG_REG (src)) == use_mode
1109           && subreg_lowpart_p (src)
1110           && all_uses_available_at (def_insn, use_insn))
1111         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1112                                  def_insn, false);
1113     }
1114
1115   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1116      is the low part of the reg being extended then just use the inner
1117      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1118      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1119      or due to the operation being a no-op when applied to registers.
1120      For example, if we have:
1121
1122          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1123          B: (... (subreg:SI (reg:DI X)) ...)
1124
1125      and mode_rep_extended says that Y is already sign-extended,
1126      the backend will typically allow A to be combined with the
1127      definition of Y or, failing that, allow A to be deleted after
1128      reload through register tying.  Introducing more uses of Y
1129      prevents both optimisations.  */
1130   else if (subreg_lowpart_p (use_reg))
1131     {
1132       use_insn = DF_REF_INSN (use);
1133       src = SET_SRC (def_set);
1134       if ((GET_CODE (src) == ZERO_EXTEND
1135            || GET_CODE (src) == SIGN_EXTEND)
1136           && REG_P (XEXP (src, 0))
1137           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1138           && GET_MODE (XEXP (src, 0)) == use_mode
1139           && !free_load_extend (src, def_insn)
1140           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1141               != (int) GET_CODE (src))
1142           && all_uses_available_at (def_insn, use_insn))
1143         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1144                                  def_insn, false);
1145     }
1146
1147   return false;
1148 }
1149
1150 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1151
1152 static bool
1153 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1154 {
1155   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1156   int speed_p, i;
1157   df_ref *use_vec;
1158
1159   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1160
1161   src = SET_SRC (def_set);
1162   use_pat = PATTERN (use_insn);
1163
1164   /* In __asm don't replace if src might need more registers than
1165      reg, as that could increase register pressure on the __asm.  */
1166   use_vec = DF_INSN_USES (def_insn);
1167   if (use_vec[0] && use_vec[1])
1168     return false;
1169
1170   update_df_init (def_insn, use_insn);
1171   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1172   asm_operands = NULL_RTX;
1173   switch (GET_CODE (use_pat))
1174     {
1175     case ASM_OPERANDS:
1176       asm_operands = use_pat;
1177       break;
1178     case SET:
1179       if (MEM_P (SET_DEST (use_pat)))
1180         {
1181           loc = &SET_DEST (use_pat);
1182           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1183           if (new_rtx)
1184             validate_unshare_change (use_insn, loc, new_rtx, true);
1185         }
1186       asm_operands = SET_SRC (use_pat);
1187       break;
1188     case PARALLEL:
1189       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1190         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1191           {
1192             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1193               {
1194                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1195                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1196                                          src, speed_p);
1197                 if (new_rtx)
1198                   validate_unshare_change (use_insn, loc, new_rtx, true);
1199               }
1200             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1201           }
1202         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1203           asm_operands = XVECEXP (use_pat, 0, i);
1204       break;
1205     default:
1206       gcc_unreachable ();
1207     }
1208
1209   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1210   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1211     {
1212       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1213       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1214       if (new_rtx)
1215         validate_unshare_change (use_insn, loc, new_rtx, true);
1216     }
1217
1218   if (num_changes_pending () == 0 || !apply_change_group ())
1219     return false;
1220
1221   update_df (use_insn, NULL);
1222   num_changes++;
1223   return true;
1224 }
1225
1226 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1227    result.  */
1228
1229 static bool
1230 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1231 {
1232   rtx use_insn = DF_REF_INSN (use);
1233   rtx use_set = single_set (use_insn);
1234   rtx src, reg, new_rtx, *loc;
1235   bool set_reg_equal;
1236   enum machine_mode mode;
1237   int asm_use = -1;
1238
1239   if (INSN_CODE (use_insn) < 0)
1240     asm_use = asm_noperands (PATTERN (use_insn));
1241
1242   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1243     return false;
1244
1245   /* Do not propagate into PC, CC0, etc.  */
1246   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1247     return false;
1248
1249   /* If def and use are subreg, check if they match.  */
1250   reg = DF_REF_REG (use);
1251   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1252     {
1253       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1254         return false;
1255     }
1256   /* Check if the def had a subreg, but the use has the whole reg.  */
1257   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1258     return false;
1259   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1260      previous case, the optimization is possible and often useful indeed.  */
1261   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1262     reg = SUBREG_REG (reg);
1263
1264   /* Make sure that we can treat REG as having the same mode as the
1265      source of DEF_SET.  */
1266   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1267     return false;
1268
1269   /* Check if the substitution is valid (last, because it's the most
1270      expensive check!).  */
1271   src = SET_SRC (def_set);
1272   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1273     return false;
1274
1275   /* Check if the def is loading something from the constant pool; in this
1276      case we would undo optimization such as compress_float_constant.
1277      Still, we can set a REG_EQUAL note.  */
1278   if (MEM_P (src) && MEM_READONLY_P (src))
1279     {
1280       rtx x = avoid_constant_pool_reference (src);
1281       if (x != src && use_set)
1282         {
1283           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1284           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1285           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1286           if (old_rtx != new_rtx)
1287             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1288         }
1289       return false;
1290     }
1291
1292   if (asm_use >= 0)
1293     return forward_propagate_asm (use, def_insn, def_set, reg);
1294
1295   /* Else try simplifying.  */
1296
1297   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1298     {
1299       loc = &SET_DEST (use_set);
1300       set_reg_equal = false;
1301     }
1302   else if (!use_set)
1303     {
1304       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1305       set_reg_equal = false;
1306     }
1307   else
1308     {
1309       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1310       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1311         loc = &XEXP (note, 0);
1312       else
1313         loc = &SET_SRC (use_set);
1314
1315       /* Do not replace an existing REG_EQUAL note if the insn is not
1316          recognized.  Either we're already replacing in the note, or we'll
1317          separately try plugging the definition in the note and simplifying.
1318          And only install a REQ_EQUAL note when the destination is a REG,
1319          as the note would be invalid otherwise.  */
1320       set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set)));
1321     }
1322
1323   if (GET_MODE (*loc) == VOIDmode)
1324     mode = GET_MODE (SET_DEST (use_set));
1325   else
1326     mode = GET_MODE (*loc);
1327
1328   new_rtx = propagate_rtx (*loc, mode, reg, src,
1329                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1330
1331   if (!new_rtx)
1332     return false;
1333
1334   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1335 }
1336
1337
1338 /* Given a use USE of an insn, if it has a single reaching
1339    definition, try to forward propagate it into that insn.
1340    Return true if cfg cleanup will be needed.  */
1341
1342 static bool
1343 forward_propagate_into (df_ref use)
1344 {
1345   df_ref def;
1346   rtx def_insn, def_set, use_insn;
1347   rtx parent;
1348
1349   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1350     return false;
1351   if (DF_REF_IS_ARTIFICIAL (use))
1352     return false;
1353
1354   /* Only consider uses that have a single definition.  */
1355   def = get_def_for_use (use);
1356   if (!def)
1357     return false;
1358   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1359     return false;
1360   if (DF_REF_IS_ARTIFICIAL (def))
1361     return false;
1362
1363   /* Do not propagate loop invariant definitions inside the loop.  */
1364   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1365     return false;
1366
1367   /* Check if the use is still present in the insn!  */
1368   use_insn = DF_REF_INSN (use);
1369   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1370     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1371   else
1372     parent = PATTERN (use_insn);
1373
1374   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1375     return false;
1376
1377   def_insn = DF_REF_INSN (def);
1378   if (multiple_sets (def_insn))
1379     return false;
1380   def_set = single_set (def_insn);
1381   if (!def_set)
1382     return false;
1383
1384   /* Only try one kind of propagation.  If two are possible, we'll
1385      do it on the following iterations.  */
1386   if (forward_propagate_and_simplify (use, def_insn, def_set)
1387       || forward_propagate_subreg (use, def_insn, def_set))
1388     {
1389       if (cfun->can_throw_non_call_exceptions
1390           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1391           && purge_dead_edges (DF_REF_BB (use)))
1392         return true;
1393     }
1394   return false;
1395 }
1396
1397 \f
1398 static void
1399 fwprop_init (void)
1400 {
1401   num_changes = 0;
1402   calculate_dominance_info (CDI_DOMINATORS);
1403
1404   /* We do not always want to propagate into loops, so we have to find
1405      loops and be careful about them.  But we have to call flow_loops_find
1406      before df_analyze, because flow_loops_find may introduce new jump
1407      insns (sadly) if we are not working in cfglayout mode.  */
1408   loop_optimizer_init (0);
1409
1410   build_single_def_use_links ();
1411   df_set_flags (DF_DEFER_INSN_RESCAN);
1412
1413   active_defs = XNEWVEC (df_ref, max_reg_num ());
1414 #ifdef ENABLE_CHECKING
1415   active_defs_check = sparseset_alloc (max_reg_num ());
1416 #endif
1417 }
1418
1419 static void
1420 fwprop_done (void)
1421 {
1422   loop_optimizer_finalize ();
1423
1424   use_def_ref.release ();
1425   free (active_defs);
1426 #ifdef ENABLE_CHECKING
1427   sparseset_free (active_defs_check);
1428 #endif
1429
1430   free_dominance_info (CDI_DOMINATORS);
1431   cleanup_cfg (0);
1432   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1433
1434   if (dump_file)
1435     fprintf (dump_file,
1436              "\nNumber of successful forward propagations: %d\n\n",
1437              num_changes);
1438 }
1439
1440
1441 /* Main entry point.  */
1442
1443 static bool
1444 gate_fwprop (void)
1445 {
1446   return optimize > 0 && flag_forward_propagate;
1447 }
1448
1449 static unsigned int
1450 fwprop (void)
1451 {
1452   unsigned i;
1453   bool need_cleanup = false;
1454
1455   fwprop_init ();
1456
1457   /* Go through all the uses.  df_uses_create will create new ones at the
1458      end, and we'll go through them as well.
1459
1460      Do not forward propagate addresses into loops until after unrolling.
1461      CSE did so because it was able to fix its own mess, but we are not.  */
1462
1463   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1464     {
1465       df_ref use = DF_USES_GET (i);
1466       if (use)
1467         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1468             || DF_REF_BB (use)->loop_father == NULL
1469             /* The outer most loop is not really a loop.  */
1470             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1471           need_cleanup |= forward_propagate_into (use);
1472     }
1473
1474   fwprop_done ();
1475   if (need_cleanup)
1476     cleanup_cfg (0);
1477   return 0;
1478 }
1479
1480 struct rtl_opt_pass pass_rtl_fwprop =
1481 {
1482  {
1483   RTL_PASS,
1484   "fwprop1",                            /* name */
1485   OPTGROUP_NONE,                        /* optinfo_flags */
1486   gate_fwprop,                          /* gate */
1487   fwprop,                               /* execute */
1488   NULL,                                 /* sub */
1489   NULL,                                 /* next */
1490   0,                                    /* static_pass_number */
1491   TV_FWPROP,                            /* tv_id */
1492   0,                                    /* properties_required */
1493   0,                                    /* properties_provided */
1494   0,                                    /* properties_destroyed */
1495   0,                                    /* todo_flags_start */
1496   TODO_df_finish
1497     | TODO_verify_flow
1498     | TODO_verify_rtl_sharing           /* todo_flags_finish */
1499  }
1500 };
1501
1502 static unsigned int
1503 fwprop_addr (void)
1504 {
1505   unsigned i;
1506   bool need_cleanup = false;
1507
1508   fwprop_init ();
1509
1510   /* Go through all the uses.  df_uses_create will create new ones at the
1511      end, and we'll go through them as well.  */
1512   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1513     {
1514       df_ref use = DF_USES_GET (i);
1515       if (use)
1516         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1517             && DF_REF_BB (use)->loop_father != NULL
1518             /* The outer most loop is not really a loop.  */
1519             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1520           need_cleanup |= forward_propagate_into (use);
1521     }
1522
1523   fwprop_done ();
1524
1525   if (need_cleanup)
1526     cleanup_cfg (0);
1527   return 0;
1528 }
1529
1530 struct rtl_opt_pass pass_rtl_fwprop_addr =
1531 {
1532  {
1533   RTL_PASS,
1534   "fwprop2",                            /* name */
1535   OPTGROUP_NONE,                        /* optinfo_flags */
1536   gate_fwprop,                          /* gate */
1537   fwprop_addr,                          /* execute */
1538   NULL,                                 /* sub */
1539   NULL,                                 /* next */
1540   0,                                    /* static_pass_number */
1541   TV_FWPROP,                            /* tv_id */
1542   0,                                    /* properties_required */
1543   0,                                    /* properties_provided */
1544   0,                                    /* properties_destroyed */
1545   0,                                    /* todo_flags_start */
1546   TODO_df_finish | TODO_verify_rtl_sharing  /* todo_flags_finish */
1547  }
1548 };