gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27
  28 #include "sparseset.h"
  29 #include "rtl.h"
  30 #include "tm_p.h"
  31 #include "insn-config.h"
  32 #include "recog.h"
  33 #include "flags.h"
  34 #include "obstack.h"
  35 #include "basic-block.h"
  36 #include "df.h"
  37 #include "target.h"
  38 #include "cfgloop.h"
  39 #include "tree-pass.h"
  40 #include "domwalk.h"
  41 #include "emit-rtl.h"
  42
  43
  44 /* This pass does simple forward propagation and simplification when an
  45    operand of an insn can only come from a single def.  This pass uses
  46    df.c, so it is global.  However, we only do limited analysis of
  47    available expressions.
  48
  49    1) The pass tries to propagate the source of the def into the use,
  50    and checks if the result is independent of the substituted value.
  51    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  52    zero, independent of the source register.
  53
  54    In particular, we propagate constants into the use site.  Sometimes
  55    RTL expansion did not put the constant in the same insn on purpose,
  56    to satisfy a predicate, and the result will fail to be recognized;
  57    but this happens rarely and in this case we can still create a
  58    REG_EQUAL note.  For multi-word operations, this
  59
  60       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  61       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  62       (set (subreg:SI (reg:DI 122) 0)
  63          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  64       (set (subreg:SI (reg:DI 122) 4)
  65          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  66
  67    can be simplified to the much simpler
  68
   69       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  70       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  71
  72    This particular propagation is also effective at putting together
  73    complex addressing modes.  We are more aggressive inside MEMs, in
  74    that all definitions are propagated if the use is in a MEM; if the
  75    result is a valid memory address we check address_cost to decide
  76    whether the substitution is worthwhile.
  77
   78    2) The pass propagates register copies.  Although this is not as
   79    effective as the copy propagation done by CSE's canon_reg, which works
   80    by walking the instruction chain, it can help the other transformations.
  81
  82    We should consider removing this optimization, and instead reorder the
  83    RTL passes, because GCSE does this transformation too.  With some luck,
  84    the CSE pass at the end of rest_of_handle_gcse could also go away.
  85
  86    3) The pass looks for paradoxical subregs that are actually unnecessary.
  87    Things like this:
  88
  89      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  90      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  91      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  92                                 (subreg:SI (reg:QI 121) 0)))
  93
  94    are very common on machines that can only do word-sized operations.
  95    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  96    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  97    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  98    above will simplify this to
  99
 100      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 101      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 102      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 103
 104    where the first two insns are now dead.
 105
 106    We used to use reaching definitions to find which uses have a
 107    single reaching definition (sounds obvious...), but this is too
 108    complex a problem in nasty testcases like PR33928.  Now we use the
 109    multiple definitions problem in df-problems.c.  The similarity
 110    between that problem and SSA form creation is taken further, in
 111    that fwprop does a dominator walk to create its chains; however,
 112    instead of creating a PHI function where multiple definitions meet
 113    I just punt and record only singleton use-def chains, which is
 114    all that is needed by fwprop.  */
 115
 116
  117 static int num_changes;
  118
  119 DEF_VEC_P(df_ref);
  120 DEF_VEC_ALLOC_P(df_ref,heap);

      /* Indexed by the DF_REF_ID of a use: the single definition recorded
         for that use, or NULL.  The vector is grown cleared by
         build_single_def_use_links and filled in by process_uses.  */
  121 static VEC(df_ref,heap) *use_def_ref;

      /* Indexed by register number: the definition currently tracked for
         that register during the dominator walk, or NULL.  */
  122 static VEC(df_ref,heap) *reg_defs;

      /* Undo log for REG_DEFS.  A NULL entry marks the start of the records
         of a basic block; see process_defs and single_def_use_leave_block.  */
  123 static VEC(df_ref,heap) *reg_defs_stack;
 124
 125 /* The MD bitmaps are trimmed to include only live registers to cut
 126    memory usage on testcases like insn-recog.c.  Track live registers
 127    in the basic block and do not perform forward propagation if the
 128    destination is a dead pseudo occurring in a note.  */
 129 static bitmap local_md;
 130 static bitmap local_lr;
 131
 132 /* Return the only def in USE's use-def chain, or NULL if there is
 133    more than one def in the chain.  */
 134
 135 static inline df_ref
 136 get_def_for_use (df_ref use)
 137 {
 138   return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
 139 }
 140
 141
 142 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 143    TOP_FLAG says which artificials uses should be used, when DEF_REC
 144    is an artificial def vector.  LOCAL_MD is modified as after a
 145    df_md_simulate_* function; we do more or less the same processing
 146    done there, so we do not use those functions.  */
 147
 148 #define DF_MD_GEN_FLAGS \
 149         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 150
 151 static void
 152 process_defs (df_ref *def_rec, int top_flag)
 153 {
 154   df_ref def;
 155   while ((def = *def_rec++) != NULL)
 156     {
 157       df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
 158       unsigned int dregno;
 159
 160       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 161         continue;
 162
 163       dregno = DF_REF_REGNO (def);
 164       if (curr_def)
 165         VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);
 166       else
 167         {
 168           /* Do not store anything if "transitioning" from NULL to NULL.  But
 169              otherwise, push a special entry on the stack to tell the
 170              leave_block callback that the entry in reg_defs was NULL.  */
 171           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 172             ;
 173           else
 174             VEC_safe_push (df_ref, heap, reg_defs_stack, def);
 175         }
 176
 177       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 178         {
 179           bitmap_set_bit (local_md, dregno);
 180           VEC_replace (df_ref, reg_defs, dregno, NULL);
 181         }
 182       else
 183         {
 184           bitmap_clear_bit (local_md, dregno);
 185           VEC_replace (df_ref, reg_defs, dregno, def);
 186         }
 187     }
 188 }
 189
 190
 191 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 192    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 193    TOP_FLAG says which artificials uses should be used, when USE_REC
 194    is an artificial use vector.  */
 195
 196 static void
 197 process_uses (df_ref *use_rec, int top_flag)
 198 {
 199   df_ref use;
 200   while ((use = *use_rec++) != NULL)
 201     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 202       {
 203         unsigned int uregno = DF_REF_REGNO (use);
 204         if (VEC_index (df_ref, reg_defs, uregno)
 205             && !bitmap_bit_p (local_md, uregno)
 206             && bitmap_bit_p (local_lr, uregno))
 207           VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
 208                        VEC_index (df_ref, reg_defs, uregno));
 209       }
 210 }
 211
 212
 213 static void
 214 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 215                             basic_block bb)
 216 {
 217   int bb_index = bb->index;
 218   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 219   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 220   rtx insn;
 221
 222   bitmap_copy (local_md, &md_bb_info->in);
 223   bitmap_copy (local_lr, &lr_bb_info->in);
 224
 225   /* Push a marker for the leave_block callback.  */
 226   VEC_safe_push (df_ref, heap, reg_defs_stack, (df_ref) NULL);
 227
 228   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 229   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 230
 231   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 232      the live registers if there are unused artificial defs.  We prefer
 233      liveness to be underestimated.  */
 234
 235   FOR_BB_INSNS (bb, insn)
 236     if (INSN_P (insn))
 237       {
 238         unsigned int uid = INSN_UID (insn);
 239         process_uses (DF_INSN_UID_USES (uid), 0);
 240         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 241         process_defs (DF_INSN_UID_DEFS (uid), 0);
 242         df_simulate_one_insn_forwards (bb, insn, local_lr);
 243       }
 244
 245   process_uses (df_get_artificial_uses (bb_index), 0);
 246   process_defs (df_get_artificial_defs (bb_index), 0);
 247 }
 248
 249 /* Pop the definitions created in this basic block when leaving its
 250    dominated parts.  */
 251
 252 static void
 253 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 254                             basic_block bb ATTRIBUTE_UNUSED)
 255 {
 256   df_ref saved_def;
 257   while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)
 258     {
 259       unsigned int dregno = DF_REF_REGNO (saved_def);
 260
 261       /* See also process_defs.  */
 262       if (saved_def == VEC_index (df_ref, reg_defs, dregno))
 263         VEC_replace (df_ref, reg_defs, dregno, NULL);
 264       else
 265         VEC_replace (df_ref, reg_defs, dregno, saved_def);
 266     }
 267 }
 268
 269
 270 /* Build a vector holding the reaching definitions of uses reached by a
 271    single dominating definition.  */
 272
 273 static void
 274 build_single_def_use_links (void)
 275 {
 276   struct dom_walk_data walk_data;
 277
 278   /* We use the multiple definitions problem to compute our restricted
 279      use-def chains.  */
 280   df_set_flags (DF_EQ_NOTES);
 281   df_md_add_problem ();
 282   df_note_add_problem ();
 283   df_analyze ();
 284   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 285
 286   use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
 287   VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
 288
 289   reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
 290   VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());
 291
 292   reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);
 293   local_md = BITMAP_ALLOC (NULL);
 294   local_lr = BITMAP_ALLOC (NULL);
 295
 296   /* Walk the dominator tree looking for single reaching definitions
 297      dominating the uses.  This is similar to how SSA form is built.  */
 298   walk_data.dom_direction = CDI_DOMINATORS;
 299   walk_data.initialize_block_local_data = NULL;
 300   walk_data.before_dom_children = single_def_use_enter_block;
 301   walk_data.after_dom_children = single_def_use_leave_block;
 302
 303   init_walk_dominator_tree (&walk_data);
 304   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
 305   fini_walk_dominator_tree (&walk_data);
 306
 307   BITMAP_FREE (local_lr);
 308   BITMAP_FREE (local_md);
 309   VEC_free (df_ref, heap, reg_defs);
 310   VEC_free (df_ref, heap, reg_defs_stack);
 311 }
 312
 313 \f
 314 /* Do not try to replace constant addresses or addresses of local and
 315    argument slots.  These MEM expressions are made only once and inserted
 316    in many instructions, as well as being used to control symbol table
 317    output.  It is not safe to clobber them.
 318
 319    There are some uncommon cases where the address is already in a register
 320    for some reason, but we cannot take advantage of that because we have
 321    no easy way to unshare the MEM.  In addition, looking up all stack
 322    addresses is costly.  */
 323
 324 static bool
 325 can_simplify_addr (rtx addr)
 326 {
 327   rtx reg;
 328
 329   if (CONSTANT_ADDRESS_P (addr))
 330     return false;
 331
 332   if (GET_CODE (addr) == PLUS)
 333     reg = XEXP (addr, 0);
 334   else
 335     reg = addr;
 336
 337   return (!REG_P (reg)
 338           || (REGNO (reg) != FRAME_POINTER_REGNUM
 339               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 340               && REGNO (reg) != ARG_POINTER_REGNUM));
 341 }
 342
 343 /* Returns a canonical version of X for the address, from the point of view,
 344    that all multiplications are represented as MULT instead of the multiply
 345    by a power of 2 being represented as ASHIFT.
 346
 347    Every ASHIFT we find has been made by simplify_gen_binary and was not
 348    there before, so it is not shared.  So we can do this in place.  */
 349
 350 static void
 351 canonicalize_address (rtx x)
 352 {
 353   for (;;)
 354     switch (GET_CODE (x))
 355       {
 356       case ASHIFT:
 357         if (CONST_INT_P (XEXP (x, 1))
 358             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 359             && INTVAL (XEXP (x, 1)) >= 0)
 360           {
 361             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 362             PUT_CODE (x, MULT);
 363             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 364                                         GET_MODE (x));
 365           }
 366
 367         x = XEXP (x, 0);
 368         break;
 369
 370       case PLUS:
 371         if (GET_CODE (XEXP (x, 0)) == PLUS
 372             || GET_CODE (XEXP (x, 0)) == ASHIFT
 373             || GET_CODE (XEXP (x, 0)) == CONST)
 374           canonicalize_address (XEXP (x, 0));
 375
 376         x = XEXP (x, 1);
 377         break;
 378
 379       case CONST:
 380         x = XEXP (x, 0);
 381         break;
 382
 383       default:
 384         return;
 385       }
 386 }
 387
 388 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 389    for a memory access in the given MODE.  */
 390
 391 static bool
 392 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 393                         addr_space_t as, bool speed)
 394 {
 395   int gain;
 396
 397   if (rtx_equal_p (old_rtx, new_rtx)
 398       || !memory_address_addr_space_p (mode, new_rtx, as))
 399     return false;
 400
 401   /* Copy propagation is always ok.  */
 402   if (REG_P (old_rtx) && REG_P (new_rtx))
 403     return true;
 404
 405   /* Prefer the new address if it is less expensive.  */
 406   gain = (address_cost (old_rtx, mode, as, speed)
 407           - address_cost (new_rtx, mode, as, speed));
 408
 409   /* If the addresses have equivalent cost, prefer the new address
 410      if it has the highest `set_src_cost'.  That has the potential of
 411      eliminating the most insns without additional costs, and it
 412      is the same that cse.c used to do.  */
 413   if (gain == 0)
 414     gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed);
 415
 416   return (gain > 0);
 417 }
 418
 419
  420 /* Bit flags for the last parameter of propagate_rtx_1.  */
  421
  422 enum {
  423   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
  424      if it is false, propagate_rtx_1 returns false if, for at least
  425      one occurrence of OLD_RTX, it failed to collapse the result to a
  426      constant.  For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A)))
  427      may collapse to zero if replacing (reg:M B) with (reg:M A).
  428
  429      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
  430      propagate_rtx_1 just tries to make cheaper and valid memory
  431      addresses.  */
  432   PR_CAN_APPEAR = 1,
  433
  434   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 replaces every
  435      non-read-only MEM it meets with a CLOBBER or a SCRATCH and returns
  436      false.  This is needed because propagate_rtx_1 does not do any
  437      analysis on memory; thus it has to be very conservative.
  438
  439      PR_HANDLE_MEM is set by propagate_rtx when the source of the
  440      propagation contains no non-read-only MEM.  Then, it is safe not to
  441      treat non-read-only MEMs as ``opaque'' objects.  */
  442   PR_HANDLE_MEM = 2,
  443
  444   /* Set when costs should be optimized for speed.  */
  445   PR_OPTIMIZE_FOR_SPEED = 4
  446 };
 447
 448
  449 /* Replace all occurrences of OLD_RTX in *PX with NEW_RTX and try to
  450    simplify the resulting expression.  Replace *PX with a new RTL
  451    expression if an occurrence of OLD_RTX was found.  FLAGS is a mask of
         PR_CAN_APPEAR, PR_HANDLE_MEM and PR_OPTIMIZE_FOR_SPEED.

         The return value tells the caller whether the replacement is
         acceptable.  It is true when nothing was changed.  When *PX was
         rebuilt it is true if all the recursive replacements were valid, if
         PR_CAN_APPEAR is set, or if the result is a constant.
  452
  453    This is only a wrapper around simplify-rtx.c: do not add any pattern
  454    matching code here.  (The sole exception is the handling of LO_SUM, but
  455    that is because there is no simplify_gen_* function for LO_SUM).  */
  456
  457 static bool
  458 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
  459 {
  460   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  461   enum rtx_code code = GET_CODE (x);
  462   enum machine_mode mode = GET_MODE (x);
  463   enum machine_mode op_mode;
  464   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  465   bool valid_ops = true;
  466
  467   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
  468     {
  469       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
  470          they have side effects or not).  */
  471       *px = (side_effects_p (x)
  472              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
  473              : gen_rtx_SCRATCH (GET_MODE (x)));
  474       return false;
  475     }
  476
  477   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
  478      address, and we are *not* inside one.  */
  479   if (x == old_rtx)
  480     {
  481       *px = new_rtx;
  482       return can_appear;
  483     }
  484
  485   /* If this is an expression, try recursive substitution.  */
        /* In each case below the operands are substituted first; if none of
           them changed, X is left alone and true is returned.  Otherwise
           TEM receives the expression rebuilt by simplify-rtx.c.  */
  486   switch (GET_RTX_CLASS (code))
  487     {
  488     case RTX_UNARY:
  489       op0 = XEXP (x, 0);
  490       op_mode = GET_MODE (op0);
  491       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  492       if (op0 == XEXP (x, 0))
  493         return true;
  494       tem = simplify_gen_unary (code, mode, op0, op_mode);
  495       break;
  496
  497     case RTX_BIN_ARITH:
  498     case RTX_COMM_ARITH:
  499       op0 = XEXP (x, 0);
  500       op1 = XEXP (x, 1);
  501       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  502       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  503       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  504         return true;
  505       tem = simplify_gen_binary (code, mode, op0, op1);
  506       break;
  507
  508     case RTX_COMPARE:
  509     case RTX_COMM_COMPARE:
  510       op0 = XEXP (x, 0);
  511       op1 = XEXP (x, 1);
            /* The mode of the comparison is taken from the original operands,
               before any substitution.  */
  512       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
  513       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  514       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  515       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  516         return true;
  517       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
  518       break;
  519
  520     case RTX_TERNARY:
  521     case RTX_BITFIELD_OPS:
  522       op0 = XEXP (x, 0);
  523       op1 = XEXP (x, 1);
  524       op2 = XEXP (x, 2);
  525       op_mode = GET_MODE (op0);
  526       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  527       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  528       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
  529       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
  530         return true;
            /* If the original first operand had no mode, fall back to the
               mode of the substituted one.  */
  531       if (op_mode == VOIDmode)
  532         op_mode = GET_MODE (op0);
  533       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
  534       break;
  535
  536     case RTX_EXTRA:
  537       /* The only case we try to handle is a SUBREG.  */
  538       if (code == SUBREG)
  539         {
  540           op0 = XEXP (x, 0);
  541           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
  542           if (op0 == XEXP (x, 0))
  543             return true;
  544           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
  545                                      SUBREG_BYTE (x));
  546         }
  547       break;
  548
  549     case RTX_OBJ:
            /* A MEM that is NEW_RTX itself is not processed.  */
  550       if (code == MEM && x != new_rtx)
  551         {
  552           rtx new_op0;
  553           op0 = XEXP (x, 0);
  554
  555           /* There are some addresses that we cannot work on.  */
  556           if (!can_simplify_addr (op0))
  557             return true;
  558
                /* Substitute within the delegitimized address.  PR_CAN_APPEAR
                   is forced here; the new address is judged below by its
                   mode, validity and cost instead.  */
  559           op0 = new_op0 = targetm.delegitimize_address (op0);
  560           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
  561                                         flags | PR_CAN_APPEAR);
  562
  563           /* Dismiss transformation that we do not want to carry on.  */
  564           if (!valid_ops
  565               || new_op0 == op0
  566               || !(GET_MODE (new_op0) == GET_MODE (op0)
  567                    || GET_MODE (new_op0) == VOIDmode))
  568             return true;
  569
  570           canonicalize_address (new_op0);
  571
  572           /* Copy propagations are always ok.  Otherwise check the costs.  */
  573           if (!(REG_P (old_rtx) && REG_P (new_rtx))
  574               && !should_replace_address (op0, new_op0, GET_MODE (x),
  575                                           MEM_ADDR_SPACE (x),
  576                                           flags & PR_OPTIMIZE_FOR_SPEED))
  577             return true;
  578
  579           tem = replace_equiv_address_nv (x, new_op0);
  580         }
  581
  582       else if (code == LO_SUM)
  583         {
  584           op0 = XEXP (x, 0);
  585           op1 = XEXP (x, 1);
  586
  587           /* The only simplification we do attempts to remove references to op0
  588              or make it constant -- in both cases, op0's invalidity will not
  589              make the result invalid.  */
  590           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
  591           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
  592           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
  593             return true;
  594
  595           /* (lo_sum (high x) x) -> x  */
  596           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
  597             tem = op1;
  598           else
  599             tem = gen_rtx_LO_SUM (mode, op0, op1);
  600
  601           /* OP1 is likely not a legitimate address, otherwise there would have
  602              been no LO_SUM.  We want it to disappear if it is invalid, return
  603              false in that case.  */
                /* NOTE(review): this returns without storing TEM into *PX, so
                   the caller never sees the rebuilt expression, and VALID_OPS
                   is not consulted either.  Confirm whether this is
                   intended.  */
  604           return memory_address_p (mode, tem);
  605         }
  606
  607       else if (code == REG)
  608         {
                /* Catch a REG that is equal to OLD_RTX without being the very
                   same object; pointer identity was handled above.  */
  609           if (rtx_equal_p (x, old_rtx))
  610             {
  611               *px = new_rtx;
  612               return can_appear;
  613             }
  614         }
  615       break;
  616
  617     default:
  618       break;
  619     }
  620
  621   /* No change, no trouble.  */
  622   if (tem == NULL_RTX)
  623     return true;
  624
  625   *px = tem;
  626
  627   /* The replacement we made so far is valid, if all of the recursive
  628      replacements were valid, or we could simplify everything to
  629      a constant.  */
  630   return valid_ops || can_appear || CONSTANT_P (tem);
  631 }
 632
 633
 634 /* for_each_rtx traversal function that returns 1 if BODY points to
 635    a non-constant mem.  */
 636
 637 static int
 638 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 639 {
 640   rtx x = *body;
 641   return MEM_P (x) && !MEM_READONLY_P (x);
 642 }
 643
 644
 645 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 646    resulting expression (in mode MODE).  Return a new expression if it is
 647    a constant, otherwise X.
 648
 649    Simplifications where occurrences of NEW collapse to a constant are always
 650    accepted.  All simplifications are accepted if NEW is a pseudo too.
 651    Otherwise, we accept simplifications that have a lower or equal cost.  */
 652
 653 static rtx
 654 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 655                bool speed)
 656 {
 657   rtx tem;
 658   bool collapsed;
 659   int flags;
 660
 661   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 662     return NULL_RTX;
 663
 664   flags = 0;
 665   if (REG_P (new_rtx)
 666       || CONSTANT_P (new_rtx)
 667       || (GET_CODE (new_rtx) == SUBREG
 668           && REG_P (SUBREG_REG (new_rtx))
 669           && (GET_MODE_SIZE (mode)
 670               <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
 671     flags |= PR_CAN_APPEAR;
 672   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 673     flags |= PR_HANDLE_MEM;
 674
 675   if (speed)
 676     flags |= PR_OPTIMIZE_FOR_SPEED;
 677
 678   tem = x;
 679   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 680   if (tem == x || !collapsed)
 681     return NULL_RTX;
 682
 683   /* gen_lowpart_common will not be able to process VOIDmode entities other
 684      than CONST_INTs.  */
 685   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 686     return NULL_RTX;
 687
 688   if (GET_MODE (tem) == VOIDmode)
 689     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 690   else
 691     gcc_assert (GET_MODE (tem) == mode);
 692
 693   return tem;
 694 }
 695
 696
 697 \f
 698
 699 /* Return true if the register from reference REF is killed
 700    between FROM to (but not including) TO.  */
 701
 702 static bool
 703 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 704 {
 705   rtx insn;
 706
 707   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 708     {
 709       df_ref *def_rec;
 710       if (!INSN_P (insn))
 711         continue;
 712
 713       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 714         {
 715           df_ref def = *def_rec;
 716           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 717             return true;
 718         }
 719     }
 720   return false;
 721 }
 722
 723
 724 /* Check if the given DEF is available in INSN.  This would require full
 725    computation of available expressions; we check only restricted conditions:
 726    - if DEF is the sole definition of its register, go ahead;
 727    - in the same basic block, we check for no definitions killing the
 728      definition of DEF_INSN;
 729    - if USE's basic block has DEF's basic block as the sole predecessor,
 730      we check if the definition is killed after DEF_INSN or before
 731      TARGET_INSN insn, in their respective basic blocks.  */
 732 static bool
 733 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 734 {
 735   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 736   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 737   int regno;
 738   df_ref def;
 739
 740   /* We used to have a def reaching a use that is _before_ the def,
 741      with the def not dominating the use even though the use and def
 742      are in the same basic block, when a register may be used
 743      uninitialized in a loop.  This should not happen anymore since
 744      we do not use reaching definitions, but still we test for such
 745      cases and assume that DEF is not available.  */
 746   if (def_bb == target_bb
 747       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 748       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 749     return true;
 750
 751   /* Check if the reg in USE has only one definition.  We already
 752      know that this definition reaches use, or we wouldn't be here.
 753      However, this is invalid for hard registers because if they are
 754      live at the beginning of the function it does not mean that we
 755      have an uninitialized access.  */
 756   regno = DF_REF_REGNO (use);
 757   def = DF_REG_DEF_CHAIN (regno);
 758   if (def
 759       && DF_REF_NEXT_REG (def) == NULL
 760       && regno >= FIRST_PSEUDO_REGISTER)
 761     return false;
 762
 763   /* Check locally if we are in the same basic block.  */
 764   if (def_bb == target_bb)
 765     return local_ref_killed_between_p (use, def_insn, target_insn);
 766
 767   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 768   if (single_pred_p (target_bb)
 769       && single_pred (target_bb) == def_bb)
 770     {
 771       df_ref x;
 772
 773       /* See if USE is killed between DEF_INSN and the last insn in the
 774          basic block containing DEF_INSN.  */
 775       x = df_bb_regno_last_def_find (def_bb, regno);
 776       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 777         return true;
 778
 779       /* See if USE is killed between TARGET_INSN and the first insn in the
 780          basic block containing TARGET_INSN.  */
 781       x = df_bb_regno_first_def_find (target_bb, regno);
 782       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 783         return true;
 784
 785       return false;
 786     }
 787
 788   /* Otherwise assume the worst case.  */
 789   return true;
 790 }
 791
 792
 793 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 794    would require full computation of available expressions;
 795    we check only restricted conditions, see use_killed_between.  */
 796 static bool
 797 all_uses_available_at (rtx def_insn, rtx target_insn)
 798 {
 799   df_ref *use_rec;
 800   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 801   rtx def_set = single_set (def_insn);
 802
 803   gcc_assert (def_set);
 804
 805   /* If target_insn comes right after def_insn, which is very common
 806      for addresses, we can use a quicker test.  */
 807   if (NEXT_INSN (def_insn) == target_insn
 808       && REG_P (SET_DEST (def_set)))
 809     {
 810       rtx def_reg = SET_DEST (def_set);
 811
 812       /* If the insn uses the reg that it defines, the substitution is
 813          invalid.  */
 814       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 815         {
 816           df_ref use = *use_rec;
 817           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 818             return false;
 819         }
 820       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 821         {
 822           df_ref use = *use_rec;
 823           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 824             return false;
 825         }
 826     }
 827   else
 828     {
 829       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 830
 831       /* Look at all the uses of DEF_INSN, and see if they are not
 832          killed between DEF_INSN and TARGET_INSN.  */
 833       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 834         {
 835           df_ref use = *use_rec;
 836           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 837             return false;
 838           if (use_killed_between (use, def_insn, target_insn))
 839             return false;
 840         }
 841       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 842         {
 843           df_ref use = *use_rec;
 844           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 845             return false;
 846           if (use_killed_between (use, def_insn, target_insn))
 847             return false;
 848         }
 849     }
 850
 851   return true;
 852 }
 853
 854 \f
 855 static df_ref *active_defs;
 856 #ifdef ENABLE_CHECKING
 857 static sparseset active_defs_check;
 858 #endif
 859
 860 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 861    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 862    too, for checking purposes.  */
 863
 864 static void
 865 register_active_defs (df_ref *use_rec)
 866 {
 867   while (*use_rec)
 868     {
 869       df_ref use = *use_rec++;
 870       df_ref def = get_def_for_use (use);
 871       int regno = DF_REF_REGNO (use);
 872
 873 #ifdef ENABLE_CHECKING
 874       sparseset_set_bit (active_defs_check, regno);
 875 #endif
 876       active_defs[regno] = def;
 877     }
 878 }
 879
 880
 881 /* Build the use->def links that we use to update the dataflow info
 882    for new uses.  Note that building the links is very cheap and if
 883    it were done earlier, they could be used to rule out invalid
 884    propagations (in addition to what is done in all_uses_available_at).
 885    I'm not doing this yet, though.  */
 886
 887 static void
 888 update_df_init (rtx def_insn, rtx insn)
 889 {
 890 #ifdef ENABLE_CHECKING
 891   sparseset_clear (active_defs_check);
 892 #endif
 893   register_active_defs (DF_INSN_USES (def_insn));
 894   register_active_defs (DF_INSN_USES (insn));
 895   register_active_defs (DF_INSN_EQ_USES (insn));
 896 }
 897
 898
 899 /* Update the USE_DEF_REF array for the given use, using the active definitions
 900    in the ACTIVE_DEFS array to match pseudos to their def. */
 901
 902 static inline void
 903 update_uses (df_ref *use_rec)
 904 {
 905   while (*use_rec)
 906     {
 907       df_ref use = *use_rec++;
 908       int regno = DF_REF_REGNO (use);
 909
 910       /* Set up the use-def chain.  */
 911       if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref))
 912         VEC_safe_grow_cleared (df_ref, heap, use_def_ref,
 913                                DF_REF_ID (use) + 1);
 914
 915 #ifdef ENABLE_CHECKING
 916       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 917 #endif
 918       VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]);
 919     }
 920 }
 921
 922
 923 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 924    uses if NOTES_ONLY is true.  */
 925
 926 static void
 927 update_df (rtx insn, rtx note)
 928 {
 929   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 930
 931   if (note)
 932     {
 933       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 934       df_notes_rescan (insn);
 935     }
 936   else
 937     {
 938       df_uses_create (&PATTERN (insn), insn, 0);
 939       df_insn_rescan (insn);
 940       update_uses (DF_INSN_INFO_USES (insn_info));
 941     }
 942
 943   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 944 }
 945
 946
 947 /* Try substituting NEW into LOC, which originated from forward propagation
 948    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 949    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 950    new insn is not recognized.  Return whether the substitution was
 951    performed.  */
 952
 953 static bool
 954 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 955 {
 956   rtx insn = DF_REF_INSN (use);
 957   rtx set = single_set (insn);
 958   rtx note = NULL_RTX;
 959   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 960   int old_cost = 0;
 961   bool ok;
 962
 963   update_df_init (def_insn, insn);
 964
 965   /* forward_propagate_subreg may be operating on an instruction with
 966      multiple sets.  If so, assume the cost of the new instruction is
 967      not greater than the old one.  */
 968   if (set)
 969     old_cost = set_src_cost (SET_SRC (set), speed);
 970   if (dump_file)
 971     {
 972       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 973       print_inline_rtx (dump_file, *loc, 2);
 974       fprintf (dump_file, "\n with ");
 975       print_inline_rtx (dump_file, new_rtx, 2);
 976       fprintf (dump_file, "\n");
 977     }
 978
 979   validate_unshare_change (insn, loc, new_rtx, true);
 980   if (!verify_changes (0))
 981     {
 982       if (dump_file)
 983         fprintf (dump_file, "Changes to insn %d not recognized\n",
 984                  INSN_UID (insn));
 985       ok = false;
 986     }
 987
 988   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 989            && set
 990            && set_src_cost (SET_SRC (set), speed) > old_cost)
 991     {
 992       if (dump_file)
 993         fprintf (dump_file, "Changes to insn %d not profitable\n",
 994                  INSN_UID (insn));
 995       ok = false;
 996     }
 997
 998   else
 999     {
1000       if (dump_file)
1001         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
1002       ok = true;
1003     }
1004
1005   if (ok)
1006     {
1007       confirm_change_group ();
1008       num_changes++;
1009     }
1010   else
1011     {
1012       cancel_changes (0);
1013
1014       /* Can also record a simplified value in a REG_EQUAL note,
1015          making a new one if one does not already exist.  */
1016       if (set_reg_equal)
1017         {
1018           if (dump_file)
1019             fprintf (dump_file, " Setting REG_EQUAL note\n");
1020
1021           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1022         }
1023     }
1024
1025   if ((ok || note) && !CONSTANT_P (new_rtx))
1026     update_df (insn, note);
1027
1028   return ok;
1029 }
1030
1031 /* For the given single_set INSN, containing SRC known to be a
1032    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1033    is redundant due to the register being set by a LOAD_EXTEND_OP
1034    load from memory.  */
1035
1036 static bool
1037 free_load_extend (rtx src, rtx insn)
1038 {
1039   rtx reg;
1040   df_ref *use_vec;
1041   df_ref use = 0, def;
1042
1043   reg = XEXP (src, 0);
1044 #ifdef LOAD_EXTEND_OP
1045   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1046 #endif
1047     return false;
1048
1049   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1050     {
1051       use = *use_vec;
1052
1053       if (!DF_REF_IS_ARTIFICIAL (use)
1054           && DF_REF_TYPE (use) == DF_REF_REG_USE
1055           && DF_REF_REG (use) == reg)
1056         break;
1057     }
1058   if (!use)
1059     return false;
1060
1061   def = get_def_for_use (use);
1062   if (!def)
1063     return false;
1064
1065   if (DF_REF_IS_ARTIFICIAL (def))
1066     return false;
1067
1068   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1069     {
1070       rtx patt = PATTERN (DF_REF_INSN (def));
1071
1072       if (GET_CODE (patt) == SET
1073           && GET_CODE (SET_SRC (patt)) == MEM
1074           && rtx_equal_p (SET_DEST (patt), reg))
1075         return true;
1076     }
1077   return false;
1078 }
1079
1080 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1081
1082 static bool
1083 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1084 {
1085   rtx use_reg = DF_REF_REG (use);
1086   rtx use_insn, src;
1087
1088   /* Only consider subregs... */
1089   enum machine_mode use_mode = GET_MODE (use_reg);
1090   if (GET_CODE (use_reg) != SUBREG
1091       || !REG_P (SET_DEST (def_set)))
1092     return false;
1093
1094   /* If this is a paradoxical SUBREG...  */
1095   if (GET_MODE_SIZE (use_mode)
1096       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1097     {
1098       /* If this is a paradoxical SUBREG, we have no idea what value the
1099          extra bits would have.  However, if the operand is equivalent to
1100          a SUBREG whose operand is the same as our mode, and all the modes
1101          are within a word, we can just use the inner operand because
1102          these SUBREGs just say how to treat the register.  */
1103       use_insn = DF_REF_INSN (use);
1104       src = SET_SRC (def_set);
1105       if (GET_CODE (src) == SUBREG
1106           && REG_P (SUBREG_REG (src))
1107           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1108           && GET_MODE (SUBREG_REG (src)) == use_mode
1109           && subreg_lowpart_p (src)
1110           && all_uses_available_at (def_insn, use_insn))
1111         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1112                                  def_insn, false);
1113     }
1114
1115   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1116      is the low part of the reg being extended then just use the inner
1117      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1118      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1119      or due to the operation being a no-op when applied to registers.
1120      For example, if we have:
1121
1122          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1123          B: (... (subreg:SI (reg:DI X)) ...)
1124
1125      and mode_rep_extended says that Y is already sign-extended,
1126      the backend will typically allow A to be combined with the
1127      definition of Y or, failing that, allow A to be deleted after
1128      reload through register tying.  Introducing more uses of Y
1129      prevents both optimisations.  */
1130   else if (subreg_lowpart_p (use_reg))
1131     {
1132       use_insn = DF_REF_INSN (use);
1133       src = SET_SRC (def_set);
1134       if ((GET_CODE (src) == ZERO_EXTEND
1135            || GET_CODE (src) == SIGN_EXTEND)
1136           && REG_P (XEXP (src, 0))
1137           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1138           && GET_MODE (XEXP (src, 0)) == use_mode
1139           && !free_load_extend (src, def_insn)
1140           && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1141               != (int) GET_CODE (src))
1142           && all_uses_available_at (def_insn, use_insn))
1143         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1144                                  def_insn, false);
1145     }
1146
1147   return false;
1148 }
1149
1150 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1151
1152 static bool
1153 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1154 {
1155   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1156   int speed_p, i;
1157   df_ref *use_vec;
1158
1159   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1160
1161   src = SET_SRC (def_set);
1162   use_pat = PATTERN (use_insn);
1163
1164   /* In __asm don't replace if src might need more registers than
1165      reg, as that could increase register pressure on the __asm.  */
1166   use_vec = DF_INSN_USES (def_insn);
1167   if (use_vec[0] && use_vec[1])
1168     return false;
1169
1170   update_df_init (def_insn, use_insn);
1171   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1172   asm_operands = NULL_RTX;
1173   switch (GET_CODE (use_pat))
1174     {
1175     case ASM_OPERANDS:
1176       asm_operands = use_pat;
1177       break;
1178     case SET:
1179       if (MEM_P (SET_DEST (use_pat)))
1180         {
1181           loc = &SET_DEST (use_pat);
1182           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1183           if (new_rtx)
1184             validate_unshare_change (use_insn, loc, new_rtx, true);
1185         }
1186       asm_operands = SET_SRC (use_pat);
1187       break;
1188     case PARALLEL:
1189       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1190         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1191           {
1192             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1193               {
1194                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1195                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1196                                          src, speed_p);
1197                 if (new_rtx)
1198                   validate_unshare_change (use_insn, loc, new_rtx, true);
1199               }
1200             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1201           }
1202         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1203           asm_operands = XVECEXP (use_pat, 0, i);
1204       break;
1205     default:
1206       gcc_unreachable ();
1207     }
1208
1209   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1210   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1211     {
1212       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1213       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1214       if (new_rtx)
1215         validate_unshare_change (use_insn, loc, new_rtx, true);
1216     }
1217
1218   if (num_changes_pending () == 0 || !apply_change_group ())
1219     return false;
1220
1221   update_df (use_insn, NULL);
1222   num_changes++;
1223   return true;
1224 }
1225
1226 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1227    result.  */
1228
1229 static bool
1230 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1231 {
1232   rtx use_insn = DF_REF_INSN (use);
1233   rtx use_set = single_set (use_insn);
1234   rtx src, reg, new_rtx, *loc;
1235   bool set_reg_equal;
1236   enum machine_mode mode;
1237   int asm_use = -1;
1238
1239   if (INSN_CODE (use_insn) < 0)
1240     asm_use = asm_noperands (PATTERN (use_insn));
1241
1242   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1243     return false;
1244
1245   /* Do not propagate into PC, CC0, etc.  */
1246   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1247     return false;
1248
1249   /* If def and use are subreg, check if they match.  */
1250   reg = DF_REF_REG (use);
1251   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1252     {
1253       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1254         return false;
1255     }
1256   /* Check if the def had a subreg, but the use has the whole reg.  */
1257   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1258     return false;
1259   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1260      previous case, the optimization is possible and often useful indeed.  */
1261   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1262     reg = SUBREG_REG (reg);
1263
1264   /* Make sure that we can treat REG as having the same mode as the
1265      source of DEF_SET.  */
1266   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1267     return false;
1268
1269   /* Check if the substitution is valid (last, because it's the most
1270      expensive check!).  */
1271   src = SET_SRC (def_set);
1272   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1273     return false;
1274
1275   /* Check if the def is loading something from the constant pool; in this
1276      case we would undo optimization such as compress_float_constant.
1277      Still, we can set a REG_EQUAL note.  */
1278   if (MEM_P (src) && MEM_READONLY_P (src))
1279     {
1280       rtx x = avoid_constant_pool_reference (src);
1281       if (x != src && use_set)
1282         {
1283           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1284           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1285           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1286           if (old_rtx != new_rtx)
1287             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1288         }
1289       return false;
1290     }
1291
1292   if (asm_use >= 0)
1293     return forward_propagate_asm (use, def_insn, def_set, reg);
1294
1295   /* Else try simplifying.  */
1296
1297   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1298     {
1299       loc = &SET_DEST (use_set);
1300       set_reg_equal = false;
1301     }
1302   else if (!use_set)
1303     {
1304       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1305       set_reg_equal = false;
1306     }
1307   else
1308     {
1309       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1310       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1311         loc = &XEXP (note, 0);
1312       else
1313         loc = &SET_SRC (use_set);
1314
1315       /* Do not replace an existing REG_EQUAL note if the insn is not
1316          recognized.  Either we're already replacing in the note, or we'll
1317          separately try plugging the definition in the note and simplifying.
1318          And only install a REQ_EQUAL note when the destination is a REG,
1319          as the note would be invalid otherwise.  */
1320       set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set)));
1321     }
1322
1323   if (GET_MODE (*loc) == VOIDmode)
1324     mode = GET_MODE (SET_DEST (use_set));
1325   else
1326     mode = GET_MODE (*loc);
1327
1328   new_rtx = propagate_rtx (*loc, mode, reg, src,
1329                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1330
1331   if (!new_rtx)
1332     return false;
1333
1334   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1335 }
1336
1337
1338 /* Given a use USE of an insn, if it has a single reaching
1339    definition, try to forward propagate it into that insn.
1340    Return true if cfg cleanup will be needed.  */
1341
1342 static bool
1343 forward_propagate_into (df_ref use)
1344 {
1345   df_ref def;
1346   rtx def_insn, def_set, use_insn;
1347   rtx parent;
1348
1349   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1350     return false;
1351   if (DF_REF_IS_ARTIFICIAL (use))
1352     return false;
1353
1354   /* Only consider uses that have a single definition.  */
1355   def = get_def_for_use (use);
1356   if (!def)
1357     return false;
1358   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1359     return false;
1360   if (DF_REF_IS_ARTIFICIAL (def))
1361     return false;
1362
1363   /* Do not propagate loop invariant definitions inside the loop.  */
1364   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1365     return false;
1366
1367   /* Check if the use is still present in the insn!  */
1368   use_insn = DF_REF_INSN (use);
1369   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1370     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1371   else
1372     parent = PATTERN (use_insn);
1373
1374   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1375     return false;
1376
1377   def_insn = DF_REF_INSN (def);
1378   if (multiple_sets (def_insn))
1379     return false;
1380   def_set = single_set (def_insn);
1381   if (!def_set)
1382     return false;
1383
1384   /* Only try one kind of propagation.  If two are possible, we'll
1385      do it on the following iterations.  */
1386   if (forward_propagate_and_simplify (use, def_insn, def_set)
1387       || forward_propagate_subreg (use, def_insn, def_set))
1388     {
1389       if (cfun->can_throw_non_call_exceptions
1390           && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1391           && purge_dead_edges (DF_REF_BB (use)))
1392         return true;
1393     }
1394   return false;
1395 }
1396
1397 \f
1398 static void
1399 fwprop_init (void)
1400 {
1401   num_changes = 0;
1402   calculate_dominance_info (CDI_DOMINATORS);
1403
1404   /* We do not always want to propagate into loops, so we have to find
1405      loops and be careful about them.  But we have to call flow_loops_find
1406      before df_analyze, because flow_loops_find may introduce new jump
1407      insns (sadly) if we are not working in cfglayout mode.  */
1408   loop_optimizer_init (0);
1409
1410   build_single_def_use_links ();
1411   df_set_flags (DF_DEFER_INSN_RESCAN);
1412
1413   active_defs = XNEWVEC (df_ref, max_reg_num ());
1414 #ifdef ENABLE_CHECKING
1415   active_defs_check = sparseset_alloc (max_reg_num ());
1416 #endif
1417 }
1418
1419 static void
1420 fwprop_done (void)
1421 {
1422   loop_optimizer_finalize ();
1423
1424   VEC_free (df_ref, heap, use_def_ref);
1425   free (active_defs);
1426 #ifdef ENABLE_CHECKING
1427   sparseset_free (active_defs_check);
1428 #endif
1429
1430   free_dominance_info (CDI_DOMINATORS);
1431   cleanup_cfg (0);
1432   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1433
1434   if (dump_file)
1435     fprintf (dump_file,
1436              "\nNumber of successful forward propagations: %d\n\n",
1437              num_changes);
1438 }
1439
1440
1441 /* Main entry point.  */
1442
1443 static bool
1444 gate_fwprop (void)
1445 {
1446   return optimize > 0 && flag_forward_propagate;
1447 }
1448
1449 static unsigned int
1450 fwprop (void)
1451 {
1452   unsigned i;
1453   bool need_cleanup = false;
1454
1455   fwprop_init ();
1456
1457   /* Go through all the uses.  df_uses_create will create new ones at the
1458      end, and we'll go through them as well.
1459
1460      Do not forward propagate addresses into loops until after unrolling.
1461      CSE did so because it was able to fix its own mess, but we are not.  */
1462
1463   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1464     {
1465       df_ref use = DF_USES_GET (i);
1466       if (use)
1467         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1468             || DF_REF_BB (use)->loop_father == NULL
1469             /* The outer most loop is not really a loop.  */
1470             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1471           need_cleanup |= forward_propagate_into (use);
1472     }
1473
1474   fwprop_done ();
1475   if (need_cleanup)
1476     cleanup_cfg (0);
1477   return 0;
1478 }
1479
1480 struct rtl_opt_pass pass_rtl_fwprop =
1481 {
1482  {
1483   RTL_PASS,
1484   "fwprop1",                            /* name */
1485   gate_fwprop,                          /* gate */
1486   fwprop,                               /* execute */
1487   NULL,                                 /* sub */
1488   NULL,                                 /* next */
1489   0,                                    /* static_pass_number */
1490   TV_FWPROP,                            /* tv_id */
1491   0,                                    /* properties_required */
1492   0,                                    /* properties_provided */
1493   0,                                    /* properties_destroyed */
1494   0,                                    /* todo_flags_start */
1495   TODO_df_finish
1496     | TODO_verify_flow
1497     | TODO_verify_rtl_sharing           /* todo_flags_finish */
1498  }
1499 };
1500
1501 static unsigned int
1502 fwprop_addr (void)
1503 {
1504   unsigned i;
1505   bool need_cleanup = false;
1506
1507   fwprop_init ();
1508
1509   /* Go through all the uses.  df_uses_create will create new ones at the
1510      end, and we'll go through them as well.  */
1511   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1512     {
1513       df_ref use = DF_USES_GET (i);
1514       if (use)
1515         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1516             && DF_REF_BB (use)->loop_father != NULL
1517             /* The outer most loop is not really a loop.  */
1518             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1519           need_cleanup |= forward_propagate_into (use);
1520     }
1521
1522   fwprop_done ();
1523
1524   if (need_cleanup)
1525     cleanup_cfg (0);
1526   return 0;
1527 }
1528
1529 struct rtl_opt_pass pass_rtl_fwprop_addr =
1530 {
1531  {
1532   RTL_PASS,
1533   "fwprop2",                            /* name */
1534   gate_fwprop,                          /* gate */
1535   fwprop_addr,                          /* execute */
1536   NULL,                                 /* sub */
1537   NULL,                                 /* next */
1538   0,                                    /* static_pass_number */
1539   TV_FWPROP,                            /* tv_id */
1540   0,                                    /* properties_required */
1541   0,                                    /* properties_provided */
1542   0,                                    /* properties_destroyed */
1543   0,                                    /* todo_flags_start */
1544   TODO_df_finish | TODO_verify_rtl_sharing  /* todo_flags_finish */
1545  }
1546 };