1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
28 /* References searched while implementing this.
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36      Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "function.h"
154 #include "expr.h"
155 #include "except.h"
156 #include "ggc.h"
157 #include "params.h"
158 #include "cselib.h"
159 #include "intl.h"
160 #include "obstack.h"
161 #include "timevar.h"
162 #include "tree-pass.h"
163 #include "hashtab.h"
164 #include "df.h"
165 #include "dbgcnt.h"
166 #include "target.h"
167 #include "gcse.h"
169 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
170 are a superset of those done by classic GCSE.
172 Two passes of copy/constant propagation are done around PRE or hoisting
173 because the first one enables more GCSE and the second one helps to clean
174 up the copies that PRE and HOIST create. This is needed more for PRE than
175 for HOIST because code hoisting will try to use an existing register
176 containing the common subexpression rather than create a new one. This is
177 harder to do for PRE because of the code motion (which HOIST doesn't do).
179 Expressions we are interested in GCSE-ing are of the form
180 (set (pseudo-reg) (expression)).
181 Function want_to_gcse_p says what these are.
183 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
184 This allows PRE to hoist expressions that are expressed in multiple insns,
185 such as complex address calculations (e.g. for PIC code, or loads with a
186 high part and a low part).
188 PRE handles moving invariant expressions out of loops (by treating them as
189 partially redundant).
191 **********************
193 We used to support multiple passes but there are diminishing returns in
194 doing so. The first pass usually makes 90% of the changes that are doable.
195 A second pass can make a few more changes made possible by the first pass.
196 Experiments show any further passes don't make enough changes to justify
197 the expense.
199 A study of spec92 using an unlimited number of passes:
200 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
201 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
202 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
204    It was found that doing copy propagation between each pass enables further
205 substitutions.
207 This study was done before expressions in REG_EQUAL notes were added as
208 candidate expressions for optimization, and before the GIMPLE optimizers
209    were added.  Probably, running multiple passes is even less efficient now than
210 at the time when the study was conducted.
212 PRE is quite expensive in complicated functions because the DFA can take
213 a while to converge. Hence we only perform one pass.
215 **********************
217 The steps for PRE are:
219 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
221 2) Perform the data flow analysis for PRE.
223 3) Delete the redundant instructions
225 4) Insert the required copies [if any] that make the partially
226 redundant instructions fully redundant.
228 5) For other reaching expressions, insert an instruction to copy the value
229 to a newly created pseudo that will reach the redundant instruction.
231 The deletion is done first so that when we do insertions we
232 know which pseudo reg to use.
234 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
235 argue it is not. The number of iterations for the algorithm to converge
236 is typically 2-4 so I don't view it as that expensive (relatively speaking).
238 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
239 we create. To make an expression reach the place where it's redundant,
240 the result of the expression is copied to a new register, and the redundant
241 expression is deleted by replacing it with this new register. Classic GCSE
242    doesn't have this problem as much, because it computes the reaching defs of
243 each register in each block and thus can try to use an existing
244 register. */
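/* Illustrative sketch only, not part of this file's implementation: a
   source-level picture of the transformation described above.  The function
   names and values are hypothetical; the point is that inserting the
   computation on the "else" path makes the second evaluation of a + b fully
   redundant, so it can be replaced by a copy from a new temporary.  Guarded
   by #if 0 so it is never compiled here.  */
#if 0
#include <stdio.h>

static int
before_pre (int a, int b, int c)
{
  int x = 0, y;

  if (c)
    x = a + b;		/* a + b is only partially redundant with the use below.  */
  y = a + b;		/* recomputed on every path through the function.  */
  return x + y;
}

static int
after_pre (int a, int b, int c)
{
  int t, x = 0, y;

  if (c)
    {
      t = a + b;	/* original computation, result kept in a new temporary.  */
      x = t;
    }
  else
    t = a + b;		/* inserted computation makes the expression fully redundant.  */
  y = t;		/* the redundant computation becomes a register copy.  */
  return x + y;
}

int
main (void)
{
  printf ("%d %d\n", before_pre (2, 3, 1), after_pre (2, 3, 1));
  return 0;
}
#endif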
246 /* GCSE global vars. */
248 struct target_gcse default_target_gcse;
249 #if SWITCHABLE_TARGET
250 struct target_gcse *this_target_gcse = &default_target_gcse;
251 #endif
253 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
254 int flag_rerun_cse_after_global_opts;
256 /* An obstack for our working variables. */
257 static struct obstack gcse_obstack;
259 struct reg_use {rtx reg_rtx; };
261 /* Hash table of expressions. */
263 struct expr
265 /* The expression. */
266 rtx expr;
267 /* Index in the available expression bitmaps. */
268 int bitmap_index;
269 /* Next entry with the same hash. */
270 struct expr *next_same_hash;
271 /* List of anticipatable occurrences in basic blocks in the function.
272 An "anticipatable occurrence" is one that is the first occurrence in the
273 basic block, the operands are not modified in the basic block prior
274 to the occurrence and the output is not used between the start of
275 the block and the occurrence. */
276 struct occr *antic_occr;
277   /* List of available occurrences in basic blocks in the function.
278 An "available occurrence" is one that is the last occurrence in the
279 basic block and the operands are not modified by following statements in
280 the basic block [including this insn]. */
281 struct occr *avail_occr;
282 /* Non-null if the computation is PRE redundant.
283 The value is the newly created pseudo-reg to record a copy of the
284 expression in all the places that reach the redundant copy. */
285 rtx reaching_reg;
286 /* Maximum distance in instructions this expression can travel.
287 We avoid moving simple expressions for more than a few instructions
288 to keep register pressure under control.
289 A value of "0" removes restrictions on how far the expression can
290 travel. */
291 int max_distance;
294 /* Occurrence of an expression.
295 There is one per basic block. If a pattern appears more than once the
296 last appearance is used [or first for anticipatable expressions]. */
298 struct occr
300 /* Next occurrence of this expression. */
301 struct occr *next;
302 /* The insn that computes the expression. */
303 rtx insn;
304 /* Nonzero if this [anticipatable] occurrence has been deleted. */
305 char deleted_p;
306 /* Nonzero if this [available] occurrence has been copied to
307 reaching_reg. */
308 /* ??? This is mutually exclusive with deleted_p, so they could share
309 the same byte. */
310 char copied_p;
313 typedef struct occr *occr_t;
314 DEF_VEC_P (occr_t);
315 DEF_VEC_ALLOC_P (occr_t, heap);
317 /* Expression hash tables.
318 Each hash table is an array of buckets.
319 ??? It is known that if it were an array of entries, structure elements
320 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
321 not clear whether in the final analysis a sufficient amount of memory would
322 be saved as the size of the available expression bitmaps would be larger
323 [one could build a mapping table without holes afterwards though].
324 Someday I'll perform the computation and figure it out. */
326 struct hash_table_d
328 /* The table itself.
329 This is an array of `expr_hash_table_size' elements. */
330 struct expr **table;
332 /* Size of the hash table, in elements. */
333 unsigned int size;
335 /* Number of hash table elements. */
336 unsigned int n_elems;
339 /* Expression hash table. */
340 static struct hash_table_d expr_hash_table;
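/* Illustrative sketch only, not part of this file's implementation: the
   bucket-array-plus-chain shape of expr_hash_table, reduced to a
   self-contained toy keyed by strings instead of RTL.  All names below are
   hypothetical.  Guarded by #if 0 so it is never compiled here.  */
#if 0
#include <stdlib.h>
#include <string.h>

struct toy_expr
{
  const char *key;			/* stands in for the RTL expression.  */
  int bitmap_index;			/* assigned in order of first insertion.  */
  struct toy_expr *next_same_hash;	/* chain of entries sharing a bucket.  */
};

struct toy_table
{
  struct toy_expr **table;		/* array of SIZE bucket heads.  */
  unsigned int size;
  unsigned int n_elems;
};

/* Insert KEY, reusing the existing entry when an equivalent one is already
   present, in the spirit of insert_expr_in_table below (which appends to the
   chain; this toy prepends for brevity).  */

static struct toy_expr *
toy_insert (struct toy_table *t, const char *key)
{
  unsigned int h = 0, i;
  struct toy_expr *e;

  for (i = 0; key[i]; i++)
    h = h * 31 + (unsigned char) key[i];
  h %= t->size;

  for (e = t->table[h]; e != NULL; e = e->next_same_hash)
    if (strcmp (e->key, key) == 0)
      return e;

  e = (struct toy_expr *) calloc (1, sizeof (*e));
  e->key = key;
  e->bitmap_index = t->n_elems++;
  e->next_same_hash = t->table[h];
  t->table[h] = e;
  return e;
}
#endif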
342 /* This is a list of expressions which are MEMs and will be used by load
343 or store motion.
344    Load motion tracks MEMs which aren't killed by anything except themselves,
345    i.e. loads and stores to a single location.
346    We can then allow movement of these MEM refs with a little special
347    allowance.  (All stores copy the same value to the reaching reg used
348    for the loads.)  This means all values used to store into memory must have
349    no side effects so we can re-issue the setter value.
351 struct ls_expr
353 struct expr * expr; /* Gcse expression reference for LM. */
354 rtx pattern; /* Pattern of this mem. */
355 rtx pattern_regs; /* List of registers mentioned by the mem. */
356 rtx loads; /* INSN list of loads seen. */
357 rtx stores; /* INSN list of stores seen. */
358 struct ls_expr * next; /* Next in the list. */
359 int invalid; /* Invalid for some reason. */
360 int index; /* If it maps to a bitmap index. */
361 unsigned int hash_index; /* Index when in a hash table. */
362 rtx reaching_reg; /* Register to use when re-writing. */
365 /* Head of the list of load/store memory refs. */
366 static struct ls_expr * pre_ldst_mems = NULL;
368 /* Hashtable for the load/store memory refs. */
369 static htab_t pre_ldst_table = NULL;
371 /* Bitmap containing one bit for each register in the program.
372 Used when performing GCSE to track which registers have been set since
373 the start of the basic block. */
374 static regset reg_set_bitmap;
376 /* Array, indexed by basic block number, of lists of insns which modify
377 memory within that block. */
378 static VEC (rtx,heap) **modify_mem_list;
379 static bitmap modify_mem_list_set;
381 typedef struct modify_pair_s
383 rtx dest; /* A MEM. */
384 rtx dest_addr; /* The canonical address of `dest'. */
385 } modify_pair;
387 DEF_VEC_O(modify_pair);
388 DEF_VEC_ALLOC_O(modify_pair,heap);
390 /* This array parallels modify_mem_list, except that it stores MEMs
391 being set and their canonicalized memory addresses. */
392 static VEC (modify_pair,heap) **canon_modify_mem_list;
394 /* Bitmap indexed by block numbers to record which blocks contain
395 function calls. */
396 static bitmap blocks_with_calls;
398 /* Various variables for statistics gathering. */
400 /* Memory used in a pass.
401 This isn't intended to be absolutely precise. Its intent is only
402 to keep an eye on memory usage. */
403 static int bytes_used;
405 /* GCSE substitutions made. */
406 static int gcse_subst_count;
407 /* Number of copy instructions created. */
408 static int gcse_create_count;
410 /* Doing code hoisting. */
411 static bool doing_code_hoisting_p = false;
413 /* For available exprs */
414 static sbitmap *ae_kill;
416 static void compute_can_copy (void);
417 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
418 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
419 static void *gcse_alloc (unsigned long);
420 static void alloc_gcse_mem (void);
421 static void free_gcse_mem (void);
422 static void hash_scan_insn (rtx, struct hash_table_d *);
423 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
424 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
425 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
426 static int want_to_gcse_p (rtx, int *);
427 static int oprs_unchanged_p (const_rtx, const_rtx, int);
428 static int oprs_anticipatable_p (const_rtx, const_rtx);
429 static int oprs_available_p (const_rtx, const_rtx);
430 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
431 struct hash_table_d *);
432 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
433 static int expr_equiv_p (const_rtx, const_rtx);
434 static void record_last_reg_set_info (rtx, int);
435 static void record_last_mem_set_info (rtx);
436 static void record_last_set_info (rtx, const_rtx, void *);
437 static void compute_hash_table (struct hash_table_d *);
438 static void alloc_hash_table (struct hash_table_d *);
439 static void free_hash_table (struct hash_table_d *);
440 static void compute_hash_table_work (struct hash_table_d *);
441 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
442 static void compute_transp (const_rtx, int, sbitmap *);
443 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
444 struct hash_table_d *);
445 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
446 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
447 static void canon_list_insert (rtx, const_rtx, void *);
448 static void alloc_pre_mem (int, int);
449 static void free_pre_mem (void);
450 static struct edge_list *compute_pre_data (void);
451 static int pre_expr_reaches_here_p (basic_block, struct expr *,
452 basic_block);
453 static void insert_insn_end_basic_block (struct expr *, basic_block);
454 static void pre_insert_copy_insn (struct expr *, rtx);
455 static void pre_insert_copies (void);
456 static int pre_delete (void);
457 static int pre_gcse (struct edge_list *);
458 static int one_pre_gcse_pass (void);
459 static void add_label_notes (rtx, rtx);
460 static void alloc_code_hoist_mem (int, int);
461 static void free_code_hoist_mem (void);
462 static void compute_code_hoist_vbeinout (void);
463 static void compute_code_hoist_data (void);
464 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
465 int, int *);
466 static int hoist_code (void);
467 static int one_code_hoisting_pass (void);
468 static rtx process_insert_insn (struct expr *);
469 static int pre_edge_insert (struct edge_list *, struct expr **);
470 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
471 basic_block, char *);
472 static struct ls_expr * ldst_entry (rtx);
473 static void free_ldst_entry (struct ls_expr *);
474 static void free_ld_motion_mems (void);
475 static void print_ldst_list (FILE *);
476 static struct ls_expr * find_rtx_in_ldst (rtx);
477 static int simple_mem (const_rtx);
478 static void invalidate_any_buried_refs (rtx);
479 static void compute_ld_motion_mems (void);
480 static void trim_ld_motion_mems (void);
481 static void update_ld_motion_stores (struct expr *);
482 static void clear_modify_mem_tables (void);
483 static void free_modify_mem_tables (void);
484 static rtx gcse_emit_move_after (rtx, rtx, rtx);
485 static bool is_too_expensive (const char *);
487 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
488 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
490 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
491 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
493 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
494 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
496 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
497 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
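/* Illustrative sketch only: how the wrappers above are typically used.  The
   heap-based GNEW*/GCNEW* family pairs with free (), while GOBNEW* carves
   nodes out of gcse_obstack and is released in bulk; both account the
   allocation in bytes_used.  The function below is hypothetical and guarded
   by #if 0, so it is never compiled here.  */
#if 0
static void
allocation_style_example (int n_blocks)
{
  /* Heap-allocated, zeroed vector with one slot per basic block.  */
  int *per_block = GCNEWVEC (int, n_blocks);

  /* Obstack-allocated hash table node; no individual free is needed.  */
  struct expr *node = GOBNEW (struct expr);

  node->max_distance = 0;
  per_block[0] = node->max_distance;
  free (per_block);
}
#endif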
499 /* Misc. utilities. */
501 #define can_copy \
502 (this_target_gcse->x_can_copy)
503 #define can_copy_init_p \
504 (this_target_gcse->x_can_copy_init_p)
506 /* Compute which modes support reg/reg copy operations. */
508 static void
509 compute_can_copy (void)
511 int i;
512 #ifndef AVOID_CCMODE_COPIES
513 rtx reg, insn;
514 #endif
515 memset (can_copy, 0, NUM_MACHINE_MODES);
517 start_sequence ();
518 for (i = 0; i < NUM_MACHINE_MODES; i++)
519 if (GET_MODE_CLASS (i) == MODE_CC)
521 #ifdef AVOID_CCMODE_COPIES
522 can_copy[i] = 0;
523 #else
524 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
525 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
526 if (recog (PATTERN (insn), insn, NULL) >= 0)
527 can_copy[i] = 1;
528 #endif
530 else
531 can_copy[i] = 1;
533 end_sequence ();
536 /* Returns whether the mode supports reg/reg copy operations. */
538 bool
539 can_copy_p (enum machine_mode mode)
541 if (! can_copy_init_p)
543 compute_can_copy ();
544 can_copy_init_p = true;
547 return can_copy[mode] != 0;
550 /* Cover function to xmalloc to record bytes allocated. */
552 static void *
553 gmalloc (size_t size)
555 bytes_used += size;
556 return xmalloc (size);
559 /* Cover function to xcalloc to record bytes allocated. */
561 static void *
562 gcalloc (size_t nelem, size_t elsize)
564 bytes_used += nelem * elsize;
565 return xcalloc (nelem, elsize);
568 /* Cover function to obstack_alloc. */
570 static void *
571 gcse_alloc (unsigned long size)
573 bytes_used += size;
574 return obstack_alloc (&gcse_obstack, size);
577 /* Allocate memory for the reg/memory set tracking tables.
578 This is called at the start of each pass. */
580 static void
581 alloc_gcse_mem (void)
583 /* Allocate vars to track sets of regs. */
584 reg_set_bitmap = ALLOC_REG_SET (NULL);
586 /* Allocate array to keep a list of insns which modify memory in each
587 basic block. */
588 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
589 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
590 last_basic_block);
591 modify_mem_list_set = BITMAP_ALLOC (NULL);
592 blocks_with_calls = BITMAP_ALLOC (NULL);
595 /* Free memory allocated by alloc_gcse_mem. */
597 static void
598 free_gcse_mem (void)
600 FREE_REG_SET (reg_set_bitmap);
602 free_modify_mem_tables ();
603 BITMAP_FREE (modify_mem_list_set);
604 BITMAP_FREE (blocks_with_calls);
607 /* Compute the local properties of each recorded expression.
609 Local properties are those that are defined by the block, irrespective of
610 other blocks.
612 An expression is transparent in a block if its operands are not modified
613 in the block.
615 An expression is computed (locally available) in a block if it is computed
616    at least once and the expression would contain the same value if the
617 computation was moved to the end of the block.
619 An expression is locally anticipatable in a block if it is computed at
620    least once and the expression would contain the same value if the computation
621 was moved to the beginning of the block.
623    We call this routine for PRE and code hoisting.  Both compute
624 basically the same information and thus can easily share this code.
626 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
627 properties. If NULL, then it is not necessary to compute or record that
628 particular property.
630 TABLE controls which hash table to look at. */
632 static void
633 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
634 struct hash_table_d *table)
636 unsigned int i;
638 /* Initialize any bitmaps that were passed in. */
639 if (transp)
641 sbitmap_vector_ones (transp, last_basic_block);
644 if (comp)
645 sbitmap_vector_zero (comp, last_basic_block);
646 if (antloc)
647 sbitmap_vector_zero (antloc, last_basic_block);
649 for (i = 0; i < table->size; i++)
651 struct expr *expr;
653 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
655 int indx = expr->bitmap_index;
656 struct occr *occr;
658 /* The expression is transparent in this block if it is not killed.
659 We start by assuming all are transparent [none are killed], and
660 then reset the bits for those that are. */
661 if (transp)
662 compute_transp (expr->expr, indx, transp);
664 /* The occurrences recorded in antic_occr are exactly those that
665 we want to set to nonzero in ANTLOC. */
666 if (antloc)
667 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
669 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
671 /* While we're scanning the table, this is a good place to
672 initialize this. */
673 occr->deleted_p = 0;
676 /* The occurrences recorded in avail_occr are exactly those that
677 we want to set to nonzero in COMP. */
678 if (comp)
679 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
681 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
683 /* While we're scanning the table, this is a good place to
684 initialize this. */
685 occr->copied_p = 0;
688 /* While we're scanning the table, this is a good place to
689 initialize this. */
690 expr->reaching_reg = 0;
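/* Illustrative sketch only: what the three local property sets computed
   above might look like for one expression "a + b" over three hypothetical
   blocks (bit i stands for block i).  Guarded by #if 0, never compiled
   here.  */
#if 0
#include <assert.h>

static void
local_properties_example (void)
{
  /* Block 0: "a = ...; x = a + b;" - computed last, but an operand is
     modified earlier in the block.
     Block 1: neither computes a + b nor touches a or b.
     Block 2: "y = a + b;" as its first statement, and a, b are left alone.  */
  unsigned int transp = 0x6;	/* blocks 1, 2: operands never modified.  */
  unsigned int comp   = 0x5;	/* blocks 0, 2: value still valid at block end.  */
  unsigned int antloc = 0x4;	/* block 2: value valid at block start.  */

  /* In this example every anticipatable block is also a computing block;
     that holds because block 2 never modifies the operands afterwards.  */
  assert ((antloc & comp) == antloc);
  (void) transp;
}
#endif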
695 /* Hash table support. */
697 struct reg_avail_info
699 basic_block last_bb;
700 int first_set;
701 int last_set;
704 static struct reg_avail_info *reg_avail_info;
705 static basic_block current_bb;
707 /* See whether X, the source of a set, is something we want to consider for
708 GCSE. */
710 static int
711 want_to_gcse_p (rtx x, int *max_distance_ptr)
713 #ifdef STACK_REGS
714 /* On register stack architectures, don't GCSE constants from the
715 constant pool, as the benefits are often swamped by the overhead
716 of shuffling the register stack between basic blocks. */
717 if (IS_STACK_MODE (GET_MODE (x)))
718 x = avoid_constant_pool_reference (x);
719 #endif
721 /* GCSE'ing constants:
723 We do not specifically distinguish between constant and non-constant
724 expressions in PRE and Hoist. We use set_src_cost below to limit
725 the maximum distance simple expressions can travel.
727 Nevertheless, constants are much easier to GCSE, and, hence,
728 it is easy to overdo the optimizations. Usually, excessive PRE and
729    Hoisting of constants leads to increased register pressure.
731    RA can deal with this by rematerializing some of the constants.
732    Therefore, it is important that the back-end generates sets of constants
733    in a way that allows reload to rematerialize them under high register
734    pressure, i.e., a pseudo register with REG_EQUAL to a constant
735 is set only once. Failing to do so will result in IRA/reload
736 spilling such constants under high register pressure instead of
737 rematerializing them. */
739 switch (GET_CODE (x))
741 case REG:
742 case SUBREG:
743 case CALL:
744 return 0;
746 case CONST_INT:
747 case CONST_DOUBLE:
748 case CONST_FIXED:
749 case CONST_VECTOR:
750 if (!doing_code_hoisting_p)
751 /* Do not PRE constants. */
752 return 0;
754 /* FALLTHRU */
756 default:
757 if (doing_code_hoisting_p)
758 /* PRE doesn't implement max_distance restriction. */
760 int cost;
761 int max_distance;
763 gcc_assert (!optimize_function_for_speed_p (cfun)
764 && optimize_function_for_size_p (cfun));
765 cost = set_src_cost (x, 0);
767 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
769 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
770 if (max_distance == 0)
771 return 0;
773 gcc_assert (max_distance > 0);
775 else
776 max_distance = 0;
778 if (max_distance_ptr)
779 *max_distance_ptr = max_distance;
782 return can_assign_to_reg_without_clobbers_p (x);
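/* Illustrative sketch only: the shape of the distance throttle used above,
   pulled out as a hypothetical helper.  The two parameters referenced
   (gcse-unrestricted-cost and gcse-cost-distance-ratio) have target- and
   release-dependent defaults.  Guarded by #if 0, never compiled here.  */
#if 0
static int
distance_for_cost_example (int cost)
{
  /* Expressions at least as expensive as GCSE_UNRESTRICTED_COST insns may
     move freely; in struct expr a max_distance of 0 encodes "no limit".  */
  if (cost >= COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
    return 0;

  /* Cheaper expressions are limited to a few insns of motion.  Note that in
     want_to_gcse_p a computed distance of 0 instead means "too cheap to be
     worth GCSE-ing at all".  */
  return (GCSE_COST_DISTANCE_RATIO * cost) / 10;
}
#endif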
786 /* Used internally by can_assign_to_reg_without_clobbers_p. */
788 static GTY(()) rtx test_insn;
790 /* Return true if we can assign X to a pseudo register such that the
791 resulting insn does not result in clobbering a hard register as a
792 side-effect.
794 Additionally, if the target requires it, check that the resulting insn
795 can be copied. If it cannot, this means that X is special and probably
796 has hidden side-effects we don't want to mess with.
798 This function is typically used by code motion passes, to verify
799 that it is safe to insert an insn without worrying about clobbering
800 maybe live hard regs. */
802 bool
803 can_assign_to_reg_without_clobbers_p (rtx x)
805 int num_clobbers = 0;
806 int icode;
808 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
809 if (general_operand (x, GET_MODE (x)))
810 return 1;
811 else if (GET_MODE (x) == VOIDmode)
812 return 0;
814 /* Otherwise, check if we can make a valid insn from it. First initialize
815 our test insn if we haven't already. */
816 if (test_insn == 0)
818 test_insn
819 = make_insn_raw (gen_rtx_SET (VOIDmode,
820 gen_rtx_REG (word_mode,
821 FIRST_PSEUDO_REGISTER * 2),
822 const0_rtx));
823 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
826 /* Now make an insn like the one we would make when GCSE'ing and see if
827 valid. */
828 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
829 SET_SRC (PATTERN (test_insn)) = x;
831 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
832 if (icode < 0)
833 return false;
835 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
836 return false;
838 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
839 return false;
841 return true;
844 /* Return nonzero if the operands of expression X are unchanged from the
845 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
846 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
848 static int
849 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
851 int i, j;
852 enum rtx_code code;
853 const char *fmt;
855 if (x == 0)
856 return 1;
858 code = GET_CODE (x);
859 switch (code)
861 case REG:
863 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
865 if (info->last_bb != current_bb)
866 return 1;
867 if (avail_p)
868 return info->last_set < DF_INSN_LUID (insn);
869 else
870 return info->first_set >= DF_INSN_LUID (insn);
873 case MEM:
874 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
875 x, avail_p))
876 return 0;
877 else
878 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
880 case PRE_DEC:
881 case PRE_INC:
882 case POST_DEC:
883 case POST_INC:
884 case PRE_MODIFY:
885 case POST_MODIFY:
886 return 0;
888 case PC:
889 case CC0: /*FIXME*/
890 case CONST:
891 case CONST_INT:
892 case CONST_DOUBLE:
893 case CONST_FIXED:
894 case CONST_VECTOR:
895 case SYMBOL_REF:
896 case LABEL_REF:
897 case ADDR_VEC:
898 case ADDR_DIFF_VEC:
899 return 1;
901 default:
902 break;
905 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
907 if (fmt[i] == 'e')
909 /* If we are about to do the last recursive call needed at this
910 level, change it into iteration. This function is called enough
911 to be worth it. */
912 if (i == 0)
913 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
915 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
916 return 0;
918 else if (fmt[i] == 'E')
919 for (j = 0; j < XVECLEN (x, i); j++)
920 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
921 return 0;
924 return 1;
927 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
929 struct mem_conflict_info
931   /* A memory reference for a load instruction; mems_conflict_for_gcse_p will
932 see if a memory store conflicts with this memory load. */
933 const_rtx mem;
935 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
936 references. */
937 bool conflict;
940 /* DEST is the output of an instruction. If it is a memory reference and
941 possibly conflicts with the load found in DATA, then communicate this
942 information back through DATA. */
944 static void
945 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
946 void *data)
948 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
950 while (GET_CODE (dest) == SUBREG
951 || GET_CODE (dest) == ZERO_EXTRACT
952 || GET_CODE (dest) == STRICT_LOW_PART)
953 dest = XEXP (dest, 0);
955 /* If DEST is not a MEM, then it will not conflict with the load. Note
956 that function calls are assumed to clobber memory, but are handled
957 elsewhere. */
958 if (! MEM_P (dest))
959 return;
961 /* If we are setting a MEM in our list of specially recognized MEMs,
962 don't mark as killed this time. */
963 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
965 if (!find_rtx_in_ldst (dest))
966 mci->conflict = true;
967 return;
970 if (true_dependence (dest, GET_MODE (dest), mci->mem))
971 mci->conflict = true;
974 /* Return nonzero if the expression in X (a memory reference) is killed
975 in block BB before or after the insn with the LUID in UID_LIMIT.
976 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
977 before UID_LIMIT.
979 To check the entire block, set UID_LIMIT to max_uid + 1 and
980 AVAIL_P to 0. */
982 static int
983 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
984 int avail_p)
986 VEC (rtx,heap) *list = modify_mem_list[bb->index];
987 rtx setter;
988 unsigned ix;
990   /* If this is a read-only MEM then we aren't going to be changing it.  */
991 if (MEM_READONLY_P (x))
992 return 0;
994 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
996 struct mem_conflict_info mci;
998 /* Ignore entries in the list that do not apply. */
999 if ((avail_p
1000 && DF_INSN_LUID (setter) < uid_limit)
1001 || (! avail_p
1002 && DF_INSN_LUID (setter) > uid_limit))
1003 continue;
1005 /* If SETTER is a call everything is clobbered. Note that calls
1006 to pure functions are never put on the list, so we need not
1007 worry about them. */
1008 if (CALL_P (setter))
1009 return 1;
1011 /* SETTER must be an INSN of some kind that sets memory. Call
1012 note_stores to examine each hunk of memory that is modified. */
1013 mci.mem = x;
1014 mci.conflict = false;
1015 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1016 if (mci.conflict)
1017 return 1;
1019 return 0;
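/* Illustrative sketch only: the idiom described in the comment above for
   asking whether a MEM is killed anywhere in a block.  The helper name is
   hypothetical.  Guarded by #if 0, never compiled here.  */
#if 0
static int
mem_killed_anywhere_in_block_p (const_basic_block bb, const_rtx mem)
{
  /* With AVAIL_P == 0, every recorded store whose LUID is below the limit
     is examined, so max_uid + 1 covers the whole block.  */
  return load_killed_in_block_p (bb, get_max_uid () + 1, mem, 0);
}
#endif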
1022 /* Return nonzero if the operands of expression X are unchanged from
1023 the start of INSN's basic block up to but not including INSN. */
1025 static int
1026 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1028 return oprs_unchanged_p (x, insn, 0);
1031 /* Return nonzero if the operands of expression X are unchanged from
1032 INSN to the end of INSN's basic block. */
1034 static int
1035 oprs_available_p (const_rtx x, const_rtx insn)
1037 return oprs_unchanged_p (x, insn, 1);
1040 /* Hash expression X.
1042 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1043 indicating if a volatile operand is found or if the expression contains
1044 something we don't want to insert in the table. HASH_TABLE_SIZE is
1045 the current size of the hash table to be probed. */
1047 static unsigned int
1048 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1049 int hash_table_size)
1051 unsigned int hash;
1053 *do_not_record_p = 0;
1055 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1056 return hash % hash_table_size;
1059 /* Return nonzero if exp1 is equivalent to exp2. */
1061 static int
1062 expr_equiv_p (const_rtx x, const_rtx y)
1064 return exp_equiv_p (x, y, 0, true);
1067 /* Insert expression X in INSN in the hash TABLE.
1068 If it is already present, record it as the last occurrence in INSN's
1069 basic block.
1071 MODE is the mode of the value X is being stored into.
1072 It is only used if X is a CONST_INT.
1074 ANTIC_P is nonzero if X is an anticipatable expression.
1075 AVAIL_P is nonzero if X is an available expression.
1077 MAX_DISTANCE is the maximum distance in instructions this expression can
1078 be moved. */
1080 static void
1081 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1082 int avail_p, int max_distance, struct hash_table_d *table)
1084 int found, do_not_record_p;
1085 unsigned int hash;
1086 struct expr *cur_expr, *last_expr = NULL;
1087 struct occr *antic_occr, *avail_occr;
1089 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1091 /* Do not insert expression in table if it contains volatile operands,
1092 or if hash_expr determines the expression is something we don't want
1093 to or can't handle. */
1094 if (do_not_record_p)
1095 return;
1097 cur_expr = table->table[hash];
1098 found = 0;
1100 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1102 /* If the expression isn't found, save a pointer to the end of
1103 the list. */
1104 last_expr = cur_expr;
1105 cur_expr = cur_expr->next_same_hash;
1108 if (! found)
1110 cur_expr = GOBNEW (struct expr);
1111 bytes_used += sizeof (struct expr);
1112 if (table->table[hash] == NULL)
1113 /* This is the first pattern that hashed to this index. */
1114 table->table[hash] = cur_expr;
1115 else
1116 /* Add EXPR to end of this hash chain. */
1117 last_expr->next_same_hash = cur_expr;
1119 /* Set the fields of the expr element. */
1120 cur_expr->expr = x;
1121 cur_expr->bitmap_index = table->n_elems++;
1122 cur_expr->next_same_hash = NULL;
1123 cur_expr->antic_occr = NULL;
1124 cur_expr->avail_occr = NULL;
1125 gcc_assert (max_distance >= 0);
1126 cur_expr->max_distance = max_distance;
1128 else
1129 gcc_assert (cur_expr->max_distance == max_distance);
1131 /* Now record the occurrence(s). */
1132 if (antic_p)
1134 antic_occr = cur_expr->antic_occr;
1136 if (antic_occr
1137 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1138 antic_occr = NULL;
1140 if (antic_occr)
1141 /* Found another instance of the expression in the same basic block.
1142 Prefer the currently recorded one. We want the first one in the
1143 block and the block is scanned from start to end. */
1144 ; /* nothing to do */
1145 else
1147 /* First occurrence of this expression in this basic block. */
1148 antic_occr = GOBNEW (struct occr);
1149 bytes_used += sizeof (struct occr);
1150 antic_occr->insn = insn;
1151 antic_occr->next = cur_expr->antic_occr;
1152 antic_occr->deleted_p = 0;
1153 cur_expr->antic_occr = antic_occr;
1157 if (avail_p)
1159 avail_occr = cur_expr->avail_occr;
1161 if (avail_occr
1162 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1164 /* Found another instance of the expression in the same basic block.
1165 Prefer this occurrence to the currently recorded one. We want
1166 the last one in the block and the block is scanned from start
1167 to end. */
1168 avail_occr->insn = insn;
1170 else
1172 /* First occurrence of this expression in this basic block. */
1173 avail_occr = GOBNEW (struct occr);
1174 bytes_used += sizeof (struct occr);
1175 avail_occr->insn = insn;
1176 avail_occr->next = cur_expr->avail_occr;
1177 avail_occr->deleted_p = 0;
1178 cur_expr->avail_occr = avail_occr;
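/* Illustrative note: for a block that at the source level looks like

       x = a + b;   <- recorded as the anticipatable occurrence (first in
                       the block, operands still unmodified before it)
       a = 0;
       y = a + b;   <- recorded as the available occurrence (last in the
                       block, operands unmodified after it)

   only the first computation may be moved above the block and only the last
   one is still valid at the block's end, which is why the two lists above
   keep the first and the last instance respectively.  */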
1183 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1185 static void
1186 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1188 rtx src = SET_SRC (set);
1189 rtx dest = SET_DEST (set);
1190 rtx note;
1192 if (GET_CODE (src) == CALL)
1193 hash_scan_call (src, insn, table);
1195 else if (REG_P (dest))
1197 unsigned int regno = REGNO (dest);
1198 int max_distance = 0;
1200 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1202 This allows us to do a single GCSE pass and still eliminate
1203 redundant constants, addresses or other expressions that are
1204 constructed with multiple instructions.
1206 However, keep the original SRC if INSN is a simple reg-reg move.
1207 In this case, there will almost always be a REG_EQUAL note on the
1208 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1209 for INSN, we miss copy propagation opportunities and we perform the
1210 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1211 do more than one PRE GCSE pass.
1213 Note that this does not impede profitable constant propagations. We
1214 "look through" reg-reg sets in lookup_avail_set. */
1215 note = find_reg_equal_equiv_note (insn);
1216 if (note != 0
1217 && REG_NOTE_KIND (note) == REG_EQUAL
1218 && !REG_P (src)
1219 && want_to_gcse_p (XEXP (note, 0), NULL))
1220 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1222 /* Only record sets of pseudo-regs in the hash table. */
1223 if (regno >= FIRST_PSEUDO_REGISTER
1224 /* Don't GCSE something if we can't do a reg/reg copy. */
1225 && can_copy_p (GET_MODE (dest))
1226 /* GCSE commonly inserts instruction after the insn. We can't
1227 do that easily for EH edges so disable GCSE on these for now. */
1228 /* ??? We can now easily create new EH landing pads at the
1229 gimple level, for splitting edges; there's no reason we
1230 can't do the same thing at the rtl level. */
1231 && !can_throw_internal (insn)
1232 /* Is SET_SRC something we want to gcse? */
1233 && want_to_gcse_p (src, &max_distance)
1234 /* Don't CSE a nop. */
1235 && ! set_noop_p (set)
1236 /* Don't GCSE if it has attached REG_EQUIV note.
1237      At this point only function parameters should have
1238      REG_EQUIV notes, and if the argument slot is used somewhere
1239      explicitly, it means the address of the parameter has been taken,
1240 so we should not extend the lifetime of the pseudo. */
1241 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1243 /* An expression is not anticipatable if its operands are
1244 modified before this insn or if this is not the only SET in
1245 this insn. The latter condition does not have to mean that
1246 SRC itself is not anticipatable, but we just will not be
1247 able to handle code motion of insns with multiple sets. */
1248 int antic_p = oprs_anticipatable_p (src, insn)
1249 && !multiple_sets (insn);
1250 /* An expression is not available if its operands are
1251 subsequently modified, including this insn. It's also not
1252 available if this is a branch, because we can't insert
1253 a set after the branch. */
1254 int avail_p = (oprs_available_p (src, insn)
1255 && ! JUMP_P (insn));
1257 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1258 max_distance, table);
1261   /* In case of a store we want to consider the memory value as available in
1262      the REG stored in that memory.  This makes it possible to remove
1263      redundant loads due to stores to the same location.  */
1264 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1266 unsigned int regno = REGNO (src);
1267 int max_distance = 0;
1269 /* Only record sets of pseudo-regs in the hash table. */
1270 if (regno >= FIRST_PSEUDO_REGISTER
1271 /* Don't GCSE something if we can't do a reg/reg copy. */
1272 && can_copy_p (GET_MODE (src))
1273 /* GCSE commonly inserts instruction after the insn. We can't
1274 do that easily for EH edges so disable GCSE on these for now. */
1275 && !can_throw_internal (insn)
1276 /* Is SET_DEST something we want to gcse? */
1277 && want_to_gcse_p (dest, &max_distance)
1278 /* Don't CSE a nop. */
1279 && ! set_noop_p (set)
1280 /* Don't GCSE if it has attached REG_EQUIV note.
1281      At this point only function parameters should have
1282      REG_EQUIV notes, and if the argument slot is used somewhere
1283      explicitly, it means the address of the parameter has been taken,
1284 so we should not extend the lifetime of the pseudo. */
1285 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1286 || ! MEM_P (XEXP (note, 0))))
1288 /* Stores are never anticipatable. */
1289 int antic_p = 0;
1290 /* An expression is not available if its operands are
1291 subsequently modified, including this insn. It's also not
1292 available if this is a branch, because we can't insert
1293 a set after the branch. */
1294 int avail_p = oprs_available_p (dest, insn)
1295 && ! JUMP_P (insn);
1297 /* Record the memory expression (DEST) in the hash table. */
1298 insert_expr_in_table (dest, GET_MODE (dest), insn,
1299 antic_p, avail_p, max_distance, table);
1304 static void
1305 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1306 struct hash_table_d *table ATTRIBUTE_UNUSED)
1308 /* Currently nothing to do. */
1311 static void
1312 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1313 struct hash_table_d *table ATTRIBUTE_UNUSED)
1315 /* Currently nothing to do. */
1318 /* Process INSN and add hash table entries as appropriate. */
1320 static void
1321 hash_scan_insn (rtx insn, struct hash_table_d *table)
1323 rtx pat = PATTERN (insn);
1324 int i;
1326 /* Pick out the sets of INSN and for other forms of instructions record
1327 what's been modified. */
1329 if (GET_CODE (pat) == SET)
1330 hash_scan_set (pat, insn, table);
1332 else if (GET_CODE (pat) == CLOBBER)
1333 hash_scan_clobber (pat, insn, table);
1335 else if (GET_CODE (pat) == CALL)
1336 hash_scan_call (pat, insn, table);
1338 else if (GET_CODE (pat) == PARALLEL)
1339 for (i = 0; i < XVECLEN (pat, 0); i++)
1341 rtx x = XVECEXP (pat, 0, i);
1343 if (GET_CODE (x) == SET)
1344 hash_scan_set (x, insn, table);
1345 else if (GET_CODE (x) == CLOBBER)
1346 hash_scan_clobber (x, insn, table);
1347 else if (GET_CODE (x) == CALL)
1348 hash_scan_call (x, insn, table);
1352 /* Dump the hash table TABLE to file FILE under the name NAME. */
1354 static void
1355 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1357 int i;
1358 /* Flattened out table, so it's printed in proper order. */
1359 struct expr **flat_table;
1360 unsigned int *hash_val;
1361 struct expr *expr;
1363 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1364 hash_val = XNEWVEC (unsigned int, table->n_elems);
1366 for (i = 0; i < (int) table->size; i++)
1367 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1369 flat_table[expr->bitmap_index] = expr;
1370 hash_val[expr->bitmap_index] = i;
1373 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1374 name, table->size, table->n_elems);
1376 for (i = 0; i < (int) table->n_elems; i++)
1377 if (flat_table[i] != 0)
1379 expr = flat_table[i];
1380 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1381 expr->bitmap_index, hash_val[i], expr->max_distance);
1382 print_rtl (file, expr->expr);
1383 fprintf (file, "\n");
1386 fprintf (file, "\n");
1388 free (flat_table);
1389 free (hash_val);
1392 /* Record register first/last/block set information for REGNO in INSN.
1394 first_set records the first place in the block where the register
1395 is set and is used to compute "anticipatability".
1397 last_set records the last place in the block where the register
1398 is set and is used to compute "availability".
1400 last_bb records the block for which first_set and last_set are
1401 valid, as a quick test to invalidate them. */
1403 static void
1404 record_last_reg_set_info (rtx insn, int regno)
1406 struct reg_avail_info *info = &reg_avail_info[regno];
1407 int luid = DF_INSN_LUID (insn);
1409 info->last_set = luid;
1410 if (info->last_bb != current_bb)
1412 info->last_bb = current_bb;
1413 info->first_set = luid;
1417 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1418 Note we store a pair of elements in the list, so they have to be
1419 taken off pairwise. */
1421 static void
1422 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1423 void * v_insn)
1425 rtx dest_addr, insn;
1426 int bb;
1427 modify_pair *pair;
1429 while (GET_CODE (dest) == SUBREG
1430 || GET_CODE (dest) == ZERO_EXTRACT
1431 || GET_CODE (dest) == STRICT_LOW_PART)
1432 dest = XEXP (dest, 0);
1434 /* If DEST is not a MEM, then it will not conflict with a load. Note
1435 that function calls are assumed to clobber memory, but are handled
1436 elsewhere. */
1438 if (! MEM_P (dest))
1439 return;
1441 dest_addr = get_addr (XEXP (dest, 0));
1442 dest_addr = canon_rtx (dest_addr);
1443 insn = (rtx) v_insn;
1444 bb = BLOCK_FOR_INSN (insn)->index;
1446 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1447 pair->dest = dest;
1448 pair->dest_addr = dest_addr;
1451 /* Record memory modification information for INSN. We do not actually care
1452 about the memory location(s) that are set, or even how they are set (consider
1453 a CALL_INSN). We merely need to record which insns modify memory. */
1455 static void
1456 record_last_mem_set_info (rtx insn)
1458 int bb = BLOCK_FOR_INSN (insn)->index;
1460 /* load_killed_in_block_p will handle the case of calls clobbering
1461 everything. */
1462 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1463 bitmap_set_bit (modify_mem_list_set, bb);
1465 if (CALL_P (insn))
1466 bitmap_set_bit (blocks_with_calls, bb);
1467 else
1468 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1471 /* Called from compute_hash_table via note_stores to handle one
1472 SET or CLOBBER in an insn. DATA is really the instruction in which
1473 the SET is taking place. */
1475 static void
1476 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1478 rtx last_set_insn = (rtx) data;
1480 if (GET_CODE (dest) == SUBREG)
1481 dest = SUBREG_REG (dest);
1483 if (REG_P (dest))
1484 record_last_reg_set_info (last_set_insn, REGNO (dest));
1485 else if (MEM_P (dest)
1486 /* Ignore pushes, they clobber nothing. */
1487 && ! push_operand (dest, GET_MODE (dest)))
1488 record_last_mem_set_info (last_set_insn);
1491 /* Top level function to create an expression hash table.
1493 Expression entries are placed in the hash table if
1494 - they are of the form (set (pseudo-reg) src),
1495 - src is something we want to perform GCSE on,
1496 - none of the operands are subsequently modified in the block
1498 Currently src must be a pseudo-reg or a const_int.
1500 TABLE is the table computed. */
1502 static void
1503 compute_hash_table_work (struct hash_table_d *table)
1505 int i;
1507   /* Re-cache any INSN_LIST nodes we have allocated.  */
1508 clear_modify_mem_tables ();
1509 /* Some working arrays used to track first and last set in each block. */
1510 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1512 for (i = 0; i < max_reg_num (); ++i)
1513 reg_avail_info[i].last_bb = NULL;
1515 FOR_EACH_BB (current_bb)
1517 rtx insn;
1518 unsigned int regno;
1520 /* First pass over the instructions records information used to
1521 determine when registers and memory are first and last set. */
1522 FOR_BB_INSNS (current_bb, insn)
1524 if (!NONDEBUG_INSN_P (insn))
1525 continue;
1527 if (CALL_P (insn))
1529 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1530 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1531 record_last_reg_set_info (insn, regno);
1533 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1534 record_last_mem_set_info (insn);
1537 note_stores (PATTERN (insn), record_last_set_info, insn);
1540 /* The next pass builds the hash table. */
1541 FOR_BB_INSNS (current_bb, insn)
1542 if (NONDEBUG_INSN_P (insn))
1543 hash_scan_insn (insn, table);
1546 free (reg_avail_info);
1547 reg_avail_info = NULL;
1550 /* Allocate space for the set/expr hash TABLE.
1551    The insn count of the function is used to determine the number of buckets to use.  */
1553 static void
1554 alloc_hash_table (struct hash_table_d *table)
1556 int n;
1558 n = get_max_insn_count ();
1560 table->size = n / 4;
1561 if (table->size < 11)
1562 table->size = 11;
1564 /* Attempt to maintain efficient use of hash table.
1565 Making it an odd number is simplest for now.
1566 ??? Later take some measurements. */
1567 table->size |= 1;
1568 n = table->size * sizeof (struct expr *);
1569 table->table = GNEWVAR (struct expr *, n);
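/* Illustrative sketch only: the sizing rule above, as a hypothetical helper.
   For a function with 1000 insns: 1000 / 4 = 250, forced odd gives 251
   buckets; very small functions are clamped to the 11-bucket minimum.
   Guarded by #if 0, never compiled here.  */
#if 0
static unsigned int
bucket_count_example (int n_insns)
{
  unsigned int size = n_insns / 4;

  if (size < 11)
    size = 11;
  return size | 1;	/* keep the bucket count odd.  */
}
#endif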
1572 /* Free things allocated by alloc_hash_table. */
1574 static void
1575 free_hash_table (struct hash_table_d *table)
1577 free (table->table);
1580 /* Compute the expression hash table TABLE. */
1582 static void
1583 compute_hash_table (struct hash_table_d *table)
1585 /* Initialize count of number of entries in hash table. */
1586 table->n_elems = 0;
1587 memset (table->table, 0, table->size * sizeof (struct expr *));
1589 compute_hash_table_work (table);
1592 /* Expression tracking support. */
1594 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1595 static void
1596 clear_modify_mem_tables (void)
1598 unsigned i;
1599 bitmap_iterator bi;
1601 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1603 VEC_free (rtx, heap, modify_mem_list[i]);
1604 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1606 bitmap_clear (modify_mem_list_set);
1607 bitmap_clear (blocks_with_calls);
1610 /* Release memory used by modify_mem_list_set. */
1612 static void
1613 free_modify_mem_tables (void)
1615 clear_modify_mem_tables ();
1616 free (modify_mem_list);
1617 free (canon_modify_mem_list);
1618 modify_mem_list = 0;
1619 canon_modify_mem_list = 0;
1622 /* For each block, compute whether X is transparent. X is either an
1623 expression or an assignment [though we don't care which, for this context
1624 an assignment is treated as an expression]. For each block where an
1625 element of X is modified, reset the INDX bit in BMAP. */
1627 static void
1628 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1630 int i, j;
1631 enum rtx_code code;
1632 const char *fmt;
1634 /* repeat is used to turn tail-recursion into iteration since GCC
1635 can't do it when there's no return value. */
1636 repeat:
1638 if (x == 0)
1639 return;
1641 code = GET_CODE (x);
1642 switch (code)
1644 case REG:
1646 df_ref def;
1647 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1648 def;
1649 def = DF_REF_NEXT_REG (def))
1650 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1653 return;
1655 case MEM:
1656 if (! MEM_READONLY_P (x))
1658 bitmap_iterator bi;
1659 unsigned bb_index;
1661 /* First handle all the blocks with calls. We don't need to
1662 do any list walking for them. */
1663 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1665 RESET_BIT (bmap[bb_index], indx);
1668 /* Now iterate over the blocks which have memory modifications
1669 but which do not have any calls. */
1670 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1671 blocks_with_calls,
1672 0, bb_index, bi)
1674 VEC (modify_pair,heap) *list
1675 = canon_modify_mem_list[bb_index];
1676 modify_pair *pair;
1677 unsigned ix;
1679 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1681 rtx dest = pair->dest;
1682 rtx dest_addr = pair->dest_addr;
1684 if (canon_true_dependence (dest, GET_MODE (dest),
1685 dest_addr, x, NULL_RTX))
1686 RESET_BIT (bmap[bb_index], indx);
1691 x = XEXP (x, 0);
1692 goto repeat;
1694 case PC:
1695 case CC0: /*FIXME*/
1696 case CONST:
1697 case CONST_INT:
1698 case CONST_DOUBLE:
1699 case CONST_FIXED:
1700 case CONST_VECTOR:
1701 case SYMBOL_REF:
1702 case LABEL_REF:
1703 case ADDR_VEC:
1704 case ADDR_DIFF_VEC:
1705 return;
1707 default:
1708 break;
1711 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1713 if (fmt[i] == 'e')
1715 /* If we are about to do the last recursive call
1716 needed at this level, change it into iteration.
1717 This function is called enough to be worth it. */
1718 if (i == 0)
1720 x = XEXP (x, i);
1721 goto repeat;
1724 compute_transp (XEXP (x, i), indx, bmap);
1726 else if (fmt[i] == 'E')
1727 for (j = 0; j < XVECLEN (x, i); j++)
1728 compute_transp (XVECEXP (x, i, j), indx, bmap);
1732 /* Compute PRE+LCM working variables. */
1734 /* Local properties of expressions. */
1736 /* Nonzero for expressions that are transparent in the block. */
1737 static sbitmap *transp;
1739 /* Nonzero for expressions that are computed (available) in the block. */
1740 static sbitmap *comp;
1742 /* Nonzero for expressions that are locally anticipatable in the block. */
1743 static sbitmap *antloc;
1745 /* Nonzero for expressions where this block is an optimal computation
1746 point. */
1747 static sbitmap *pre_optimal;
1749 /* Nonzero for expressions which are redundant in a particular block. */
1750 static sbitmap *pre_redundant;
1752 /* Nonzero for expressions which should be inserted on a specific edge. */
1753 static sbitmap *pre_insert_map;
1755 /* Nonzero for expressions which should be deleted in a specific block. */
1756 static sbitmap *pre_delete_map;
1758 /* Allocate vars used for PRE analysis. */
1760 static void
1761 alloc_pre_mem (int n_blocks, int n_exprs)
1763 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1765 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1767 pre_optimal = NULL;
1768 pre_redundant = NULL;
1769 pre_insert_map = NULL;
1770 pre_delete_map = NULL;
1771 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1773 /* pre_insert and pre_delete are allocated later. */
1776 /* Free vars used for PRE analysis. */
1778 static void
1779 free_pre_mem (void)
1781 sbitmap_vector_free (transp);
1782 sbitmap_vector_free (comp);
1784 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1786 if (pre_optimal)
1787 sbitmap_vector_free (pre_optimal);
1788 if (pre_redundant)
1789 sbitmap_vector_free (pre_redundant);
1790 if (pre_insert_map)
1791 sbitmap_vector_free (pre_insert_map);
1792 if (pre_delete_map)
1793 sbitmap_vector_free (pre_delete_map);
1795 transp = comp = NULL;
1796 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1799 /* Remove certain expressions from anticipatable and transparent
1800    sets of basic blocks that have an incoming abnormal edge.
1801 For PRE remove potentially trapping expressions to avoid placing
1802 them on abnormal edges. For hoisting remove memory references that
1803 can be clobbered by calls. */
1805 static void
1806 prune_expressions (bool pre_p)
1808 sbitmap prune_exprs;
1809 struct expr *expr;
1810 unsigned int ui;
1811 basic_block bb;
1813 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1814 sbitmap_zero (prune_exprs);
1815 for (ui = 0; ui < expr_hash_table.size; ui++)
1817 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1819 /* Note potentially trapping expressions. */
1820 if (may_trap_p (expr->expr))
1822 SET_BIT (prune_exprs, expr->bitmap_index);
1823 continue;
1826 if (!pre_p && MEM_P (expr->expr))
1827 /* Note memory references that can be clobbered by a call.
1828 We do not split abnormal edges in hoisting, so if a
1829 memory reference were hoisted along an abnormal edge,
1830 it would be placed /before/ the call. Therefore, only
1831 constant memory references can be hoisted along abnormal
1832 edges. */
1834 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1835 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1836 continue;
1838 if (MEM_READONLY_P (expr->expr)
1839 && !MEM_VOLATILE_P (expr->expr)
1840 && MEM_NOTRAP_P (expr->expr))
1841 /* Constant memory reference, e.g., a PIC address. */
1842 continue;
1844 /* ??? Optimally, we would use interprocedural alias
1845 analysis to determine if this mem is actually killed
1846 by this call. */
1848 SET_BIT (prune_exprs, expr->bitmap_index);
1853 FOR_EACH_BB (bb)
1855 edge e;
1856 edge_iterator ei;
1858 /* If the current block is the destination of an abnormal edge, we
1859 kill all trapping (for PRE) and memory (for hoist) expressions
1860 because we won't be able to properly place the instruction on
1861 the edge. So make them neither anticipatable nor transparent.
1862 This is fairly conservative.
1864 ??? For hoisting it may be necessary to check for set-and-jump
1865 instructions here, not just for abnormal edges. The general problem
1866 is that when an expression cannot be placed right at the end of
1867 a basic block, we should account for any side-effects of a subsequent
1868 jump instruction that could clobber the expression. It would
1869 be best to implement this check along the lines of
1870 hoist_expr_reaches_here_p where the target block is already known
1871 and, hence, there's no need to conservatively prune expressions on
1872 "intermediate" set-and-jump instructions. */
1873 FOR_EACH_EDGE (e, ei, bb->preds)
1874 if ((e->flags & EDGE_ABNORMAL)
1875 && (pre_p || CALL_P (BB_END (e->src))))
1877 sbitmap_difference (antloc[bb->index],
1878 antloc[bb->index], prune_exprs);
1879 sbitmap_difference (transp[bb->index],
1880 transp[bb->index], prune_exprs);
1881 break;
1885 sbitmap_free (prune_exprs);
1888 /* It may be necessary to insert a large number of insns on edges to
1889 make the existing occurrences of expressions fully redundant. This
1890 routine examines the set of insertions and deletions and if the ratio
1891 of insertions to deletions is too high for a particular expression, then
1892 the expression is removed from the insertion/deletion sets.
1894 N_ELEMS is the number of elements in the hash table. */
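/* For illustration (values are made up, not necessarily the actual --param
   default): if MAX_GCSE_INSERTION_RATIO were 20 and an expression needed
   50 edge insertions to make just 2 occurrences redundant, its ratio of
   50/2 = 25 would exceed the limit and the expression would be dropped
   from both maps, whereas 30 insertions for the same 2 deletions
   (ratio 15) would be kept.  */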
1896 static void
1897 prune_insertions_deletions (int n_elems)
1899 sbitmap_iterator sbi;
1900 sbitmap prune_exprs;
1902 /* We always use I to iterate over blocks/edges and J to iterate over
1903 expressions. */
1904 unsigned int i, j;
1906 /* Counts for the number of times an expression needs to be inserted and
1907 the number of times an expression can be removed as a result. */
1908 int *insertions = GCNEWVEC (int, n_elems);
1909 int *deletions = GCNEWVEC (int, n_elems);
1911 /* Set of expressions which require too many insertions relative to
1912 the number of deletions achieved. We will prune these out of the
1913 insertion/deletion sets. */
1914 prune_exprs = sbitmap_alloc (n_elems);
1915 sbitmap_zero (prune_exprs);
1917 /* Iterate over the edges counting the number of times each expression
1918 needs to be inserted. */
1919 for (i = 0; i < (unsigned) n_edges; i++)
1921 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1922 insertions[j]++;
1925 /* Similarly for deletions, but those occur in blocks rather than on
1926 edges. */
1927 for (i = 0; i < (unsigned) last_basic_block; i++)
1929 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1930 deletions[j]++;
1933 /* Now that we have accurate counts, iterate over the elements in the
1934 hash table and see if any need too many insertions relative to the
1935 number of evaluations that can be removed. If so, mark them in
1936 PRUNE_EXPRS. */
1937 for (j = 0; j < (unsigned) n_elems; j++)
1938 if (deletions[j]
1939 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1940 SET_BIT (prune_exprs, j);
1942 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1943 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1945 for (i = 0; i < (unsigned) n_edges; i++)
1946 RESET_BIT (pre_insert_map[i], j);
1948 for (i = 0; i < (unsigned) last_basic_block; i++)
1949 RESET_BIT (pre_delete_map[i], j);
1952 sbitmap_free (prune_exprs);
1953 free (insertions);
1954 free (deletions);
1957 /* Top level routine to do the dataflow analysis needed by PRE. */
1959 static struct edge_list *
1960 compute_pre_data (void)
1962 struct edge_list *edge_list;
1963 basic_block bb;
1965 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1966 prune_expressions (true);
1967 sbitmap_vector_zero (ae_kill, last_basic_block);
1969 /* Compute ae_kill for each basic block using:
1971 ~(TRANSP | COMP)
1974 FOR_EACH_BB (bb)
1976 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1977 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1980 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1981 ae_kill, &pre_insert_map, &pre_delete_map);
1982 sbitmap_vector_free (antloc);
1983 antloc = NULL;
1984 sbitmap_vector_free (ae_kill);
1985 ae_kill = NULL;
1987 prune_insertions_deletions (expr_hash_table.n_elems);
1989 return edge_list;
1992 /* PRE utilities */
1994 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1995 block BB.
1997 VISITED is a pointer to a working buffer for tracking which BB's have
1998 been visited. It is NULL for the top-level call.
2000 We treat reaching expressions that go through blocks containing the same
2001 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2002 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2003 2 as not reaching. The intent is to improve the probability of finding
2004 only one reaching expression and to reduce register lifetimes by picking
2005 the closest such expression. */
2007 static int
2008 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2009 basic_block bb, char *visited)
2011 edge pred;
2012 edge_iterator ei;
2014 FOR_EACH_EDGE (pred, ei, bb->preds)
2016 basic_block pred_bb = pred->src;
2018 if (pred->src == ENTRY_BLOCK_PTR
2019 /* Has the predecessor already been visited? */
2020 || visited[pred_bb->index])
2021 ;/* Nothing to do. */
2023 /* Does this predecessor generate this expression? */
2024 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2026 /* Is this the occurrence we're looking for?
2027 Note that there's only one generating occurrence per block
2028 so we just need to check the block number. */
2029 if (occr_bb == pred_bb)
2030 return 1;
2032 visited[pred_bb->index] = 1;
2034 /* Ignore this predecessor if it kills the expression. */
2035 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2036 visited[pred_bb->index] = 1;
2038 /* Neither gen nor kill. */
2039 else
2041 visited[pred_bb->index] = 1;
2042 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2043 return 1;
2047 /* All paths have been checked. */
2048 return 0;
2051 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
2052 memory allocated for that function is returned. */
2054 static int
2055 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2057 int rval;
2058 char *visited = XCNEWVEC (char, last_basic_block);
2060 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2062 free (visited);
2063 return rval;
2066 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2068 static rtx
2069 process_insert_insn (struct expr *expr)
2071 rtx reg = expr->reaching_reg;
2072 /* Copy the expression to make sure we don't have any sharing issues. */
2073 rtx exp = copy_rtx (expr->expr);
2074 rtx pat;
2076 start_sequence ();
2078 /* If the expression is something that's an operand, like a constant,
2079 just copy it to a register. */
2080 if (general_operand (exp, GET_MODE (reg)))
2081 emit_move_insn (reg, exp);
2083 /* Otherwise, make a new insn to compute this expression and make sure the
2084 insn will be recognized (this also adds any needed CLOBBERs). */
2085 else
2087 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2089 if (insn_invalid_p (insn, false))
2090 gcc_unreachable ();
2093 pat = get_insns ();
2094 end_sequence ();
2096 return pat;
2099 /* Add EXPR to the end of basic block BB.
2101 This is used by both the PRE and code hoisting. */
2103 static void
2104 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2106 rtx insn = BB_END (bb);
2107 rtx new_insn;
2108 rtx reg = expr->reaching_reg;
2109 int regno = REGNO (reg);
2110 rtx pat, pat_end;
2112 pat = process_insert_insn (expr);
2113 gcc_assert (pat && INSN_P (pat));
2115 pat_end = pat;
2116 while (NEXT_INSN (pat_end) != NULL_RTX)
2117 pat_end = NEXT_INSN (pat_end);
2119 /* If the last insn is a jump, insert EXPR in front [taking care to
2120 handle cc0, etc. properly]. Similarly we need to take care of trapping
2121 instructions in the presence of non-call exceptions. */
2123 if (JUMP_P (insn)
2124 || (NONJUMP_INSN_P (insn)
2125 && (!single_succ_p (bb)
2126 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2128 #ifdef HAVE_cc0
2129 rtx note;
2130 #endif
2132 /* If this is a jump table, then we can't insert stuff here. Since
2133 we know the previous real insn must be the tablejump, we insert
2134 the new instruction just before the tablejump. */
2135 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2136 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2137 insn = prev_active_insn (insn);
2139 #ifdef HAVE_cc0
2140 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2141 if cc0 isn't set. */
2142 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2143 if (note)
2144 insn = XEXP (note, 0);
2145 else
2147 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2148 if (maybe_cc0_setter
2149 && INSN_P (maybe_cc0_setter)
2150 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2151 insn = maybe_cc0_setter;
2153 #endif
2154 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2155 new_insn = emit_insn_before_noloc (pat, insn, bb);
2158 /* Likewise if the last insn is a call, as will happen in the presence
2159 of exception handling. */
2160 else if (CALL_P (insn)
2161 && (!single_succ_p (bb)
2162 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2164 /* Keeping in mind targets with small register classes and parameters
2165 in registers, we search backward and place the instructions before
2166 the first parameter is loaded. Do this for all targets for consistency,
2167 on the presumption that we'll get better code elsewhere as well. */
2169 /* Since different machines initialize their parameter registers
2170 in different orders, assume nothing. Collect the set of all
2171 parameter registers. */
2172 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2174 /* If we found all the parameter loads, then we want to insert
2175 before the first parameter load.
2177 If we did not find all the parameter loads, then we might have
2178 stopped on the head of the block, which could be a CODE_LABEL.
2179 If we inserted before the CODE_LABEL, then we would be putting
2180 the insn in the wrong basic block. In that case, put the insn
2181 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2182 while (LABEL_P (insn)
2183 || NOTE_INSN_BASIC_BLOCK_P (insn))
2184 insn = NEXT_INSN (insn);
2186 new_insn = emit_insn_before_noloc (pat, insn, bb);
2188 else
2189 new_insn = emit_insn_after_noloc (pat, insn, bb);
2191 while (1)
2193 if (INSN_P (pat))
2194 add_label_notes (PATTERN (pat), new_insn);
2195 if (pat == pat_end)
2196 break;
2197 pat = NEXT_INSN (pat);
2200 gcse_create_count++;
2202 if (dump_file)
2204 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2205 bb->index, INSN_UID (new_insn));
2206 fprintf (dump_file, "copying expression %d to reg %d\n",
2207 expr->bitmap_index, regno);
2211 /* Insert partially redundant expressions on edges in the CFG to make
2212 the expressions fully redundant. */
2214 static int
2215 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2217 int e, i, j, num_edges, set_size, did_insert = 0;
2218 sbitmap *inserted;
2220 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2221 if it reaches any of the deleted expressions. */
2223 set_size = pre_insert_map[0]->size;
2224 num_edges = NUM_EDGES (edge_list);
2225 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2226 sbitmap_vector_zero (inserted, num_edges);
2228 for (e = 0; e < num_edges; e++)
2230 int indx;
2231 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2233 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2235 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2237 for (j = indx;
2238 insert && j < (int) expr_hash_table.n_elems;
2239 j++, insert >>= 1)
2240 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2242 struct expr *expr = index_map[j];
2243 struct occr *occr;
2245 /* Now look at each deleted occurrence of this expression. */
2246 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2248 if (! occr->deleted_p)
2249 continue;
2251 /* Insert this expression on this edge if it would
2252 reach the deleted occurrence in BB. */
2253 if (!TEST_BIT (inserted[e], j))
2255 rtx insn;
2256 edge eg = INDEX_EDGE (edge_list, e);
2258 /* We can't insert anything on an abnormal and
2259 critical edge, so we insert the insn at the end of
2260 the previous block. There are several alternatives
2261 detailed in Morgan's book, p. 277 (sec 10.5), for
2262 handling this situation. This one is easiest for
2263 now. */
2265 if (eg->flags & EDGE_ABNORMAL)
2266 insert_insn_end_basic_block (index_map[j], bb);
2267 else
2269 insn = process_insert_insn (index_map[j]);
2270 insert_insn_on_edge (insn, eg);
2273 if (dump_file)
2275 fprintf (dump_file, "PRE: edge (%d,%d), ",
2276 bb->index,
2277 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2278 fprintf (dump_file, "copy expression %d\n",
2279 expr->bitmap_index);
2282 update_ld_motion_stores (expr);
2283 SET_BIT (inserted[e], j);
2284 did_insert = 1;
2285 gcse_create_count++;
2292 sbitmap_vector_free (inserted);
2293 return did_insert;
2296 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2297 Given "old_reg <- expr" (INSN), instead of adding after it
2298 reaching_reg <- old_reg
2299 it's better to do the following:
2300 reaching_reg <- expr
2301 old_reg <- reaching_reg
2302 because this way copy propagation can discover additional PRE
2303 opportunities. But if this fails, we try the old way.
2304 When "expr" is a store, i.e.
2305 given "MEM <- old_reg", instead of adding after it
2306 reaching_reg <- old_reg
2307 it's better to add it before as follows:
2308 reaching_reg <- old_reg
2309 MEM <- reaching_reg. */
2311 static void
2312 pre_insert_copy_insn (struct expr *expr, rtx insn)
2314 rtx reg = expr->reaching_reg;
2315 int regno = REGNO (reg);
2316 int indx = expr->bitmap_index;
2317 rtx pat = PATTERN (insn);
2318 rtx set, first_set, new_insn;
2319 rtx old_reg;
2320 int i;
2322 /* This block matches the logic in hash_scan_insn. */
2323 switch (GET_CODE (pat))
2325 case SET:
2326 set = pat;
2327 break;
2329 case PARALLEL:
2330 /* Search through the parallel looking for the set whose
2331 source was the expression that we're interested in. */
2332 first_set = NULL_RTX;
2333 set = NULL_RTX;
2334 for (i = 0; i < XVECLEN (pat, 0); i++)
2336 rtx x = XVECEXP (pat, 0, i);
2337 if (GET_CODE (x) == SET)
2339 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2340 may not find an equivalent expression, but in this
2341 case the PARALLEL will have a single set. */
2342 if (first_set == NULL_RTX)
2343 first_set = x;
2344 if (expr_equiv_p (SET_SRC (x), expr->expr))
2346 set = x;
2347 break;
2352 gcc_assert (first_set);
2353 if (set == NULL_RTX)
2354 set = first_set;
2355 break;
2357 default:
2358 gcc_unreachable ();
2361 if (REG_P (SET_DEST (set)))
2363 old_reg = SET_DEST (set);
2364 /* Check if we can modify the set destination in the original insn. */
2365 if (validate_change (insn, &SET_DEST (set), reg, 0))
2367 new_insn = gen_move_insn (old_reg, reg);
2368 new_insn = emit_insn_after (new_insn, insn);
2370 else
2372 new_insn = gen_move_insn (reg, old_reg);
2373 new_insn = emit_insn_after (new_insn, insn);
2376 else /* This is possible only in case of a store to memory. */
2378 old_reg = SET_SRC (set);
2379 new_insn = gen_move_insn (reg, old_reg);
2381 /* Check if we can modify the set source in the original insn. */
2382 if (validate_change (insn, &SET_SRC (set), reg, 0))
2383 new_insn = emit_insn_before (new_insn, insn);
2384 else
2385 new_insn = emit_insn_after (new_insn, insn);
2388 gcse_create_count++;
2390 if (dump_file)
2391 fprintf (dump_file,
2392 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2393 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2394 INSN_UID (insn), regno);
2397 /* Copy available expressions that reach the redundant expression
2398 to `reaching_reg'. */
2400 static void
2401 pre_insert_copies (void)
2403 unsigned int i, added_copy;
2404 struct expr *expr;
2405 struct occr *occr;
2406 struct occr *avail;
2408 /* For each available expression in the table, copy the result to
2409 `reaching_reg' if the expression reaches a deleted one.
2411 ??? The current algorithm is rather brute force.
2412 Need to do some profiling. */
2414 for (i = 0; i < expr_hash_table.size; i++)
2415 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2417 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2418 we don't want to insert a copy here because the expression may not
2419 really be redundant. So only insert an insn if the expression was
2420 deleted. This test also avoids further processing if the
2421 expression wasn't deleted anywhere. */
2422 if (expr->reaching_reg == NULL)
2423 continue;
2425 /* Set when we add a copy for that expression. */
2426 added_copy = 0;
2428 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2430 if (! occr->deleted_p)
2431 continue;
2433 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2435 rtx insn = avail->insn;
2437 /* No need to handle this one if handled already. */
2438 if (avail->copied_p)
2439 continue;
2441 /* Don't handle this one if it's a redundant one. */
2442 if (INSN_DELETED_P (insn))
2443 continue;
2445 /* Or if the expression doesn't reach the deleted one. */
2446 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2447 expr,
2448 BLOCK_FOR_INSN (occr->insn)))
2449 continue;
2451 added_copy = 1;
2453 /* Copy the result of avail to reaching_reg. */
2454 pre_insert_copy_insn (expr, insn);
2455 avail->copied_p = 1;
2459 if (added_copy)
2460 update_ld_motion_stores (expr);
2464 /* Emit move from SRC to DEST noting the equivalence with expression computed
2465 in INSN. */
2467 static rtx
2468 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2470 rtx new_rtx;
2471 rtx set = single_set (insn), set2;
2472 rtx note;
2473 rtx eqv;
2475 /* This should never fail since we're creating a reg->reg copy
2476 we've verified to be valid. */
2478 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2480 /* Note the equivalence for local CSE pass. */
2481 set2 = single_set (new_rtx);
2482 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2483 return new_rtx;
2484 if ((note = find_reg_equal_equiv_note (insn)))
2485 eqv = XEXP (note, 0);
2486 else
2487 eqv = SET_SRC (set);
2489 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2491 return new_rtx;
2494 /* Delete redundant computations.
2495 Deletion is done by changing the insn to copy the `reaching_reg' of
2496 the expression into the result of the SET. It is left to later passes
2497 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2499 Return nonzero if a change is made. */
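/* A sketch of the replacement performed below (register numbers invented
   for illustration): a redundant computation such as
       (set (reg 100) (plus:SI (reg 1) (reg 2)))
   becomes a copy from the expression's reaching register,
       (set (reg 100) (reg 260))
   and the copy is left for later passes to propagate or eliminate.  */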
2501 static int
2502 pre_delete (void)
2504 unsigned int i;
2505 int changed;
2506 struct expr *expr;
2507 struct occr *occr;
2509 changed = 0;
2510 for (i = 0; i < expr_hash_table.size; i++)
2511 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2513 int indx = expr->bitmap_index;
2515 /* We only need to search antic_occr since we require ANTLOC != 0. */
2516 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2518 rtx insn = occr->insn;
2519 rtx set;
2520 basic_block bb = BLOCK_FOR_INSN (insn);
2522 /* We only delete insns that have a single_set. */
2523 if (TEST_BIT (pre_delete_map[bb->index], indx)
2524 && (set = single_set (insn)) != 0
2525 && dbg_cnt (pre_insn))
2527 /* Create a pseudo-reg to store the result of reaching
2528 expressions into. Get the mode for the new pseudo from
2529 the mode of the original destination pseudo. */
2530 if (expr->reaching_reg == NULL)
2531 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2533 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2534 delete_insn (insn);
2535 occr->deleted_p = 1;
2536 changed = 1;
2537 gcse_subst_count++;
2539 if (dump_file)
2541 fprintf (dump_file,
2542 "PRE: redundant insn %d (expression %d) in ",
2543 INSN_UID (insn), indx);
2544 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2545 bb->index, REGNO (expr->reaching_reg));
2551 return changed;
2554 /* Perform GCSE optimizations using PRE.
2555 This is called by one_pre_gcse_pass after all the dataflow analysis
2556 has been done.
2558 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2559 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2560 Compiler Design and Implementation.
2562 ??? A new pseudo reg is created to hold the reaching expression. The nice
2563 thing about the classical approach is that it would try to use an existing
2564 reg. If the register can't be adequately optimized [i.e. we introduce
2565 reload problems], one could add a pass here to propagate the new register
2566 through the block.
2568 ??? We don't handle single sets in PARALLELs because we're [currently] not
2569 able to copy the rest of the parallel when we insert copies to create full
2570 redundancies from partial redundancies. However, there's no reason why we
2571 can't handle PARALLELs in the cases where there are no partial
2572 redundancies. */
2574 static int
2575 pre_gcse (struct edge_list *edge_list)
2577 unsigned int i;
2578 int did_insert, changed;
2579 struct expr **index_map;
2580 struct expr *expr;
2582 /* Compute a mapping from expression number (`bitmap_index') to
2583 hash table entry. */
2585 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2586 for (i = 0; i < expr_hash_table.size; i++)
2587 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2588 index_map[expr->bitmap_index] = expr;
2590 /* Delete the redundant insns first so that
2591 - we know what register to use for the new insns and for the other
2592 ones with reaching expressions
2593 - we know which insns are redundant when we go to create copies */
2595 changed = pre_delete ();
2596 did_insert = pre_edge_insert (edge_list, index_map);
2598 /* In other places with reaching expressions, copy the expression to the
2599 specially allocated pseudo-reg that reaches the redundant expr. */
2600 pre_insert_copies ();
2601 if (did_insert)
2603 commit_edge_insertions ();
2604 changed = 1;
2607 free (index_map);
2608 return changed;
2611 /* Top level routine to perform one PRE GCSE pass.
2613 Return nonzero if a change was made. */
2615 static int
2616 one_pre_gcse_pass (void)
2618 int changed = 0;
2620 gcse_subst_count = 0;
2621 gcse_create_count = 0;
2623 /* Return if there's nothing to do, or it is too expensive. */
2624 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2625 || is_too_expensive (_("PRE disabled")))
2626 return 0;
2628 /* We need alias. */
2629 init_alias_analysis ();
2631 bytes_used = 0;
2632 gcc_obstack_init (&gcse_obstack);
2633 alloc_gcse_mem ();
2635 alloc_hash_table (&expr_hash_table);
2636 add_noreturn_fake_exit_edges ();
2637 if (flag_gcse_lm)
2638 compute_ld_motion_mems ();
2640 compute_hash_table (&expr_hash_table);
2641 if (flag_gcse_lm)
2642 trim_ld_motion_mems ();
2643 if (dump_file)
2644 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2646 if (expr_hash_table.n_elems > 0)
2648 struct edge_list *edge_list;
2649 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2650 edge_list = compute_pre_data ();
2651 changed |= pre_gcse (edge_list);
2652 free_edge_list (edge_list);
2653 free_pre_mem ();
2656 if (flag_gcse_lm)
2657 free_ld_motion_mems ();
2658 remove_fake_exit_edges ();
2659 free_hash_table (&expr_hash_table);
2661 free_gcse_mem ();
2662 obstack_free (&gcse_obstack, NULL);
2664 /* We are finished with alias. */
2665 end_alias_analysis ();
2667 if (dump_file)
2669 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2670 current_function_name (), n_basic_blocks, bytes_used);
2671 fprintf (dump_file, "%d substs, %d insns created\n",
2672 gcse_subst_count, gcse_create_count);
2675 return changed;
2678 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2679 to INSN. If such notes are added to an insn which references a
2680 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2681 that note, because the following loop optimization pass requires
2682 them. */
2684 /* ??? If there was a jump optimization pass after gcse and before loop,
2685 then we would not need to do this here, because jump would add the
2686 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2688 static void
2689 add_label_notes (rtx x, rtx insn)
2691 enum rtx_code code = GET_CODE (x);
2692 int i, j;
2693 const char *fmt;
2695 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2697 /* This code used to ignore labels that referred to dispatch tables to
2698 avoid flow generating (slightly) worse code.
2700 We no longer ignore such label references (see LABEL_REF handling in
2701 mark_jump_label for additional information). */
2703 /* There's no reason for current users to emit jump-insns with
2704 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2705 notes. */
2706 gcc_assert (!JUMP_P (insn));
2707 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2709 if (LABEL_P (XEXP (x, 0)))
2710 LABEL_NUSES (XEXP (x, 0))++;
2712 return;
2715 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2717 if (fmt[i] == 'e')
2718 add_label_notes (XEXP (x, i), insn);
2719 else if (fmt[i] == 'E')
2720 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2721 add_label_notes (XVECEXP (x, i, j), insn);
2725 /* Code Hoisting variables and subroutines. */
2727 /* Very busy expressions. */
2728 static sbitmap *hoist_vbein;
2729 static sbitmap *hoist_vbeout;
2731 /* ??? We could compute post dominators and run this algorithm in
2732 reverse to perform tail merging; doing so would probably be
2733 more effective than the tail merging code in jump.c.
2735 It's unclear if tail merging could be run in parallel with
2736 code hoisting. It would be nice. */
2738 /* Allocate vars used for code hoisting analysis. */
2740 static void
2741 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2743 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2744 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2745 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2747 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2748 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2751 /* Free vars used for code hoisting analysis. */
2753 static void
2754 free_code_hoist_mem (void)
2756 sbitmap_vector_free (antloc);
2757 sbitmap_vector_free (transp);
2758 sbitmap_vector_free (comp);
2760 sbitmap_vector_free (hoist_vbein);
2761 sbitmap_vector_free (hoist_vbeout);
2763 free_dominance_info (CDI_DOMINATORS);
2766 /* Compute the very busy expressions at entry/exit from each block.
2768 An expression is very busy if all paths from a given point
2769 compute the expression. */
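/* Roughly, the fixed point computed below is:
     VBEOUT(bb) = (intersection of VBEIN over bb's successors) | COMP(bb)
     VBEIN(bb)  = ANTLOC(bb) | (VBEOUT(bb) & TRANSP(bb))
   with VBEOUT left empty for the block laid out just before the exit
   block.  */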
2771 static void
2772 compute_code_hoist_vbeinout (void)
2774 int changed, passes;
2775 basic_block bb;
2777 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2778 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2780 passes = 0;
2781 changed = 1;
2783 while (changed)
2785 changed = 0;
2787 /* We scan the blocks in the reverse order to speed up
2788 the convergence. */
2789 FOR_EACH_BB_REVERSE (bb)
2791 if (bb->next_bb != EXIT_BLOCK_PTR)
2793 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2794 hoist_vbein, bb->index);
2796 /* Include expressions in VBEout that are calculated
2797 in BB and available at its end. */
2798 sbitmap_a_or_b (hoist_vbeout[bb->index],
2799 hoist_vbeout[bb->index], comp[bb->index]);
2802 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2803 antloc[bb->index],
2804 hoist_vbeout[bb->index],
2805 transp[bb->index]);
2808 passes++;
2811 if (dump_file)
2813 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2815 FOR_EACH_BB (bb)
2817 fprintf (dump_file, "vbein (%d): ", bb->index);
2818 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2819 fprintf (dump_file, "vbeout(%d): ", bb->index);
2820 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2825 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2827 static void
2828 compute_code_hoist_data (void)
2830 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2831 prune_expressions (false);
2832 compute_code_hoist_vbeinout ();
2833 calculate_dominance_info (CDI_DOMINATORS);
2834 if (dump_file)
2835 fprintf (dump_file, "\n");
2838 /* Determine if the expression identified by EXPR_INDEX would
2839 reach BB unimpaired if it were placed at the end of EXPR_BB.
2840 Stop the search if the expression would need to be moved more
2841 than DISTANCE instructions.
2843 It's unclear exactly what Muchnick meant by "unimpaired". It seems
2844 to me that the expression must either be computed or transparent in
2845 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2846 would allow the expression to be hoisted out of loops, even if
2847 the expression wasn't a loop invariant.
2849 Contrast this to reachability for PRE where an expression is
2850 considered reachable if *any* path reaches instead of *all*
2851 paths. */
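/* For example (a made-up diamond CFG): if EXPR_BB branches to B1 and B2,
   both of which fall into B3, then the expression reaches B3 for hoisting
   purposes only if it is transparent in both B1 and B2; for PRE a single
   such path would already count as reaching.  */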
2853 static int
2854 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2855 char *visited, int distance, int *bb_size)
2857 edge pred;
2858 edge_iterator ei;
2859 int visited_allocated_locally = 0;
2861 /* Terminate the search if distance, for which EXPR is allowed to move,
2862 is exhausted. */
2863 if (distance > 0)
2865 distance -= bb_size[bb->index];
2867 if (distance <= 0)
2868 return 0;
2870 else
2871 gcc_assert (distance == 0);
2873 if (visited == NULL)
2875 visited_allocated_locally = 1;
2876 visited = XCNEWVEC (char, last_basic_block);
2879 FOR_EACH_EDGE (pred, ei, bb->preds)
2881 basic_block pred_bb = pred->src;
2883 if (pred->src == ENTRY_BLOCK_PTR)
2884 break;
2885 else if (pred_bb == expr_bb)
2886 continue;
2887 else if (visited[pred_bb->index])
2888 continue;
2890 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2891 break;
2893 /* Not killed. */
2894 else
2896 visited[pred_bb->index] = 1;
2897 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2898 visited, distance, bb_size))
2899 break;
2902 if (visited_allocated_locally)
2903 free (visited);
2905 return (pred == NULL);
2908 /* Find occurrence in BB. */
2910 static struct occr *
2911 find_occr_in_bb (struct occr *occr, basic_block bb)
2913 /* Find the right occurrence of this expression. */
2914 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2915 occr = occr->next;
2917 return occr;
2920 /* Actually perform code hoisting. */
2922 static int
2923 hoist_code (void)
2925 basic_block bb, dominated;
2926 VEC (basic_block, heap) *dom_tree_walk;
2927 unsigned int dom_tree_walk_index;
2928 VEC (basic_block, heap) *domby;
2929 unsigned int i,j;
2930 struct expr **index_map;
2931 struct expr *expr;
2932 int *to_bb_head;
2933 int *bb_size;
2934 int changed = 0;
2936 /* Compute a mapping from expression number (`bitmap_index') to
2937 hash table entry. */
2939 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2940 for (i = 0; i < expr_hash_table.size; i++)
2941 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2942 index_map[expr->bitmap_index] = expr;
2944 /* Calculate sizes of basic blocks and note how far
2945 each instruction is from the start of its block. We then use this
2946 data to restrict distance an expression can travel. */
2948 to_bb_head = XCNEWVEC (int, get_max_uid ());
2949 bb_size = XCNEWVEC (int, last_basic_block);
2951 FOR_EACH_BB (bb)
2953 rtx insn;
2954 int to_head;
2956 to_head = 0;
2957 FOR_BB_INSNS (bb, insn)
2959 /* Don't count debug instructions to avoid them affecting
2960 decision choices. */
2961 if (NONDEBUG_INSN_P (insn))
2962 to_bb_head[INSN_UID (insn)] = to_head++;
2965 bb_size[bb->index] = to_head;
2968 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2969 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2970 == ENTRY_BLOCK_PTR->next_bb));
2972 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2973 ENTRY_BLOCK_PTR->next_bb);
2975 /* Walk over each basic block looking for potentially hoistable
2976 expressions, nothing gets hoisted from the entry block. */
2977 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2979 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2981 if (VEC_length (basic_block, domby) == 0)
2982 continue;
2984 /* Examine each expression that is very busy at the exit of this
2985 block. These are the potentially hoistable expressions. */
2986 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2988 if (TEST_BIT (hoist_vbeout[bb->index], i))
2990 /* Current expression. */
2991 struct expr *expr = index_map[i];
2992 /* Number of occurrences of EXPR that can be hoisted to BB. */
2993 int hoistable = 0;
2994 /* Basic blocks that have occurrences reachable from BB. */
2995 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2996 /* Occurrences reachable from BB. */
2997 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2998 /* We want to insert the expression into BB only once, so
2999 note when we've inserted it. */
3000 int insn_inserted_p;
3001 occr_t occr;
3003 bitmap_initialize (from_bbs, 0);
3005 /* If an expression is computed in BB and is available at end of
3006 BB, hoist all occurrences dominated by BB to BB. */
3007 if (TEST_BIT (comp[bb->index], i))
3009 occr = find_occr_in_bb (expr->antic_occr, bb);
3011 if (occr)
3013 /* An occurrence might've been already deleted
3014 while processing a dominator of BB. */
3015 if (!occr->deleted_p)
3017 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3018 hoistable++;
3021 else
3022 hoistable++;
3025 /* We've found a potentially hoistable expression, now
3026 we look at every block BB dominates to see if it
3027 computes the expression. */
3028 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3030 int max_distance;
3032 /* Ignore self dominance. */
3033 if (bb == dominated)
3034 continue;
3035 /* We've found a dominated block, now see if it computes
3036 the busy expression and whether or not moving that
3037 expression to the "beginning" of that block is safe. */
3038 if (!TEST_BIT (antloc[dominated->index], i))
3039 continue;
3041 occr = find_occr_in_bb (expr->antic_occr, dominated);
3042 gcc_assert (occr);
3044 /* An occurrence might've been already deleted
3045 while processing a dominator of BB. */
3046 if (occr->deleted_p)
3047 continue;
3048 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3050 max_distance = expr->max_distance;
3051 if (max_distance > 0)
3052 /* Adjust MAX_DISTANCE to account for the fact that
3053 OCCR won't have to travel all of DOMINATED, but
3054 only part of it. */
3055 max_distance += (bb_size[dominated->index]
3056 - to_bb_head[INSN_UID (occr->insn)]);
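/* For instance (numbers invented): if DOMINATED contains 10 non-debug
   insns and OCCR is its 4th insn (to_bb_head == 3), the occurrence only
   has to travel the remaining insns of DOMINATED, so MAX_DISTANCE is
   increased by 10 - 3 = 7.  */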
3058 /* Note if the expression would reach the dominated block
3059 unimpaired if it were placed at the end of BB.
3061 Keep track of how many times this expression is hoistable
3062 from a dominated block into BB. */
3063 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3064 max_distance, bb_size))
3066 hoistable++;
3067 VEC_safe_push (occr_t, heap,
3068 occrs_to_hoist, occr);
3069 bitmap_set_bit (from_bbs, dominated->index);
3073 /* If we found more than one hoistable occurrence of this
3074 expression, then note it in the vector of expressions to
3075 hoist. It makes no sense to hoist things which are computed
3076 in only one BB, and doing so tends to pessimize register
3077 allocation. One could increase this value to try harder
3078 to avoid any possible code expansion due to register
3079 allocation issues; however experiments have shown that
3080 the vast majority of hoistable expressions are only movable
3081 from two successors, so raising this threshold is likely
3082 to nullify any benefit we get from code hoisting. */
3083 if (hoistable > 1 && dbg_cnt (hoist_insn))
3085 /* If (hoistable != VEC_length), then there is
3086 an occurrence of EXPR in BB itself. Don't waste
3087 time looking for LCA in this case. */
3088 if ((unsigned) hoistable
3089 == VEC_length (occr_t, occrs_to_hoist))
3091 basic_block lca;
3093 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3094 from_bbs);
3095 if (lca != bb)
3096 /* Punt, it's better to hoist these occurrences to
3097 LCA. */
3098 VEC_free (occr_t, heap, occrs_to_hoist);
3101 else
3102 /* Punt, there is no point in hoisting a single occurrence. */
3103 VEC_free (occr_t, heap, occrs_to_hoist);
3105 insn_inserted_p = 0;
3107 /* Walk through the occurrences of the I'th expression we want
3108 to hoist to BB and make the transformations. */
3109 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3111 rtx insn;
3112 rtx set;
3114 gcc_assert (!occr->deleted_p);
3116 insn = occr->insn;
3117 set = single_set (insn);
3118 gcc_assert (set);
3120 /* Create a pseudo-reg to store the result of reaching
3121 expressions into. Get the mode for the new pseudo
3122 from the mode of the original destination pseudo.
3124 It is important to use new pseudos whenever we
3125 emit a set. This will allow reload to use
3126 rematerialization for such registers. */
3127 if (!insn_inserted_p)
3128 expr->reaching_reg
3129 = gen_reg_rtx_and_attrs (SET_DEST (set));
3131 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3132 insn);
3133 delete_insn (insn);
3134 occr->deleted_p = 1;
3135 changed = 1;
3136 gcse_subst_count++;
3138 if (!insn_inserted_p)
3140 insert_insn_end_basic_block (expr, bb);
3141 insn_inserted_p = 1;
3145 VEC_free (occr_t, heap, occrs_to_hoist);
3146 bitmap_clear (from_bbs);
3149 VEC_free (basic_block, heap, domby);
3152 VEC_free (basic_block, heap, dom_tree_walk);
3153 free (bb_size);
3154 free (to_bb_head);
3155 free (index_map);
3157 return changed;
3160 /* Top level routine to perform one code hoisting (aka unification) pass.
3162 Return nonzero if a change was made. */
3164 static int
3165 one_code_hoisting_pass (void)
3167 int changed = 0;
3169 gcse_subst_count = 0;
3170 gcse_create_count = 0;
3172 /* Return if there's nothing to do, or it is too expensive. */
3173 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3174 || is_too_expensive (_("GCSE disabled")))
3175 return 0;
3177 doing_code_hoisting_p = true;
3179 /* We need alias. */
3180 init_alias_analysis ();
3182 bytes_used = 0;
3183 gcc_obstack_init (&gcse_obstack);
3184 alloc_gcse_mem ();
3186 alloc_hash_table (&expr_hash_table);
3187 compute_hash_table (&expr_hash_table);
3188 if (dump_file)
3189 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3191 if (expr_hash_table.n_elems > 0)
3193 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3194 compute_code_hoist_data ();
3195 changed = hoist_code ();
3196 free_code_hoist_mem ();
3199 free_hash_table (&expr_hash_table);
3200 free_gcse_mem ();
3201 obstack_free (&gcse_obstack, NULL);
3203 /* We are finished with alias. */
3204 end_alias_analysis ();
3206 if (dump_file)
3208 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3209 current_function_name (), n_basic_blocks, bytes_used);
3210 fprintf (dump_file, "%d substs, %d insns created\n",
3211 gcse_subst_count, gcse_create_count);
3214 doing_code_hoisting_p = false;
3216 return changed;
3219 /* Here we provide the things required to do store motion towards the exit.
3220 In order for this to be effective, gcse also needed to be taught how to
3221 move a load when it is killed only by a store to itself.
3223 int i;
3224 float a[10];
3226 void foo(float scale)
3228 for (i=0; i<10; i++)
3229 a[i] *= scale;
3232 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3233 the load out since it's live around the loop, and stored at the bottom
3234 of the loop.
3236 The 'Load Motion' referred to and implemented in this file is
3237 an enhancement to gcse which when using edge based LCM, recognizes
3238 this situation and allows gcse to move the load out of the loop.
3240 Once gcse has hoisted the load, store motion can then push this
3241 load towards the exit, and we end up with no loads or stores of 'i'
3242 in the loop. */
3244 static hashval_t
3245 pre_ldst_expr_hash (const void *p)
3247 int do_not_record_p = 0;
3248 const struct ls_expr *const x = (const struct ls_expr *) p;
3249 return
3250 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3253 static int
3254 pre_ldst_expr_eq (const void *p1, const void *p2)
3256 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3257 *const ptr2 = (const struct ls_expr *) p2;
3258 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3261 /* This will search the ldst list for a matching expression. If it
3262 doesn't find one, we create one and initialize it. */
3264 static struct ls_expr *
3265 ldst_entry (rtx x)
3267 int do_not_record_p = 0;
3268 struct ls_expr * ptr;
3269 unsigned int hash;
3270 void **slot;
3271 struct ls_expr e;
3273 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3274 NULL, /*have_reg_qty=*/false);
3276 e.pattern = x;
3277 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3278 if (*slot)
3279 return (struct ls_expr *)*slot;
3281 ptr = XNEW (struct ls_expr);
3283 ptr->next = pre_ldst_mems;
3284 ptr->expr = NULL;
3285 ptr->pattern = x;
3286 ptr->pattern_regs = NULL_RTX;
3287 ptr->loads = NULL_RTX;
3288 ptr->stores = NULL_RTX;
3289 ptr->reaching_reg = NULL_RTX;
3290 ptr->invalid = 0;
3291 ptr->index = 0;
3292 ptr->hash_index = hash;
3293 pre_ldst_mems = ptr;
3294 *slot = ptr;
3296 return ptr;
3299 /* Free up an individual ldst entry. */
3301 static void
3302 free_ldst_entry (struct ls_expr * ptr)
3304 free_INSN_LIST_list (& ptr->loads);
3305 free_INSN_LIST_list (& ptr->stores);
3307 free (ptr);
3310 /* Free up all memory associated with the ldst list. */
3312 static void
3313 free_ld_motion_mems (void)
3315 if (pre_ldst_table)
3316 htab_delete (pre_ldst_table);
3317 pre_ldst_table = NULL;
3319 while (pre_ldst_mems)
3321 struct ls_expr * tmp = pre_ldst_mems;
3323 pre_ldst_mems = pre_ldst_mems->next;
3325 free_ldst_entry (tmp);
3328 pre_ldst_mems = NULL;
3331 /* Dump debugging info about the ldst list. */
3333 static void
3334 print_ldst_list (FILE * file)
3336 struct ls_expr * ptr;
3338 fprintf (file, "LDST list: \n");
3340 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3342 fprintf (file, " Pattern (%3d): ", ptr->index);
3344 print_rtl (file, ptr->pattern);
3346 fprintf (file, "\n Loads : ");
3348 if (ptr->loads)
3349 print_rtl (file, ptr->loads);
3350 else
3351 fprintf (file, "(nil)");
3353 fprintf (file, "\n Stores : ");
3355 if (ptr->stores)
3356 print_rtl (file, ptr->stores);
3357 else
3358 fprintf (file, "(nil)");
3360 fprintf (file, "\n\n");
3363 fprintf (file, "\n");
3366 /* Returns 1 if X is in the list of ldst only expressions. */
3368 static struct ls_expr *
3369 find_rtx_in_ldst (rtx x)
3371 struct ls_expr e;
3372 void **slot;
3373 if (!pre_ldst_table)
3374 return NULL;
3375 e.pattern = x;
3376 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3377 if (!slot || ((struct ls_expr *)*slot)->invalid)
3378 return NULL;
3379 return (struct ls_expr *) *slot;
3382 /* Load Motion for loads which only kill themselves. */
3384 /* Return true if x, a MEM, is a simple access with no side effects.
3385 These are the types of loads we consider for the ld_motion list,
3386 otherwise we let the usual aliasing take care of it. */
3388 static int
3389 simple_mem (const_rtx x)
3391 if (MEM_VOLATILE_P (x))
3392 return 0;
3394 if (GET_MODE (x) == BLKmode)
3395 return 0;
3397 /* If we are handling exceptions, we must be careful with memory references
3398 that may trap. If we are not, the behavior is undefined, so we may just
3399 continue. */
3400 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3401 return 0;
3403 if (side_effects_p (x))
3404 return 0;
3406 /* Do not consider function arguments passed on stack. */
3407 if (reg_mentioned_p (stack_pointer_rtx, x))
3408 return 0;
3410 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3411 return 0;
3413 return 1;
3416 /* Make sure there isn't a buried reference in this pattern anywhere.
3417 If there is, invalidate the entry for it since we're not capable
3418 of fixing it up just yet. We have to be sure we know about ALL
3419 loads since the aliasing code will allow all entries in the
3420 ld_motion list to not-alias themselves. If we miss a load, we will get
3421 the wrong value since gcse might common it and we won't know to
3422 fix it up. */
3424 static void
3425 invalidate_any_buried_refs (rtx x)
3427 const char * fmt;
3428 int i, j;
3429 struct ls_expr * ptr;
3431 /* Invalidate it in the list. */
3432 if (MEM_P (x) && simple_mem (x))
3434 ptr = ldst_entry (x);
3435 ptr->invalid = 1;
3438 /* Recursively process the insn. */
3439 fmt = GET_RTX_FORMAT (GET_CODE (x));
3441 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3443 if (fmt[i] == 'e')
3444 invalidate_any_buried_refs (XEXP (x, i));
3445 else if (fmt[i] == 'E')
3446 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3447 invalidate_any_buried_refs (XVECEXP (x, i, j));
3451 /* Find all the 'simple' MEMs which are used in LOADs and STORES. 'Simple'
3452 here means MEM loads and stores to symbols, with no side effects
3453 and no registers in the expression. For a MEM destination, we also
3454 check that the insn is still valid if we replace the destination with a
3455 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3456 which don't match this criteria, they are invalidated and trimmed out
3457 later. */
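/* Schematically (not exact RTL, names invented), the recording done below
   looks like:
     (set (reg 70) (mem (symbol_ref "x")))   -> noted on the loads list
     (set (mem (symbol_ref "x")) (reg 70))   -> noted on the stores list
   while a buried MEM elsewhere in the pattern, or a store whose source
   cannot be placed in a register, marks the entry invalid so it is
   trimmed out later.  */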
3459 static void
3460 compute_ld_motion_mems (void)
3462 struct ls_expr * ptr;
3463 basic_block bb;
3464 rtx insn;
3466 pre_ldst_mems = NULL;
3467 pre_ldst_table
3468 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3470 FOR_EACH_BB (bb)
3472 FOR_BB_INSNS (bb, insn)
3474 if (NONDEBUG_INSN_P (insn))
3476 if (GET_CODE (PATTERN (insn)) == SET)
3478 rtx src = SET_SRC (PATTERN (insn));
3479 rtx dest = SET_DEST (PATTERN (insn));
3481 /* Check for a simple LOAD... */
3482 if (MEM_P (src) && simple_mem (src))
3484 ptr = ldst_entry (src);
3485 if (REG_P (dest))
3486 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3487 else
3488 ptr->invalid = 1;
3490 else
3492 /* Make sure there isn't a buried load somewhere. */
3493 invalidate_any_buried_refs (src);
3496 /* Check for stores. Don't worry about aliased ones; they
3497 will block any movement we might do later. We only care
3498 about this exact pattern since it is the only
3499 circumstance in which we will ignore the aliasing info. */
3500 if (MEM_P (dest) && simple_mem (dest))
3502 ptr = ldst_entry (dest);
3504 if (! MEM_P (src)
3505 && GET_CODE (src) != ASM_OPERANDS
3506 /* Check for REG manually since want_to_gcse_p
3507 returns 0 for all REGs. */
3508 && can_assign_to_reg_without_clobbers_p (src))
3509 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3510 else
3511 ptr->invalid = 1;
3514 else
3515 invalidate_any_buried_refs (PATTERN (insn));
3521 /* Remove any references that have been either invalidated or are not in the
3522 expression list for pre gcse. */
3524 static void
3525 trim_ld_motion_mems (void)
3527 struct ls_expr * * last = & pre_ldst_mems;
3528 struct ls_expr * ptr = pre_ldst_mems;
3530 while (ptr != NULL)
3532 struct expr * expr;
3534 /* Delete if entry has been made invalid. */
3535 if (! ptr->invalid)
3537 /* Delete if we cannot find this mem in the expression list. */
3538 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3540 for (expr = expr_hash_table.table[hash];
3541 expr != NULL;
3542 expr = expr->next_same_hash)
3543 if (expr_equiv_p (expr->expr, ptr->pattern))
3544 break;
3546 else
3547 expr = (struct expr *) 0;
3549 if (expr)
3551 /* Set the expression field if we are keeping it. */
3552 ptr->expr = expr;
3553 last = & ptr->next;
3554 ptr = ptr->next;
3556 else
3558 *last = ptr->next;
3559 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3560 free_ldst_entry (ptr);
3561 ptr = * last;
3565 /* Show the world what we've found. */
3566 if (dump_file && pre_ldst_mems != NULL)
3567 print_ldst_list (dump_file);
3570 /* This routine will take an expression which we are replacing with
3571 a reaching register, and update any stores that are needed if
3572 that expression is in the ld_motion list. Stores are updated by
3573 copying their SRC to the reaching register, and then storing
3574 the reaching register into the store location. This keeps the
3575 correct value in the reaching register for the loads. */
3577 static void
3578 update_ld_motion_stores (struct expr * expr)
3580 struct ls_expr * mem_ptr;
3582 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3584 /* We can try to find just the REACHED stores, but it shouldn't
3585 matter to set the reaching reg everywhere... some might be
3586 dead and should be eliminated later. */
3588 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3589 where reg is the reaching reg used in the load. We checked in
3590 compute_ld_motion_mems that we can replace (set mem expr) with
3591 (set reg expr) in that insn. */
3592 rtx list = mem_ptr->stores;
3594 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3596 rtx insn = XEXP (list, 0);
3597 rtx pat = PATTERN (insn);
3598 rtx src = SET_SRC (pat);
3599 rtx reg = expr->reaching_reg;
3600 rtx copy;
3602 /* If we've already copied it, continue. */
3603 if (expr->reaching_reg == src)
3604 continue;
3606 if (dump_file)
3608 fprintf (dump_file, "PRE: store updated with reaching reg ");
3609 print_rtl (dump_file, reg);
3610 fprintf (dump_file, ":\n ");
3611 print_inline_rtx (dump_file, insn, 8);
3612 fprintf (dump_file, "\n");
3615 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3616 emit_insn_before (copy, insn);
3617 SET_SRC (pat) = reg;
3618 df_insn_rescan (insn);
3620 /* un-recognize this pattern since it's probably different now. */
3621 INSN_CODE (insn) = -1;
3622 gcse_create_count++;
3627 /* Return true if the graph is too expensive to optimize. PASS is the
3628 optimization about to be performed. */
3630 static bool
3631 is_too_expensive (const char *pass)
3633 /* Trying to perform global optimizations on flow graphs which have
3634 a high connectivity will take a long time and is unlikely to be
3635 particularly useful.
3637 In normal circumstances a cfg should have about twice as many
3638 edges as blocks. But we do not want to punish small functions
3639 which have a couple of switch statements. Rather than simply
3640 thresholding the number of blocks, use something with more
3641 graceful degradation. */
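/* As a worked example of the edge-count test below: a function with
   1000 basic blocks may have up to 20000 + 1000 * 4 = 24000 edges
   before the pass is disabled.  */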
3642 if (n_edges > 20000 + n_basic_blocks * 4)
3644 warning (OPT_Wdisabled_optimization,
3645 "%s: %d basic blocks and %d edges/basic block",
3646 pass, n_basic_blocks, n_edges / n_basic_blocks);
3648 return true;
3651 /* If allocating memory for the dataflow bitmaps would take up too much
3652 storage it's better just to disable the optimization. */
3653 if ((n_basic_blocks
3654 * SBITMAP_SET_SIZE (max_reg_num ())
3655 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3657 warning (OPT_Wdisabled_optimization,
3658 "%s: %d basic blocks and %d registers",
3659 pass, n_basic_blocks, max_reg_num ());
3661 return true;
3664 return false;
3667 /* All the passes implemented in this file. Each pass has its
3668 own gate and execute function, and at the end of the file a
3669 pass definition for passes.c.
3671 We do not construct an accurate cfg in functions which call
3672 setjmp, so none of these passes runs if the function calls
3673 setjmp.
3674 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3676 static bool
3677 gate_rtl_pre (void)
3679 return optimize > 0 && flag_gcse
3680 && !cfun->calls_setjmp
3681 && optimize_function_for_speed_p (cfun)
3682 && dbg_cnt (pre);
3685 static unsigned int
3686 execute_rtl_pre (void)
3688 int changed;
3689 delete_unreachable_blocks ();
3690 df_analyze ();
3691 changed = one_pre_gcse_pass ();
3692 flag_rerun_cse_after_global_opts |= changed;
3693 if (changed)
3694 cleanup_cfg (0);
3695 return 0;
3698 static bool
3699 gate_rtl_hoist (void)
3701 return optimize > 0 && flag_gcse
3702 && !cfun->calls_setjmp
3703 /* It does not make sense to run code hoisting unless we are optimizing
3704 for code size -- it rarely makes programs faster, and can make them
3705 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3706 && optimize_function_for_size_p (cfun)
3707 && dbg_cnt (hoist);
3710 static unsigned int
3711 execute_rtl_hoist (void)
3713 int changed;
3714 delete_unreachable_blocks ();
3715 df_analyze ();
3716 changed = one_code_hoisting_pass ();
3717 flag_rerun_cse_after_global_opts |= changed;
3718 if (changed)
3719 cleanup_cfg (0);
3720 return 0;
3723 struct rtl_opt_pass pass_rtl_pre =
3726 RTL_PASS,
3727 "rtl pre", /* name */
3728 gate_rtl_pre, /* gate */
3729 execute_rtl_pre, /* execute */
3730 NULL, /* sub */
3731 NULL, /* next */
3732 0, /* static_pass_number */
3733 TV_PRE, /* tv_id */
3734 PROP_cfglayout, /* properties_required */
3735 0, /* properties_provided */
3736 0, /* properties_destroyed */
3737 0, /* todo_flags_start */
3738 TODO_df_finish | TODO_verify_rtl_sharing |
3739 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3743 struct rtl_opt_pass pass_rtl_hoist =
3746 RTL_PASS,
3747 "hoist", /* name */
3748 gate_rtl_hoist, /* gate */
3749 execute_rtl_hoist, /* execute */
3750 NULL, /* sub */
3751 NULL, /* next */
3752 0, /* static_pass_number */
3753 TV_HOIST, /* tv_id */
3754 PROP_cfglayout, /* properties_required */
3755 0, /* properties_provided */
3756 0, /* properties_destroyed */
3757 0, /* todo_flags_start */
3758 TODO_df_finish | TODO_verify_rtl_sharing |
3759 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3763 #include "gt-gcse.h"