gcc-4_7/gcc/gcse.c

   1 /* Partial redundancy elimination / Hoisting for RTL.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   3    2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* TODO
  22    - reordering of memory allocation and freeing to be more space efficient
  23    - do rough calc of how many regs are needed in each block, and a rough
  24      calc of how many regs are available in each class and use that to
  25      throttle back the code in cases where RTX_COST is minimal.
  26 */
  27
  28 /* References searched while implementing this.
  29
  30    Compilers Principles, Techniques and Tools
  31    Aho, Sethi, Ullman
  32    Addison-Wesley, 1988
  33
  34    Global Optimization by Suppression of Partial Redundancies
  35    E. Morel, C. Renvoise
  36    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  37
  38    A Portable Machine-Independent Global Optimizer - Design and Measurements
  39    Frederick Chow
  40    Stanford Ph.D. thesis, Dec. 1983
  41
  42    A Fast Algorithm for Code Movement Optimization
  43    D.M. Dhamdhere
  44    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  45
  46    A Solution to a Problem with Morel and Renvoise's
  47    Global Optimization by Suppression of Partial Redundancies
  48    K-H Drechsler, M.P. Stadel
  49    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  50
  51    Practical Adaptation of the Global Optimization
  52    Algorithm of Morel and Renvoise
  53    D.M. Dhamdhere
  54    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  55
  56    Efficiently Computing Static Single Assignment Form and the Control
  57    Dependence Graph
  58    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  59    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  60
  61    Lazy Code Motion
  62    J. Knoop, O. Ruthing, B. Steffen
  63    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  64
  65    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  66    Time for Reducible Flow Control
  67    Thomas Ball
  68    ACM Letters on Programming Languages and Systems,
  69    Vol. 2, Num. 1-4, Mar-Dec 1993
  70
  71    An Efficient Representation for Sparse Sets
  72    Preston Briggs, Linda Torczon
  73    ACM Letters on Programming Languages and Systems,
  74    Vol. 2, Num. 1-4, Mar-Dec 1993
  75
  76    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  77    K-H Drechsler, M.P. Stadel
  78    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  79
  80    Partial Dead Code Elimination
  81    J. Knoop, O. Ruthing, B. Steffen
  82    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  83
  84    Effective Partial Redundancy Elimination
  85    P. Briggs, K.D. Cooper
  86    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  87
  88    The Program Structure Tree: Computing Control Regions in Linear Time
  89    R. Johnson, D. Pearson, K. Pingali
  90    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  91
  92    Optimal Code Motion: Theory and Practice
  93    J. Knoop, O. Ruthing, B. Steffen
  94    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
  95
  96    The power of assignment motion
  97    J. Knoop, O. Ruthing, B. Steffen
  98    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
  99
 100    Global code motion / global value numbering
 101    C. Click
 102    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 103
 104    Value Driven Redundancy Elimination
 105    L.T. Simpson
 106    Rice University Ph.D. thesis, Apr. 1996
 107
 108    Value Numbering
 109    L.T. Simpson
 110    Massively Scalar Compiler Project, Rice University, Sep. 1996
 111
 112    High Performance Compilers for Parallel Computing
 113    Michael Wolfe
 114    Addison-Wesley, 1996
 115
 116    Advanced Compiler Design and Implementation
 117    Steven Muchnick
 118    Morgan Kaufmann, 1997
 119
 120    Building an Optimizing Compiler
 121    Robert Morgan
 122    Digital Press, 1998
 123
 124    People wishing to speed up the code here should read:
 125      Elimination Algorithms for Data Flow Analysis
 126      B.G. Ryder, M.C. Paull
 127      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 128
 129      How to Analyze Large Programs Efficiently and Informatively
 130      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 131      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 132
 133    People wishing to do something different can find various possibilities
 134    in the above papers and elsewhere.
 135 */
 136
 137 #include "config.h"
 138 #include "system.h"
 139 #include "coretypes.h"
 140 #include "tm.h"
 141 #include "diagnostic-core.h"
 142 #include "toplev.h"
 143
 144 #include "rtl.h"
 145 #include "tree.h"
 146 #include "tm_p.h"
 147 #include "regs.h"
 148 #include "hard-reg-set.h"
 149 #include "flags.h"
 150 #include "insn-config.h"
 151 #include "recog.h"
 152 #include "basic-block.h"
 153 #include "output.h"
 154 #include "function.h"
 155 #include "expr.h"
 156 #include "except.h"
 157 #include "ggc.h"
 158 #include "params.h"
 159 #include "cselib.h"
 160 #include "intl.h"
 161 #include "obstack.h"
 162 #include "timevar.h"
 163 #include "tree-pass.h"
 164 #include "hashtab.h"
 165 #include "df.h"
 166 #include "dbgcnt.h"
 167 #include "target.h"
 168 #include "gcse.h"
 169 #include "cfgloop.h"
 170
 171 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 172    are a superset of those done by classic GCSE.
 173
 174    Two passes of copy/constant propagation are done around PRE or hoisting
 175    because the first one enables more GCSE and the second one helps to clean
 176    up the copies that PRE and HOIST create.  This is needed more for PRE than
 177    for HOIST because code hoisting will try to use an existing register
 178    containing the common subexpression rather than create a new one.  This is
 179    harder to do for PRE because of the code motion (which HOIST doesn't do).
 180
 181    Expressions we are interested in GCSE-ing are of the form
 182    (set (pseudo-reg) (expression)).
 183    Function want_to_gcse_p says what these are.
 184
 185    In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
 186    This allows PRE to hoist expressions that are expressed in multiple insns,
 187    such as complex address calculations (e.g. for PIC code, or loads with a
 188    high part and a low part).
 189
 190    PRE handles moving invariant expressions out of loops (by treating them as
 191    partially redundant).
 192
 193    **********************
 194
 195    We used to support multiple passes but there are diminishing returns in
 196    doing so.  The first pass usually makes 90% of the changes that are doable.
 197    A second pass can make a few more changes made possible by the first pass.
 198    Experiments show any further passes don't make enough changes to justify
 199    the expense.
 200
 201    A study of spec92 using an unlimited number of passes:
 202    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 203    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 204    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 205
 206    It was found doing copy propagation between each pass enables further
 207    substitutions.
 208
 209    This study was done before expressions in REG_EQUAL notes were added as
 210    candidate expressions for optimization, and before the GIMPLE optimizers
 211    were added.  Probably, multiple passes is even less efficient now than
 212    at the time when the study was conducted.
 213
 214    PRE is quite expensive in complicated functions because the DFA can take
 215    a while to converge.  Hence we only perform one pass.
 216
 217    **********************
 218
 219    The steps for PRE are:
 220
 221    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 222
 223    2) Perform the data flow analysis for PRE.
 224
 225    3) Delete the redundant instructions
 226
 227    4) Insert the required copies [if any] that make the partially
 228       redundant instructions fully redundant.
 229
 230    5) For other reaching expressions, insert an instruction to copy the value
 231       to a newly created pseudo that will reach the redundant instruction.
 232
 233    The deletion is done first so that when we do insertions we
 234    know which pseudo reg to use.
 235
 236    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 237    argue it is not.  The number of iterations for the algorithm to converge
 238    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 239
 240    PRE GCSE depends heavily on the second CPROP pass to clean up the copies
 241    we create.  To make an expression reach the place where it's redundant,
 242    the result of the expression is copied to a new register, and the redundant
 243    expression is deleted by replacing it with this new register.  Classic GCSE
 244    doesn't have this problem as much as it computes the reaching defs of
 245    each register in each block and thus can try to use an existing
 246    register.  */
 247 \f
 248 /* GCSE global vars.  */
 249
 250 struct target_gcse default_target_gcse;
 251 #if SWITCHABLE_TARGET
 252 struct target_gcse *this_target_gcse = &default_target_gcse;
 253 #endif
 254
 255 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
 256 int flag_rerun_cse_after_global_opts;
 257
 258 /* An obstack for our working variables.  */
 259 static struct obstack gcse_obstack;
 260
 261 struct reg_use {rtx reg_rtx; };
 262
 263 /* Hash table of expressions.  */
 264
 265 struct expr
 266 {
 267   /* The expression.  */
 268   rtx expr;
 269   /* Index in the available expression bitmaps.  */
 270   int bitmap_index;
 271   /* Next entry with the same hash.  */
 272   struct expr *next_same_hash;
 273   /* List of anticipatable occurrences in basic blocks in the function.
 274      An "anticipatable occurrence" is one that is the first occurrence in the
 275      basic block, the operands are not modified in the basic block prior
 276      to the occurrence and the output is not used between the start of
 277      the block and the occurrence.  */
 278   struct occr *antic_occr;
 279   /* List of available occurrence in basic blocks in the function.
 280      An "available occurrence" is one that is the last occurrence in the
 281      basic block and the operands are not modified by following statements in
 282      the basic block [including this insn].  */
 283   struct occr *avail_occr;
 284   /* Non-null if the computation is PRE redundant.
 285      The value is the newly created pseudo-reg to record a copy of the
 286      expression in all the places that reach the redundant copy.  */
 287   rtx reaching_reg;
 288   /* Maximum distance in instructions this expression can travel.
 289      We avoid moving simple expressions for more than a few instructions
 290      to keep register pressure under control.
 291      A value of "0" removes restrictions on how far the expression can
 292      travel.  */
 293   int max_distance;
 294 };
 295
 296 /* Occurrence of an expression.
 297    There is one per basic block.  If a pattern appears more than once the
 298    last appearance is used [or first for anticipatable expressions].  */
 299
 300 struct occr
 301 {
 302   /* Next occurrence of this expression.  */
 303   struct occr *next;
 304   /* The insn that computes the expression.  */
 305   rtx insn;
 306   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
 307   char deleted_p;
 308   /* Nonzero if this [available] occurrence has been copied to
 309      reaching_reg.  */
 310   /* ??? This is mutually exclusive with deleted_p, so they could share
 311      the same byte.  */
 312   char copied_p;
 313 };
 314
 315 typedef struct occr *occr_t;
 316 DEF_VEC_P (occr_t);
 317 DEF_VEC_ALLOC_P (occr_t, heap);
 318
 319 /* Expression hash tables.
 320    Each hash table is an array of buckets.
 321    ??? It is known that if it were an array of entries, structure elements
 322    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 323    not clear whether in the final analysis a sufficient amount of memory would
 324    be saved as the size of the available expression bitmaps would be larger
 325    [one could build a mapping table without holes afterwards though].
 326    Someday I'll perform the computation and figure it out.  */
 327
 328 struct hash_table_d
 329 {
 330   /* The table itself.
 331      This is an array of `expr_hash_table_size' elements.  */
 332   struct expr **table;
 333
 334   /* Size of the hash table, in elements.  */
 335   unsigned int size;
 336
 337   /* Number of hash table elements.  */
 338   unsigned int n_elems;
 339 };
 340
 341 /* Expression hash table.  */
 342 static struct hash_table_d expr_hash_table;
 343
 344 /* This is a list of expressions which are MEMs and will be used by load
 345    or store motion.
 346    Load motion tracks MEMs which aren't killed by anything except itself,
 347    i.e. loads and stores to a single location.
 348    We can then allow movement of these MEM refs with a little special
 349    allowance. (all stores copy the same value to the reaching reg used
 350    for the loads).  This means all values used to store into memory must have
 351    no side effects so we can re-issue the setter value.  */
 352
 353 struct ls_expr
 354 {
 355   struct expr * expr;           /* Gcse expression reference for LM.  */
 356   rtx pattern;                  /* Pattern of this mem.  */
 357   rtx pattern_regs;             /* List of registers mentioned by the mem.  */
 358   rtx loads;                    /* INSN list of loads seen.  */
 359   rtx stores;                   /* INSN list of stores seen.  */
 360   struct ls_expr * next;        /* Next in the list.  */
 361   int invalid;                  /* Invalid for some reason.  */
 362   int index;                    /* If it maps to a bitmap index.  */
 363   unsigned int hash_index;      /* Index when in a hash table.  */
 364   rtx reaching_reg;             /* Register to use when re-writing.  */
 365 };
 366
 367 /* Head of the list of load/store memory refs.  */
 368 static struct ls_expr * pre_ldst_mems = NULL;
 369
 370 /* Hashtable for the load/store memory refs.  */
 371 static htab_t pre_ldst_table = NULL;
 372
 373 /* Bitmap containing one bit for each register in the program.
 374    Used when performing GCSE to track which registers have been set since
 375    the start of the basic block.  */
 376 static regset reg_set_bitmap;
 377
 378 /* Array, indexed by basic block number for a list of insns which modify
 379    memory within that block.  */
 380 static VEC (rtx,heap) **modify_mem_list;
 381 static bitmap modify_mem_list_set;
 382
 383 typedef struct modify_pair_s
 384 {
 385   rtx dest;                     /* A MEM.  */
 386   rtx dest_addr;                /* The canonical address of `dest'.  */
 387 } modify_pair;
 388
 389 DEF_VEC_O(modify_pair);
 390 DEF_VEC_ALLOC_O(modify_pair,heap);
 391
 392 /* This array parallels modify_mem_list, except that it stores MEMs
 393    being set and their canonicalized memory addresses.  */
 394 static VEC (modify_pair,heap) **canon_modify_mem_list;
 395
 396 /* Bitmap indexed by block numbers to record which blocks contain
 397    function calls.  */
 398 static bitmap blocks_with_calls;
 399
 400 /* Various variables for statistics gathering.  */
 401
 402 /* Memory used in a pass.
 403    This isn't intended to be absolutely precise.  Its intent is only
 404    to keep an eye on memory usage.  */
 405 static int bytes_used;
 406
 407 /* GCSE substitutions made.  */
 408 static int gcse_subst_count;
 409 /* Number of copy instructions created.  */
 410 static int gcse_create_count;
 411 \f
 412 /* Doing code hoisting.  */
 413 static bool doing_code_hoisting_p = false;
 414 \f
 415 /* For available exprs */
 416 static sbitmap *ae_kill;
 417 \f
 418 static void compute_can_copy (void);
 419 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
 420 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
 421 static void *gcse_alloc (unsigned long);
 422 static void alloc_gcse_mem (void);
 423 static void free_gcse_mem (void);
 424 static void hash_scan_insn (rtx, struct hash_table_d *);
 425 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
 426 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
 427 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
 428 static int want_to_gcse_p (rtx, int *);
 429 static int oprs_unchanged_p (const_rtx, const_rtx, int);
 430 static int oprs_anticipatable_p (const_rtx, const_rtx);
 431 static int oprs_available_p (const_rtx, const_rtx);
 432 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
 433                                   struct hash_table_d *);
 434 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
 435 static int expr_equiv_p (const_rtx, const_rtx);
 436 static void record_last_reg_set_info (rtx, int);
 437 static void record_last_mem_set_info (rtx);
 438 static void record_last_set_info (rtx, const_rtx, void *);
 439 static void compute_hash_table (struct hash_table_d *);
 440 static void alloc_hash_table (struct hash_table_d *);
 441 static void free_hash_table (struct hash_table_d *);
 442 static void compute_hash_table_work (struct hash_table_d *);
 443 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
 444 static void compute_transp (const_rtx, int, sbitmap *);
 445 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
 446                                       struct hash_table_d *);
 447 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
 448 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
 449 static void canon_list_insert (rtx, const_rtx, void *);
 450 static void alloc_pre_mem (int, int);
 451 static void free_pre_mem (void);
 452 static struct edge_list *compute_pre_data (void);
 453 static int pre_expr_reaches_here_p (basic_block, struct expr *,
 454                                     basic_block);
 455 static void insert_insn_end_basic_block (struct expr *, basic_block);
 456 static void pre_insert_copy_insn (struct expr *, rtx);
 457 static void pre_insert_copies (void);
 458 static int pre_delete (void);
 459 static int pre_gcse (struct edge_list *);
 460 static int one_pre_gcse_pass (void);
 461 static void add_label_notes (rtx, rtx);
 462 static void alloc_code_hoist_mem (int, int);
 463 static void free_code_hoist_mem (void);
 464 static void compute_code_hoist_vbeinout (void);
 465 static void compute_code_hoist_data (void);
 466 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
 467                                       int, int *);
 468 static int hoist_code (void);
 469 static int one_code_hoisting_pass (void);
 470 static rtx process_insert_insn (struct expr *);
 471 static int pre_edge_insert (struct edge_list *, struct expr **);
 472 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
 473                                          basic_block, char *);
 474 static struct ls_expr * ldst_entry (rtx);
 475 static void free_ldst_entry (struct ls_expr *);
 476 static void free_ld_motion_mems (void);
 477 static void print_ldst_list (FILE *);
 478 static struct ls_expr * find_rtx_in_ldst (rtx);
 479 static int simple_mem (const_rtx);
 480 static void invalidate_any_buried_refs (rtx);
 481 static void compute_ld_motion_mems (void);
 482 static void trim_ld_motion_mems (void);
 483 static void update_ld_motion_stores (struct expr *);
 484 static void clear_modify_mem_tables (void);
 485 static void free_modify_mem_tables (void);
 486 static rtx gcse_emit_move_after (rtx, rtx, rtx);
 487 static bool is_too_expensive (const char *);
 488 static unsigned find_loop_lsm_limit (struct loop*);
 489 static void adjust_loop_lsm_limit (struct loop*, unsigned);
 490 static void init_loop_lsm_limit (void);
 491 static void fini_loop_lsm_limit (void);
 492
 493 #define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
 494 #define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
 495
 496 #define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
 497 #define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
 498
 499 #define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
 500 #define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
 501
 502 #define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
 503 #define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
 504 \f
 505 /* Misc. utilities.  */
 506
 507 #define can_copy \
 508   (this_target_gcse->x_can_copy)
 509 #define can_copy_init_p \
 510   (this_target_gcse->x_can_copy_init_p)
 511
 512 /* Compute which modes support reg/reg copy operations.  */
 513
 514 static void
 515 compute_can_copy (void)
 516 {
 517   int i;
 518 #ifndef AVOID_CCMODE_COPIES
 519   rtx reg, insn;
 520 #endif
 521   memset (can_copy, 0, NUM_MACHINE_MODES);
 522
 523   start_sequence ();
 524   for (i = 0; i < NUM_MACHINE_MODES; i++)
 525     if (GET_MODE_CLASS (i) == MODE_CC)
 526       {
 527 #ifdef AVOID_CCMODE_COPIES
 528         can_copy[i] = 0;
 529 #else
 530         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 531         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 532         if (recog (PATTERN (insn), insn, NULL) >= 0)
 533           can_copy[i] = 1;
 534 #endif
 535       }
 536     else
 537       can_copy[i] = 1;
 538
 539   end_sequence ();
 540 }
 541
 542 /* Returns whether the mode supports reg/reg copy operations.  */
 543
 544 bool
 545 can_copy_p (enum machine_mode mode)
 546 {
 547   if (! can_copy_init_p)
 548     {
 549       compute_can_copy ();
 550       can_copy_init_p = true;
 551     }
 552
 553   return can_copy[mode] != 0;
 554 }
 555 \f
 556 /* Cover function to xmalloc to record bytes allocated.  */
 557
 558 static void *
 559 gmalloc (size_t size)
 560 {
 561   bytes_used += size;
 562   return xmalloc (size);
 563 }
 564
 565 /* Cover function to xcalloc to record bytes allocated.  */
 566
 567 static void *
 568 gcalloc (size_t nelem, size_t elsize)
 569 {
 570   bytes_used += nelem * elsize;
 571   return xcalloc (nelem, elsize);
 572 }
 573
 574 /* Cover function to obstack_alloc.  */
 575
 576 static void *
 577 gcse_alloc (unsigned long size)
 578 {
 579   bytes_used += size;
 580   return obstack_alloc (&gcse_obstack, size);
 581 }
 582
 583 /* Allocate memory for the reg/memory set tracking tables.
 584    This is called at the start of each pass.  */
 585
 586 static void
 587 alloc_gcse_mem (void)
 588 {
 589   /* Allocate vars to track sets of regs.  */
 590   reg_set_bitmap = ALLOC_REG_SET (NULL);
 591
 592   /* Allocate array to keep a list of insns which modify memory in each
 593      basic block.  */
 594   modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
 595   canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
 596                                     last_basic_block);
 597   modify_mem_list_set = BITMAP_ALLOC (NULL);
 598   blocks_with_calls = BITMAP_ALLOC (NULL);
 599 }
 600
 601 /* Free memory allocated by alloc_gcse_mem.  */
 602
 603 static void
 604 free_gcse_mem (void)
 605 {
 606   FREE_REG_SET (reg_set_bitmap);
 607
 608   free_modify_mem_tables ();
 609   BITMAP_FREE (modify_mem_list_set);
 610   BITMAP_FREE (blocks_with_calls);
 611 }
 612 \f
 613 /* Compute the local properties of each recorded expression.
 614
 615    Local properties are those that are defined by the block, irrespective of
 616    other blocks.
 617
 618    An expression is transparent in a block if its operands are not modified
 619    in the block.
 620
 621    An expression is computed (locally available) in a block if it is computed
 622    at least once and expression would contain the same value if the
 623    computation was moved to the end of the block.
 624
 625    An expression is locally anticipatable in a block if it is computed at
 626    least once and expression would contain the same value if the computation
 627    was moved to the beginning of the block.
 628
 629    We call this routine for pre and code hoisting.  They all compute
 630    basically the same information and thus can easily share this code.
 631
 632    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 633    properties.  If NULL, then it is not necessary to compute or record that
 634    particular property.
 635
 636    TABLE controls which hash table to look at.  */
 637
 638 static void
 639 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
 640                           struct hash_table_d *table)
 641 {
 642   unsigned int i;
 643
 644   /* Initialize any bitmaps that were passed in.  */
 645   if (transp)
 646     {
 647       sbitmap_vector_ones (transp, last_basic_block);
 648     }
 649
 650   if (comp)
 651     sbitmap_vector_zero (comp, last_basic_block);
 652   if (antloc)
 653     sbitmap_vector_zero (antloc, last_basic_block);
 654
 655   for (i = 0; i < table->size; i++)
 656     {
 657       struct expr *expr;
 658
 659       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
 660         {
 661           int indx = expr->bitmap_index;
 662           struct occr *occr;
 663
 664           /* The expression is transparent in this block if it is not killed.
 665              We start by assuming all are transparent [none are killed], and
 666              then reset the bits for those that are.  */
 667           if (transp)
 668             compute_transp (expr->expr, indx, transp);
 669
 670           /* The occurrences recorded in antic_occr are exactly those that
 671              we want to set to nonzero in ANTLOC.  */
 672           if (antloc)
 673             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
 674               {
 675                 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
 676
 677                 /* While we're scanning the table, this is a good place to
 678                    initialize this.  */
 679                 occr->deleted_p = 0;
 680               }
 681
 682           /* The occurrences recorded in avail_occr are exactly those that
 683              we want to set to nonzero in COMP.  */
 684           if (comp)
 685             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
 686               {
 687                 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
 688
 689                 /* While we're scanning the table, this is a good place to
 690                    initialize this.  */
 691                 occr->copied_p = 0;
 692               }
 693
 694           /* While we're scanning the table, this is a good place to
 695              initialize this.  */
 696           expr->reaching_reg = 0;
 697         }
 698     }
 699 }
 700 \f
 701 /* Hash table support.  */
 702
 703 struct reg_avail_info
 704 {
 705   basic_block last_bb;
 706   int first_set;
 707   int last_set;
 708 };
 709
 710 static struct reg_avail_info *reg_avail_info;
 711 static basic_block current_bb;
 712
 713 /* See whether X, the source of a set, is something we want to consider for
 714    GCSE.  */
 715
 716 static int
 717 want_to_gcse_p (rtx x, int *max_distance_ptr)
 718 {
 719 #ifdef STACK_REGS
 720   /* On register stack architectures, don't GCSE constants from the
 721      constant pool, as the benefits are often swamped by the overhead
 722      of shuffling the register stack between basic blocks.  */
 723   if (IS_STACK_MODE (GET_MODE (x)))
 724     x = avoid_constant_pool_reference (x);
 725 #endif
 726
 727   /* GCSE'ing constants:
 728
 729      We do not specifically distinguish between constant and non-constant
 730      expressions in PRE and Hoist.  We use set_src_cost below to limit
 731      the maximum distance simple expressions can travel.
 732
 733      Nevertheless, constants are much easier to GCSE, and, hence,
 734      it is easy to overdo the optimizations.  Usually, excessive PRE and
 735      Hoisting of constant leads to increased register pressure.
 736
 737      RA can deal with this by rematerialing some of the constants.
 738      Therefore, it is important that the back-end generates sets of constants
 739      in a way that allows reload rematerialize them under high register
 740      pressure, i.e., a pseudo register with REG_EQUAL to constant
 741      is set only once.  Failing to do so will result in IRA/reload
 742      spilling such constants under high register pressure instead of
 743      rematerializing them.  */
 744
 745   switch (GET_CODE (x))
 746     {
 747     case REG:
 748     case SUBREG:
 749     case CALL:
 750       return 0;
 751
 752     case CONST_INT:
 753     case CONST_DOUBLE:
 754     case CONST_FIXED:
 755     case CONST_VECTOR:
 756       if (!doing_code_hoisting_p)
 757         /* Do not PRE constants.  */
 758         return 0;
 759
 760       /* FALLTHRU */
 761
 762     default:
 763       if (doing_code_hoisting_p)
 764         /* PRE doesn't implement max_distance restriction.  */
 765         {
 766           int cost;
 767           int max_distance;
 768
 769           gcc_assert (!optimize_function_for_speed_p (cfun)
 770                       && optimize_function_for_size_p (cfun));
 771           cost = set_src_cost (x, 0);
 772
 773           if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
 774             {
 775               max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
 776               if (max_distance == 0)
 777                 return 0;
 778
 779               gcc_assert (max_distance > 0);
 780             }
 781           else
 782             max_distance = 0;
 783
 784           if (max_distance_ptr)
 785             *max_distance_ptr = max_distance;
 786         }
 787
 788       return can_assign_to_reg_without_clobbers_p (x);
 789     }
 790 }
 791
 792 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
 793
 794 static GTY(()) rtx test_insn;
 795
 796 /* Return true if we can assign X to a pseudo register such that the
 797    resulting insn does not result in clobbering a hard register as a
 798    side-effect.
 799
 800    Additionally, if the target requires it, check that the resulting insn
 801    can be copied.  If it cannot, this means that X is special and probably
 802    has hidden side-effects we don't want to mess with.
 803
 804    This function is typically used by code motion passes, to verify
 805    that it is safe to insert an insn without worrying about clobbering
 806    maybe live hard regs.  */
 807
 808 bool
 809 can_assign_to_reg_without_clobbers_p (rtx x)
 810 {
 811   int num_clobbers = 0;
 812   int icode;
 813
 814   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
 815   if (general_operand (x, GET_MODE (x)))
 816     return 1;
 817   else if (GET_MODE (x) == VOIDmode)
 818     return 0;
 819
 820   /* Otherwise, check if we can make a valid insn from it.  First initialize
 821      our test insn if we haven't already.  */
 822   if (test_insn == 0)
 823     {
 824       test_insn
 825         = make_insn_raw (gen_rtx_SET (VOIDmode,
 826                                       gen_rtx_REG (word_mode,
 827                                                    FIRST_PSEUDO_REGISTER * 2),
 828                                       const0_rtx));
 829       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
 830     }
 831
 832   /* Now make an insn like the one we would make when GCSE'ing and see if
 833      valid.  */
 834   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
 835   SET_SRC (PATTERN (test_insn)) = x;
 836
 837   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
 838   if (icode < 0)
 839     return false;
 840
 841   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
 842     return false;
 843
 844   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
 845     return false;
 846
 847   return true;
 848 }
 849
 850 /* Return nonzero if the operands of expression X are unchanged from the
 851    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
 852    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
 853
 854 static int
 855 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
 856 {
 857   int i, j;
 858   enum rtx_code code;
 859   const char *fmt;
 860
 861   if (x == 0)
 862     return 1;
 863
 864   code = GET_CODE (x);
 865   switch (code)
 866     {
 867     case REG:
 868       {
 869         struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
 870
 871         if (info->last_bb != current_bb)
 872           return 1;
 873         if (avail_p)
 874           return info->last_set < DF_INSN_LUID (insn);
 875         else
 876           return info->first_set >= DF_INSN_LUID (insn);
 877       }
 878
 879     case MEM:
 880       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
 881                                   x, avail_p))
 882         return 0;
 883       else
 884         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
 885
 886     case PRE_DEC:
 887     case PRE_INC:
 888     case POST_DEC:
 889     case POST_INC:
 890     case PRE_MODIFY:
 891     case POST_MODIFY:
 892       return 0;
 893
 894     case PC:
 895     case CC0: /*FIXME*/
 896     case CONST:
 897     case CONST_INT:
 898     case CONST_DOUBLE:
 899     case CONST_FIXED:
 900     case CONST_VECTOR:
 901     case SYMBOL_REF:
 902     case LABEL_REF:
 903     case ADDR_VEC:
 904     case ADDR_DIFF_VEC:
 905       return 1;
 906
 907     default:
 908       break;
 909     }
 910
 911   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
 912     {
 913       if (fmt[i] == 'e')
 914         {
 915           /* If we are about to do the last recursive call needed at this
 916              level, change it into iteration.  This function is called enough
 917              to be worth it.  */
 918           if (i == 0)
 919             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
 920
 921           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
 922             return 0;
 923         }
 924       else if (fmt[i] == 'E')
 925         for (j = 0; j < XVECLEN (x, i); j++)
 926           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
 927             return 0;
 928     }
 929
 930   return 1;
 931 }
 932
 933 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */
 934
 935 struct mem_conflict_info
 936 {
 937   /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
 938      see if a memory store conflicts with this memory load.  */
 939   const_rtx mem;
 940
 941   /* True if mems_conflict_for_gcse_p finds a conflict between two memory
 942      references.  */
 943   bool conflict;
 944 };
 945
 946 /* DEST is the output of an instruction.  If it is a memory reference and
 947    possibly conflicts with the load found in DATA, then communicate this
 948    information back through DATA.  */
 949
 950 static void
 951 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
 952                           void *data)
 953 {
 954   struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
 955
 956   while (GET_CODE (dest) == SUBREG
 957          || GET_CODE (dest) == ZERO_EXTRACT
 958          || GET_CODE (dest) == STRICT_LOW_PART)
 959     dest = XEXP (dest, 0);
 960
 961   /* If DEST is not a MEM, then it will not conflict with the load.  Note
 962      that function calls are assumed to clobber memory, but are handled
 963      elsewhere.  */
 964   if (! MEM_P (dest))
 965     return;
 966
 967   /* If we are setting a MEM in our list of specially recognized MEMs,
 968      don't mark as killed this time.  */
 969   if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
 970     {
 971       if (!find_rtx_in_ldst (dest))
 972         mci->conflict = true;
 973       return;
 974     }
 975
 976   if (true_dependence (dest, GET_MODE (dest), mci->mem))
 977     mci->conflict = true;
 978 }
 979
 980 /* Return nonzero if the expression in X (a memory reference) is killed
 981    in block BB before or after the insn with the LUID in UID_LIMIT.
 982    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
 983    before UID_LIMIT.
 984
 985    To check the entire block, set UID_LIMIT to max_uid + 1 and
 986    AVAIL_P to 0.  */
 987
 988 static int
 989 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
 990                         int avail_p)
 991 {
 992   VEC (rtx,heap) *list = modify_mem_list[bb->index];
 993   rtx setter;
 994   unsigned ix;
 995
 996   /* If this is a readonly then we aren't going to be changing it.  */
 997   if (MEM_READONLY_P (x))
 998     return 0;
 999
1000   FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
1001     {
1002       struct mem_conflict_info mci;
1003
1004       /* Ignore entries in the list that do not apply.  */
1005       if ((avail_p
1006            && DF_INSN_LUID (setter) < uid_limit)
1007           || (! avail_p
1008               && DF_INSN_LUID (setter) > uid_limit))
1009         continue;
1010
1011       /* If SETTER is a call everything is clobbered.  Note that calls
1012          to pure functions are never put on the list, so we need not
1013          worry about them.  */
1014       if (CALL_P (setter))
1015         return 1;
1016
1017       /* SETTER must be an INSN of some kind that sets memory.  Call
1018          note_stores to examine each hunk of memory that is modified.  */
1019       mci.mem = x;
1020       mci.conflict = false;
1021       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1022       if (mci.conflict)
1023         return 1;
1024     }
1025   return 0;
1026 }
1027
1028 /* Return nonzero if the operands of expression X are unchanged from
1029    the start of INSN's basic block up to but not including INSN.  */
1030
1031 static int
1032 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1033 {
1034   return oprs_unchanged_p (x, insn, 0);
1035 }
1036
1037 /* Return nonzero if the operands of expression X are unchanged from
1038    INSN to the end of INSN's basic block.  */
1039
1040 static int
1041 oprs_available_p (const_rtx x, const_rtx insn)
1042 {
1043   return oprs_unchanged_p (x, insn, 1);
1044 }
1045
1046 /* Hash expression X.
1047
1048    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1049    indicating if a volatile operand is found or if the expression contains
1050    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1051    the current size of the hash table to be probed.  */
1052
1053 static unsigned int
1054 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1055            int hash_table_size)
1056 {
1057   unsigned int hash;
1058
1059   *do_not_record_p = 0;
1060
1061   hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1062   return hash % hash_table_size;
1063 }
1064
1065 /* Return nonzero if exp1 is equivalent to exp2.  */
1066
1067 static int
1068 expr_equiv_p (const_rtx x, const_rtx y)
1069 {
1070   return exp_equiv_p (x, y, 0, true);
1071 }
1072
1073 /* Insert expression X in INSN in the hash TABLE.
1074    If it is already present, record it as the last occurrence in INSN's
1075    basic block.
1076
1077    MODE is the mode of the value X is being stored into.
1078    It is only used if X is a CONST_INT.
1079
1080    ANTIC_P is nonzero if X is an anticipatable expression.
1081    AVAIL_P is nonzero if X is an available expression.
1082
1083    MAX_DISTANCE is the maximum distance in instructions this expression can
1084    be moved.  */
1085
1086 static void
1087 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1088                       int avail_p, int max_distance, struct hash_table_d *table)
1089 {
1090   int found, do_not_record_p;
1091   unsigned int hash;
1092   struct expr *cur_expr, *last_expr = NULL;
1093   struct occr *antic_occr, *avail_occr;
1094
1095   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1096
1097   /* Do not insert expression in table if it contains volatile operands,
1098      or if hash_expr determines the expression is something we don't want
1099      to or can't handle.  */
1100   if (do_not_record_p)
1101     return;
1102
1103   cur_expr = table->table[hash];
1104   found = 0;
1105
1106   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1107     {
1108       /* If the expression isn't found, save a pointer to the end of
1109          the list.  */
1110       last_expr = cur_expr;
1111       cur_expr = cur_expr->next_same_hash;
1112     }
1113
1114   if (! found)
1115     {
1116       cur_expr = GOBNEW (struct expr);
1117       bytes_used += sizeof (struct expr);
1118       if (table->table[hash] == NULL)
1119         /* This is the first pattern that hashed to this index.  */
1120         table->table[hash] = cur_expr;
1121       else
1122         /* Add EXPR to end of this hash chain.  */
1123         last_expr->next_same_hash = cur_expr;
1124
1125       /* Set the fields of the expr element.  */
1126       cur_expr->expr = x;
1127       cur_expr->bitmap_index = table->n_elems++;
1128       cur_expr->next_same_hash = NULL;
1129       cur_expr->antic_occr = NULL;
1130       cur_expr->avail_occr = NULL;
1131       gcc_assert (max_distance >= 0);
1132       cur_expr->max_distance = max_distance;
1133     }
1134   else
1135     gcc_assert (cur_expr->max_distance == max_distance);
1136
1137   /* Now record the occurrence(s).  */
1138   if (antic_p)
1139     {
1140       antic_occr = cur_expr->antic_occr;
1141
1142       if (antic_occr
1143           && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1144         antic_occr = NULL;
1145
1146       if (antic_occr)
1147         /* Found another instance of the expression in the same basic block.
1148            Prefer the currently recorded one.  We want the first one in the
1149            block and the block is scanned from start to end.  */
1150         ; /* nothing to do */
1151       else
1152         {
1153           /* First occurrence of this expression in this basic block.  */
1154           antic_occr = GOBNEW (struct occr);
1155           bytes_used += sizeof (struct occr);
1156           antic_occr->insn = insn;
1157           antic_occr->next = cur_expr->antic_occr;
1158           antic_occr->deleted_p = 0;
1159           cur_expr->antic_occr = antic_occr;
1160         }
1161     }
1162
1163   if (avail_p)
1164     {
1165       avail_occr = cur_expr->avail_occr;
1166
1167       if (avail_occr
1168           && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1169         {
1170           /* Found another instance of the expression in the same basic block.
1171              Prefer this occurrence to the currently recorded one.  We want
1172              the last one in the block and the block is scanned from start
1173              to end.  */
1174           avail_occr->insn = insn;
1175         }
1176       else
1177         {
1178           /* First occurrence of this expression in this basic block.  */
1179           avail_occr = GOBNEW (struct occr);
1180           bytes_used += sizeof (struct occr);
1181           avail_occr->insn = insn;
1182           avail_occr->next = cur_expr->avail_occr;
1183           avail_occr->deleted_p = 0;
1184           cur_expr->avail_occr = avail_occr;
1185         }
1186     }
1187 }
1188
1189 /* Scan SET present in INSN and add an entry to the hash TABLE.  */
1190
1191 static void
1192 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1193 {
1194   rtx src = SET_SRC (set);
1195   rtx dest = SET_DEST (set);
1196   rtx note;
1197
1198   if (GET_CODE (src) == CALL)
1199     hash_scan_call (src, insn, table);
1200
1201   else if (REG_P (dest))
1202     {
1203       unsigned int regno = REGNO (dest);
1204       int max_distance = 0;
1205
1206       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1207
1208          This allows us to do a single GCSE pass and still eliminate
1209          redundant constants, addresses or other expressions that are
1210          constructed with multiple instructions.
1211
1212          However, keep the original SRC if INSN is a simple reg-reg move.
1213          In this case, there will almost always be a REG_EQUAL note on the
1214          insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1215          for INSN, we miss copy propagation opportunities and we perform the
1216          same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1217          do more than one PRE GCSE pass.
1218
1219          Note that this does not impede profitable constant propagations.  We
1220          "look through" reg-reg sets in lookup_avail_set.  */
1221       note = find_reg_equal_equiv_note (insn);
1222       if (note != 0
1223           && REG_NOTE_KIND (note) == REG_EQUAL
1224           && !REG_P (src)
1225           && want_to_gcse_p (XEXP (note, 0), NULL))
1226         src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1227
1228       /* Only record sets of pseudo-regs in the hash table.  */
1229       if (regno >= FIRST_PSEUDO_REGISTER
1230           /* Don't GCSE something if we can't do a reg/reg copy.  */
1231           && can_copy_p (GET_MODE (dest))
1232           /* GCSE commonly inserts instruction after the insn.  We can't
1233              do that easily for EH edges so disable GCSE on these for now.  */
1234           /* ??? We can now easily create new EH landing pads at the
1235              gimple level, for splitting edges; there's no reason we
1236              can't do the same thing at the rtl level.  */
1237           && !can_throw_internal (insn)
1238           /* Is SET_SRC something we want to gcse?  */
1239           && want_to_gcse_p (src, &max_distance)
1240           /* Don't CSE a nop.  */
1241           && ! set_noop_p (set)
1242           /* Don't GCSE if it has attached REG_EQUIV note.
1243              At this point this only function parameters should have
1244              REG_EQUIV notes and if the argument slot is used somewhere
1245              explicitly, it means address of parameter has been taken,
1246              so we should not extend the lifetime of the pseudo.  */
1247           && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1248         {
1249           /* An expression is not anticipatable if its operands are
1250              modified before this insn or if this is not the only SET in
1251              this insn.  The latter condition does not have to mean that
1252              SRC itself is not anticipatable, but we just will not be
1253              able to handle code motion of insns with multiple sets.  */
1254           int antic_p = oprs_anticipatable_p (src, insn)
1255                         && !multiple_sets (insn);
1256           /* An expression is not available if its operands are
1257              subsequently modified, including this insn.  It's also not
1258              available if this is a branch, because we can't insert
1259              a set after the branch.  */
1260           int avail_p = (oprs_available_p (src, insn)
1261                          && ! JUMP_P (insn));
1262
1263           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1264                                 max_distance, table);
1265         }
1266     }
1267   /* In case of store we want to consider the memory value as available in
1268      the REG stored in that memory. This makes it possible to remove
1269      redundant loads from due to stores to the same location.  */
1270   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1271       {
1272         unsigned int regno = REGNO (src);
1273         int max_distance = 0;
1274
1275         /* Only record sets of pseudo-regs in the hash table.  */
1276         if (regno >= FIRST_PSEUDO_REGISTER
1277            /* Don't GCSE something if we can't do a reg/reg copy.  */
1278            && can_copy_p (GET_MODE (src))
1279            /* GCSE commonly inserts instruction after the insn.  We can't
1280               do that easily for EH edges so disable GCSE on these for now.  */
1281            && !can_throw_internal (insn)
1282            /* Is SET_DEST something we want to gcse?  */
1283            && want_to_gcse_p (dest, &max_distance)
1284            /* Don't CSE a nop.  */
1285            && ! set_noop_p (set)
1286            /* Don't GCSE if it has attached REG_EQUIV note.
1287               At this point this only function parameters should have
1288               REG_EQUIV notes and if the argument slot is used somewhere
1289               explicitly, it means address of parameter has been taken,
1290               so we should not extend the lifetime of the pseudo.  */
1291            && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1292                || ! MEM_P (XEXP (note, 0))))
1293              {
1294                /* Stores are never anticipatable.  */
1295                int antic_p = 0;
1296                /* An expression is not available if its operands are
1297                   subsequently modified, including this insn.  It's also not
1298                   available if this is a branch, because we can't insert
1299                   a set after the branch.  */
1300                int avail_p = oprs_available_p (dest, insn)
1301                              && ! JUMP_P (insn);
1302
1303                /* Record the memory expression (DEST) in the hash table.  */
1304                insert_expr_in_table (dest, GET_MODE (dest), insn,
1305                                      antic_p, avail_p, max_distance, table);
1306              }
1307       }
1308 }
1309
1310 static void
1311 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1312                    struct hash_table_d *table ATTRIBUTE_UNUSED)
1313 {
1314   /* Currently nothing to do.  */
1315 }
1316
1317 static void
1318 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1319                 struct hash_table_d *table ATTRIBUTE_UNUSED)
1320 {
1321   /* Currently nothing to do.  */
1322 }
1323
1324 /* Process INSN and add hash table entries as appropriate.  */
1325
1326 static void
1327 hash_scan_insn (rtx insn, struct hash_table_d *table)
1328 {
1329   rtx pat = PATTERN (insn);
1330   int i;
1331
1332   /* Pick out the sets of INSN and for other forms of instructions record
1333      what's been modified.  */
1334
1335   if (GET_CODE (pat) == SET)
1336     hash_scan_set (pat, insn, table);
1337
1338   else if (GET_CODE (pat) == CLOBBER)
1339     hash_scan_clobber (pat, insn, table);
1340
1341   else if (GET_CODE (pat) == CALL)
1342     hash_scan_call (pat, insn, table);
1343
1344   else if (GET_CODE (pat) == PARALLEL)
1345     for (i = 0; i < XVECLEN (pat, 0); i++)
1346       {
1347         rtx x = XVECEXP (pat, 0, i);
1348
1349         if (GET_CODE (x) == SET)
1350           hash_scan_set (x, insn, table);
1351         else if (GET_CODE (x) == CLOBBER)
1352           hash_scan_clobber (x, insn, table);
1353         else if (GET_CODE (x) == CALL)
1354           hash_scan_call (x, insn, table);
1355       }
1356 }
1357
1358 /* Dump the hash table TABLE to file FILE under the name NAME.  */
1359
1360 static void
1361 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1362 {
1363   int i;
1364   /* Flattened out table, so it's printed in proper order.  */
1365   struct expr **flat_table;
1366   unsigned int *hash_val;
1367   struct expr *expr;
1368
1369   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1370   hash_val = XNEWVEC (unsigned int, table->n_elems);
1371
1372   for (i = 0; i < (int) table->size; i++)
1373     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1374       {
1375         flat_table[expr->bitmap_index] = expr;
1376         hash_val[expr->bitmap_index] = i;
1377       }
1378
1379   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1380            name, table->size, table->n_elems);
1381
1382   for (i = 0; i < (int) table->n_elems; i++)
1383     if (flat_table[i] != 0)
1384       {
1385         expr = flat_table[i];
1386         fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
1387                  expr->bitmap_index, hash_val[i], expr->max_distance);
1388         print_rtl (file, expr->expr);
1389         fprintf (file, "\n");
1390       }
1391
1392   fprintf (file, "\n");
1393
1394   free (flat_table);
1395   free (hash_val);
1396 }
1397
1398 /* Record register first/last/block set information for REGNO in INSN.
1399
1400    first_set records the first place in the block where the register
1401    is set and is used to compute "anticipatability".
1402
1403    last_set records the last place in the block where the register
1404    is set and is used to compute "availability".
1405
1406    last_bb records the block for which first_set and last_set are
1407    valid, as a quick test to invalidate them.  */
1408
1409 static void
1410 record_last_reg_set_info (rtx insn, int regno)
1411 {
1412   struct reg_avail_info *info = &reg_avail_info[regno];
1413   int luid = DF_INSN_LUID (insn);
1414
1415   info->last_set = luid;
1416   if (info->last_bb != current_bb)
1417     {
1418       info->last_bb = current_bb;
1419       info->first_set = luid;
1420     }
1421 }
1422
1423 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1424    Note we store a pair of elements in the list, so they have to be
1425    taken off pairwise.  */
1426
1427 static void
1428 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1429                    void * v_insn)
1430 {
1431   rtx dest_addr, insn;
1432   int bb;
1433   modify_pair *pair;
1434
1435   while (GET_CODE (dest) == SUBREG
1436       || GET_CODE (dest) == ZERO_EXTRACT
1437       || GET_CODE (dest) == STRICT_LOW_PART)
1438     dest = XEXP (dest, 0);
1439
1440   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1441      that function calls are assumed to clobber memory, but are handled
1442      elsewhere.  */
1443
1444   if (! MEM_P (dest))
1445     return;
1446
1447   dest_addr = get_addr (XEXP (dest, 0));
1448   dest_addr = canon_rtx (dest_addr);
1449   insn = (rtx) v_insn;
1450   bb = BLOCK_FOR_INSN (insn)->index;
1451
1452   pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1453   pair->dest = dest;
1454   pair->dest_addr = dest_addr;
1455 }
1456
1457 /* Record memory modification information for INSN.  We do not actually care
1458    about the memory location(s) that are set, or even how they are set (consider
1459    a CALL_INSN).  We merely need to record which insns modify memory.  */
1460
1461 static void
1462 record_last_mem_set_info (rtx insn)
1463 {
1464   int bb = BLOCK_FOR_INSN (insn)->index;
1465
1466   /* load_killed_in_block_p will handle the case of calls clobbering
1467      everything.  */
1468   VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1469   bitmap_set_bit (modify_mem_list_set, bb);
1470
1471   if (CALL_P (insn))
1472     bitmap_set_bit (blocks_with_calls, bb);
1473   else
1474     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1475 }
1476
1477 /* Called from compute_hash_table via note_stores to handle one
1478    SET or CLOBBER in an insn.  DATA is really the instruction in which
1479    the SET is taking place.  */
1480
1481 static void
1482 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1483 {
1484   rtx last_set_insn = (rtx) data;
1485
1486   if (GET_CODE (dest) == SUBREG)
1487     dest = SUBREG_REG (dest);
1488
1489   if (REG_P (dest))
1490     record_last_reg_set_info (last_set_insn, REGNO (dest));
1491   else if (MEM_P (dest)
1492            /* Ignore pushes, they clobber nothing.  */
1493            && ! push_operand (dest, GET_MODE (dest)))
1494     record_last_mem_set_info (last_set_insn);
1495 }
1496
1497 /* Top level function to create an expression hash table.
1498
1499    Expression entries are placed in the hash table if
1500    - they are of the form (set (pseudo-reg) src),
1501    - src is something we want to perform GCSE on,
1502    - none of the operands are subsequently modified in the block
1503
1504    Currently src must be a pseudo-reg or a const_int.
1505
1506    TABLE is the table computed.  */
1507
1508 static void
1509 compute_hash_table_work (struct hash_table_d *table)
1510 {
1511   int i;
1512
1513   /* re-Cache any INSN_LIST nodes we have allocated.  */
1514   clear_modify_mem_tables ();
1515   /* Some working arrays used to track first and last set in each block.  */
1516   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1517
1518   for (i = 0; i < max_reg_num (); ++i)
1519     reg_avail_info[i].last_bb = NULL;
1520
1521   FOR_EACH_BB (current_bb)
1522     {
1523       rtx insn;
1524       unsigned int regno;
1525
1526       /* First pass over the instructions records information used to
1527          determine when registers and memory are first and last set.  */
1528       FOR_BB_INSNS (current_bb, insn)
1529         {
1530           if (!NONDEBUG_INSN_P (insn))
1531             continue;
1532
1533           if (CALL_P (insn))
1534             {
1535               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1536                 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1537                   record_last_reg_set_info (insn, regno);
1538
1539               if (! RTL_CONST_OR_PURE_CALL_P (insn))
1540                 record_last_mem_set_info (insn);
1541             }
1542
1543           note_stores (PATTERN (insn), record_last_set_info, insn);
1544         }
1545
1546       /* The next pass builds the hash table.  */
1547       FOR_BB_INSNS (current_bb, insn)
1548         if (NONDEBUG_INSN_P (insn))
1549           hash_scan_insn (insn, table);
1550     }
1551
1552   free (reg_avail_info);
1553   reg_avail_info = NULL;
1554 }
1555
1556 /* Allocate space for the set/expr hash TABLE.
1557    It is used to determine the number of buckets to use.  */
1558
1559 static void
1560 alloc_hash_table (struct hash_table_d *table)
1561 {
1562   int n;
1563
1564   n = get_max_insn_count ();
1565
1566   table->size = n / 4;
1567   if (table->size < 11)
1568     table->size = 11;
1569
1570   /* Attempt to maintain efficient use of hash table.
1571      Making it an odd number is simplest for now.
1572      ??? Later take some measurements.  */
1573   table->size |= 1;
1574   n = table->size * sizeof (struct expr *);
1575   table->table = GNEWVAR (struct expr *, n);
1576 }
1577
1578 /* Free things allocated by alloc_hash_table.  */
1579
1580 static void
1581 free_hash_table (struct hash_table_d *table)
1582 {
1583   free (table->table);
1584 }
1585
1586 /* Compute the expression hash table TABLE.  */
1587
1588 static void
1589 compute_hash_table (struct hash_table_d *table)
1590 {
1591   /* Initialize count of number of entries in hash table.  */
1592   table->n_elems = 0;
1593   memset (table->table, 0, table->size * sizeof (struct expr *));
1594
1595   compute_hash_table_work (table);
1596 }
1597 \f
1598 /* Expression tracking support.  */
1599
1600 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1601 static void
1602 clear_modify_mem_tables (void)
1603 {
1604   unsigned i;
1605   bitmap_iterator bi;
1606
1607   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1608     {
1609       VEC_free (rtx, heap, modify_mem_list[i]);
1610       VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1611     }
1612   bitmap_clear (modify_mem_list_set);
1613   bitmap_clear (blocks_with_calls);
1614 }
1615
1616 /* Release memory used by modify_mem_list_set.  */
1617
1618 static void
1619 free_modify_mem_tables (void)
1620 {
1621   clear_modify_mem_tables ();
1622   free (modify_mem_list);
1623   free (canon_modify_mem_list);
1624   modify_mem_list = 0;
1625   canon_modify_mem_list = 0;
1626 }
1627 \f
1628 /* For each block, compute whether X is transparent.  X is either an
1629    expression or an assignment [though we don't care which, for this context
1630    an assignment is treated as an expression].  For each block where an
1631    element of X is modified, reset the INDX bit in BMAP.  */
1632
1633 static void
1634 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1635 {
1636   int i, j;
1637   enum rtx_code code;
1638   const char *fmt;
1639
1640   /* repeat is used to turn tail-recursion into iteration since GCC
1641      can't do it when there's no return value.  */
1642  repeat:
1643
1644   if (x == 0)
1645     return;
1646
1647   code = GET_CODE (x);
1648   switch (code)
1649     {
1650     case REG:
1651         {
1652           df_ref def;
1653           for (def = DF_REG_DEF_CHAIN (REGNO (x));
1654                def;
1655                def = DF_REF_NEXT_REG (def))
1656             RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1657         }
1658
1659       return;
1660
1661     case MEM:
1662       if (! MEM_READONLY_P (x))
1663         {
1664           bitmap_iterator bi;
1665           unsigned bb_index;
1666           rtx x_addr;
1667
1668           x_addr = get_addr (XEXP (x, 0));
1669           x_addr = canon_rtx (x_addr);
1670
1671           /* First handle all the blocks with calls.  We don't need to
1672              do any list walking for them.  */
1673           EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1674             {
1675               RESET_BIT (bmap[bb_index], indx);
1676             }
1677
1678           /* Now iterate over the blocks which have memory modifications
1679              but which do not have any calls.  */
1680           EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1681                                           blocks_with_calls,
1682                                           0, bb_index, bi)
1683             {
1684               VEC (modify_pair,heap) *list
1685                 = canon_modify_mem_list[bb_index];
1686               modify_pair *pair;
1687               unsigned ix;
1688
1689               FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1690                 {
1691                   rtx dest = pair->dest;
1692                   rtx dest_addr = pair->dest_addr;
1693
1694                   if (canon_true_dependence (dest, GET_MODE (dest),
1695                                              dest_addr, x, x_addr))
1696                     RESET_BIT (bmap[bb_index], indx);
1697                 }
1698             }
1699         }
1700
1701       x = XEXP (x, 0);
1702       goto repeat;
1703
1704     case PC:
1705     case CC0: /*FIXME*/
1706     case CONST:
1707     case CONST_INT:
1708     case CONST_DOUBLE:
1709     case CONST_FIXED:
1710     case CONST_VECTOR:
1711     case SYMBOL_REF:
1712     case LABEL_REF:
1713     case ADDR_VEC:
1714     case ADDR_DIFF_VEC:
1715       return;
1716
1717     default:
1718       break;
1719     }
1720
1721   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1722     {
1723       if (fmt[i] == 'e')
1724         {
1725           /* If we are about to do the last recursive call
1726              needed at this level, change it into iteration.
1727              This function is called enough to be worth it.  */
1728           if (i == 0)
1729             {
1730               x = XEXP (x, i);
1731               goto repeat;
1732             }
1733
1734           compute_transp (XEXP (x, i), indx, bmap);
1735         }
1736       else if (fmt[i] == 'E')
1737         for (j = 0; j < XVECLEN (x, i); j++)
1738           compute_transp (XVECEXP (x, i, j), indx, bmap);
1739     }
1740 }
1741 \f
1742 /* Compute PRE+LCM working variables.  */
1743
1744 /* Local properties of expressions.  */
1745
1746 /* Nonzero for expressions that are transparent in the block.  */
1747 static sbitmap *transp;
1748
1749 /* Nonzero for expressions that are computed (available) in the block.  */
1750 static sbitmap *comp;
1751
1752 /* Nonzero for expressions that are locally anticipatable in the block.  */
1753 static sbitmap *antloc;
1754
1755 /* Nonzero for expressions where this block is an optimal computation
1756    point.  */
1757 static sbitmap *pre_optimal;
1758
1759 /* Nonzero for expressions which are redundant in a particular block.  */
1760 static sbitmap *pre_redundant;
1761
1762 /* Nonzero for expressions which should be inserted on a specific edge.  */
1763 static sbitmap *pre_insert_map;
1764
1765 /* Nonzero for expressions which should be deleted in a specific block.  */
1766 static sbitmap *pre_delete_map;
1767
1768 /* Allocate vars used for PRE analysis.  */
1769
1770 static void
1771 alloc_pre_mem (int n_blocks, int n_exprs)
1772 {
1773   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1774   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1775   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1776
1777   pre_optimal = NULL;
1778   pre_redundant = NULL;
1779   pre_insert_map = NULL;
1780   pre_delete_map = NULL;
1781   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1782
1783   /* pre_insert and pre_delete are allocated later.  */
1784 }
1785
1786 /* Free vars used for PRE analysis.  */
1787
1788 static void
1789 free_pre_mem (void)
1790 {
1791   sbitmap_vector_free (transp);
1792   sbitmap_vector_free (comp);
1793
1794   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1795
1796   if (pre_optimal)
1797     sbitmap_vector_free (pre_optimal);
1798   if (pre_redundant)
1799     sbitmap_vector_free (pre_redundant);
1800   if (pre_insert_map)
1801     sbitmap_vector_free (pre_insert_map);
1802   if (pre_delete_map)
1803     sbitmap_vector_free (pre_delete_map);
1804
1805   transp = comp = NULL;
1806   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1807 }
1808
1809 /* Remove certain expressions from anticipatable and transparent
1810    sets of basic blocks that have incoming abnormal edge.
1811    For PRE remove potentially trapping expressions to avoid placing
1812    them on abnormal edges.  For hoisting remove memory references that
1813    can be clobbered by calls.  */
1814
1815 static void
1816 prune_expressions (bool pre_p)
1817 {
1818   sbitmap prune_exprs;
1819   struct expr *expr;
1820   unsigned int ui;
1821   basic_block bb;
1822
1823   prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1824   sbitmap_zero (prune_exprs);
1825   for (ui = 0; ui < expr_hash_table.size; ui++)
1826     {
1827       for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1828         {
1829           /* Note potentially trapping expressions.  */
1830           if (may_trap_p (expr->expr))
1831             {
1832               SET_BIT (prune_exprs, expr->bitmap_index);
1833               continue;
1834             }
1835
1836           if (!pre_p && MEM_P (expr->expr))
1837             /* Note memory references that can be clobbered by a call.
1838                We do not split abnormal edges in hoisting, so would
1839                a memory reference get hoisted along an abnormal edge,
1840                it would be placed /before/ the call.  Therefore, only
1841                constant memory references can be hoisted along abnormal
1842                edges.  */
1843             {
1844               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1845                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1846                 continue;
1847
1848               if (MEM_READONLY_P (expr->expr)
1849                   && !MEM_VOLATILE_P (expr->expr)
1850                   && MEM_NOTRAP_P (expr->expr))
1851                 /* Constant memory reference, e.g., a PIC address.  */
1852                 continue;
1853
1854               /* ??? Optimally, we would use interprocedural alias
1855                  analysis to determine if this mem is actually killed
1856                  by this call.  */
1857
1858               SET_BIT (prune_exprs, expr->bitmap_index);
1859             }
1860         }
1861     }
1862
1863   FOR_EACH_BB (bb)
1864     {
1865       edge e;
1866       edge_iterator ei;
1867
1868       /* If the current block is the destination of an abnormal edge, we
1869          kill all trapping (for PRE) and memory (for hoist) expressions
1870          because we won't be able to properly place the instruction on
1871          the edge.  So make them neither anticipatable nor transparent.
1872          This is fairly conservative.
1873
1874          ??? For hoisting it may be necessary to check for set-and-jump
1875          instructions here, not just for abnormal edges.  The general problem
1876          is that when an expression cannot not be placed right at the end of
1877          a basic block we should account for any side-effects of a subsequent
1878          jump instructions that could clobber the expression.  It would
1879          be best to implement this check along the lines of
1880          hoist_expr_reaches_here_p where the target block is already known
1881          and, hence, there's no need to conservatively prune expressions on
1882          "intermediate" set-and-jump instructions.  */
1883       FOR_EACH_EDGE (e, ei, bb->preds)
1884         if ((e->flags & EDGE_ABNORMAL)
1885             && (pre_p || CALL_P (BB_END (e->src))))
1886           {
1887             sbitmap_difference (antloc[bb->index],
1888                                 antloc[bb->index], prune_exprs);
1889             sbitmap_difference (transp[bb->index],
1890                                 transp[bb->index], prune_exprs);
1891             break;
1892           }
1893     }
1894
1895   sbitmap_free (prune_exprs);
1896 }
1897
1898 /* It may be necessary to insert a large number of insns on edges to
1899    make the existing occurrences of expressions fully redundant.  This
1900    routine examines the set of insertions and deletions and if the ratio
1901    of insertions to deletions is too high for a particular expression, then
1902    the expression is removed from the insertion/deletion sets.
1903
1904    N_ELEMS is the number of elements in the hash table.  */
1905
1906 static void
1907 prune_insertions_deletions (int n_elems)
1908 {
1909   sbitmap_iterator sbi;
1910   sbitmap prune_exprs;
1911
1912   /* We always use I to iterate over blocks/edges and J to iterate over
1913      expressions.  */
1914   unsigned int i, j;
1915
1916   /* Counts for the number of times an expression needs to be inserted and
1917      number of times an expression can be removed as a result.  */
1918   int *insertions = GCNEWVEC (int, n_elems);
1919   int *deletions = GCNEWVEC (int, n_elems);
1920
1921   /* Set of expressions which require too many insertions relative to
1922      the number of deletions achieved.  We will prune these out of the
1923      insertion/deletion sets.  */
1924   prune_exprs = sbitmap_alloc (n_elems);
1925   sbitmap_zero (prune_exprs);
1926
1927   /* Iterate over the edges counting the number of times each expression
1928      needs to be inserted.  */
1929   for (i = 0; i < (unsigned) n_edges; i++)
1930     {
1931       EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1932         insertions[j]++;
1933     }
1934
1935   /* Similarly for deletions, but those occur in blocks rather than on
1936      edges.  */
1937   for (i = 0; i < (unsigned) last_basic_block; i++)
1938     {
1939       EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1940         deletions[j]++;
1941     }
1942
1943   /* Now that we have accurate counts, iterate over the elements in the
1944      hash table and see if any need too many insertions relative to the
1945      number of evaluations that can be removed.  If so, mark them in
1946      PRUNE_EXPRS.  */
1947   for (j = 0; j < (unsigned) n_elems; j++)
1948     if (deletions[j]
1949         && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1950       SET_BIT (prune_exprs, j);
1951
1952   /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1953   EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1954     {
1955       for (i = 0; i < (unsigned) n_edges; i++)
1956         RESET_BIT (pre_insert_map[i], j);
1957
1958       for (i = 0; i < (unsigned) last_basic_block; i++)
1959         RESET_BIT (pre_delete_map[i], j);
1960     }
1961
1962   sbitmap_free (prune_exprs);
1963   free (insertions);
1964   free (deletions);
1965 }
1966
1967 /* Top level routine to do the dataflow analysis needed by PRE.  */
1968
1969 static struct edge_list *
1970 compute_pre_data (void)
1971 {
1972   struct edge_list *edge_list;
1973   basic_block bb;
1974
1975   compute_local_properties (transp, comp, antloc, &expr_hash_table);
1976   prune_expressions (true);
1977   sbitmap_vector_zero (ae_kill, last_basic_block);
1978
1979   /* Compute ae_kill for each basic block using:
1980
1981      ~(TRANSP | COMP)
1982   */
1983
1984   FOR_EACH_BB (bb)
1985     {
1986       sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1987       sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1988     }
1989
1990   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1991                             ae_kill, &pre_insert_map, &pre_delete_map);
1992   sbitmap_vector_free (antloc);
1993   antloc = NULL;
1994   sbitmap_vector_free (ae_kill);
1995   ae_kill = NULL;
1996
1997   prune_insertions_deletions (expr_hash_table.n_elems);
1998
1999   return edge_list;
2000 }
2001 \f
2002 /* PRE utilities */
2003
2004 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2005    block BB.
2006
2007    VISITED is a pointer to a working buffer for tracking which BB's have
2008    been visited.  It is NULL for the top-level call.
2009
2010    We treat reaching expressions that go through blocks containing the same
2011    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2012    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2013    2 as not reaching.  The intent is to improve the probability of finding
2014    only one reaching expression and to reduce register lifetimes by picking
2015    the closest such expression.  */
2016
2017 static int
2018 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2019                               basic_block bb, char *visited)
2020 {
2021   edge pred;
2022   edge_iterator ei;
2023
2024   FOR_EACH_EDGE (pred, ei, bb->preds)
2025     {
2026       basic_block pred_bb = pred->src;
2027
2028       if (pred->src == ENTRY_BLOCK_PTR
2029           /* Has predecessor has already been visited?  */
2030           || visited[pred_bb->index])
2031         ;/* Nothing to do.  */
2032
2033       /* Does this predecessor generate this expression?  */
2034       else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2035         {
2036           /* Is this the occurrence we're looking for?
2037              Note that there's only one generating occurrence per block
2038              so we just need to check the block number.  */
2039           if (occr_bb == pred_bb)
2040             return 1;
2041
2042           visited[pred_bb->index] = 1;
2043         }
2044       /* Ignore this predecessor if it kills the expression.  */
2045       else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2046         visited[pred_bb->index] = 1;
2047
2048       /* Neither gen nor kill.  */
2049       else
2050         {
2051           visited[pred_bb->index] = 1;
2052           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2053             return 1;
2054         }
2055     }
2056
2057   /* All paths have been checked.  */
2058   return 0;
2059 }
2060
2061 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2062    memory allocated for that function is returned.  */
2063
2064 static int
2065 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2066 {
2067   int rval;
2068   char *visited = XCNEWVEC (char, last_basic_block);
2069
2070   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2071
2072   free (visited);
2073   return rval;
2074 }
2075 \f
2076 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it.  */
2077
2078 static rtx
2079 process_insert_insn (struct expr *expr)
2080 {
2081   rtx reg = expr->reaching_reg;
2082   /* Copy the expression to make sure we don't have any sharing issues.  */
2083   rtx exp = copy_rtx (expr->expr);
2084   rtx pat;
2085
2086   start_sequence ();
2087
2088   /* If the expression is something that's an operand, like a constant,
2089      just copy it to a register.  */
2090   if (general_operand (exp, GET_MODE (reg)))
2091     emit_move_insn (reg, exp);
2092
2093   /* Otherwise, make a new insn to compute this expression and make sure the
2094      insn will be recognized (this also adds any needed CLOBBERs).  */
2095   else
2096     {
2097       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2098
2099       if (insn_invalid_p (insn))
2100         gcc_unreachable ();
2101     }
2102
2103   pat = get_insns ();
2104   end_sequence ();
2105
2106   return pat;
2107 }
2108
2109 /* Add EXPR to the end of basic block BB.
2110
2111    This is used by both the PRE and code hoisting.  */
2112
2113 static void
2114 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2115 {
2116   rtx insn = BB_END (bb);
2117   rtx new_insn;
2118   rtx reg = expr->reaching_reg;
2119   int regno = REGNO (reg);
2120   rtx pat, pat_end;
2121
2122   pat = process_insert_insn (expr);
2123   gcc_assert (pat && INSN_P (pat));
2124
2125   pat_end = pat;
2126   while (NEXT_INSN (pat_end) != NULL_RTX)
2127     pat_end = NEXT_INSN (pat_end);
2128
2129   /* If the last insn is a jump, insert EXPR in front [taking care to
2130      handle cc0, etc. properly].  Similarly we need to care trapping
2131      instructions in presence of non-call exceptions.  */
2132
2133   if (JUMP_P (insn)
2134       || (NONJUMP_INSN_P (insn)
2135           && (!single_succ_p (bb)
2136               || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2137     {
2138 #ifdef HAVE_cc0
2139       rtx note;
2140 #endif
2141
2142       /* If this is a jump table, then we can't insert stuff here.  Since
2143          we know the previous real insn must be the tablejump, we insert
2144          the new instruction just before the tablejump.  */
2145       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2146           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2147         insn = prev_active_insn (insn);
2148
2149 #ifdef HAVE_cc0
2150       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2151          if cc0 isn't set.  */
2152       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2153       if (note)
2154         insn = XEXP (note, 0);
2155       else
2156         {
2157           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2158           if (maybe_cc0_setter
2159               && INSN_P (maybe_cc0_setter)
2160               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2161             insn = maybe_cc0_setter;
2162         }
2163 #endif
2164       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2165       new_insn = emit_insn_before_noloc (pat, insn, bb);
2166     }
2167
2168   /* Likewise if the last insn is a call, as will happen in the presence
2169      of exception handling.  */
2170   else if (CALL_P (insn)
2171            && (!single_succ_p (bb)
2172                || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2173     {
2174       /* Keeping in mind targets with small register classes and parameters
2175          in registers, we search backward and place the instructions before
2176          the first parameter is loaded.  Do this for everyone for consistency
2177          and a presumption that we'll get better code elsewhere as well.  */
2178
2179       /* Since different machines initialize their parameter registers
2180          in different orders, assume nothing.  Collect the set of all
2181          parameter registers.  */
2182       insn = find_first_parameter_load (insn, BB_HEAD (bb));
2183
2184       /* If we found all the parameter loads, then we want to insert
2185          before the first parameter load.
2186
2187          If we did not find all the parameter loads, then we might have
2188          stopped on the head of the block, which could be a CODE_LABEL.
2189          If we inserted before the CODE_LABEL, then we would be putting
2190          the insn in the wrong basic block.  In that case, put the insn
2191          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2192       while (LABEL_P (insn)
2193              || NOTE_INSN_BASIC_BLOCK_P (insn))
2194         insn = NEXT_INSN (insn);
2195
2196       new_insn = emit_insn_before_noloc (pat, insn, bb);
2197     }
2198   else
2199     new_insn = emit_insn_after_noloc (pat, insn, bb);
2200
2201   while (1)
2202     {
2203       if (INSN_P (pat))
2204         add_label_notes (PATTERN (pat), new_insn);
2205       if (pat == pat_end)
2206         break;
2207       pat = NEXT_INSN (pat);
2208     }
2209
2210   gcse_create_count++;
2211
2212   if (dump_file)
2213     {
2214       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2215                bb->index, INSN_UID (new_insn));
2216       fprintf (dump_file, "copying expression %d to reg %d\n",
2217                expr->bitmap_index, regno);
2218     }
2219 }
2220
2221 /* Insert partially redundant expressions on edges in the CFG to make
2222    the expressions fully redundant.  */
2223
2224 static int
2225 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2226 {
2227   int e, i, j, num_edges, set_size, did_insert = 0;
2228   sbitmap *inserted;
2229
2230   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2231      if it reaches any of the deleted expressions.  */
2232
2233   set_size = pre_insert_map[0]->size;
2234   num_edges = NUM_EDGES (edge_list);
2235   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2236   sbitmap_vector_zero (inserted, num_edges);
2237
2238   for (e = 0; e < num_edges; e++)
2239     {
2240       int indx;
2241       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2242
2243       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2244         {
2245           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2246
2247           for (j = indx;
2248                insert && j < (int) expr_hash_table.n_elems;
2249                j++, insert >>= 1)
2250             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2251               {
2252                 struct expr *expr = index_map[j];
2253                 struct occr *occr;
2254
2255                 /* Now look at each deleted occurrence of this expression.  */
2256                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2257                   {
2258                     if (! occr->deleted_p)
2259                       continue;
2260
2261                     /* Insert this expression on this edge if it would
2262                        reach the deleted occurrence in BB.  */
2263                     if (!TEST_BIT (inserted[e], j))
2264                       {
2265                         rtx insn;
2266                         edge eg = INDEX_EDGE (edge_list, e);
2267
2268                         /* We can't insert anything on an abnormal and
2269                            critical edge, so we insert the insn at the end of
2270                            the previous block. There are several alternatives
2271                            detailed in Morgans book P277 (sec 10.5) for
2272                            handling this situation.  This one is easiest for
2273                            now.  */
2274
2275                         if (eg->flags & EDGE_ABNORMAL)
2276                           insert_insn_end_basic_block (index_map[j], bb);
2277                         else
2278                           {
2279                             insn = process_insert_insn (index_map[j]);
2280                             insert_insn_on_edge (insn, eg);
2281                           }
2282
2283                         if (dump_file)
2284                           {
2285                             fprintf (dump_file, "PRE: edge (%d,%d), ",
2286                                      bb->index,
2287                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2288                             fprintf (dump_file, "copy expression %d\n",
2289                                      expr->bitmap_index);
2290                           }
2291
2292                         update_ld_motion_stores (expr);
2293                         SET_BIT (inserted[e], j);
2294                         did_insert = 1;
2295                         gcse_create_count++;
2296                       }
2297                   }
2298               }
2299         }
2300     }
2301
2302   sbitmap_vector_free (inserted);
2303   return did_insert;
2304 }
2305
2306 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2307    Given "old_reg <- expr" (INSN), instead of adding after it
2308      reaching_reg <- old_reg
2309    it's better to do the following:
2310      reaching_reg <- expr
2311      old_reg      <- reaching_reg
2312    because this way copy propagation can discover additional PRE
2313    opportunities.  But if this fails, we try the old way.
2314    When "expr" is a store, i.e.
2315    given "MEM <- old_reg", instead of adding after it
2316      reaching_reg <- old_reg
2317    it's better to add it before as follows:
2318      reaching_reg <- old_reg
2319      MEM          <- reaching_reg.  */
2320
2321 static void
2322 pre_insert_copy_insn (struct expr *expr, rtx insn)
2323 {
2324   rtx reg = expr->reaching_reg;
2325   int regno = REGNO (reg);
2326   int indx = expr->bitmap_index;
2327   rtx pat = PATTERN (insn);
2328   rtx set, first_set, new_insn;
2329   rtx old_reg;
2330   int i;
2331
2332   /* This block matches the logic in hash_scan_insn.  */
2333   switch (GET_CODE (pat))
2334     {
2335     case SET:
2336       set = pat;
2337       break;
2338
2339     case PARALLEL:
2340       /* Search through the parallel looking for the set whose
2341          source was the expression that we're interested in.  */
2342       first_set = NULL_RTX;
2343       set = NULL_RTX;
2344       for (i = 0; i < XVECLEN (pat, 0); i++)
2345         {
2346           rtx x = XVECEXP (pat, 0, i);
2347           if (GET_CODE (x) == SET)
2348             {
2349               /* If the source was a REG_EQUAL or REG_EQUIV note, we
2350                  may not find an equivalent expression, but in this
2351                  case the PARALLEL will have a single set.  */
2352               if (first_set == NULL_RTX)
2353                 first_set = x;
2354               if (expr_equiv_p (SET_SRC (x), expr->expr))
2355                 {
2356                   set = x;
2357                   break;
2358                 }
2359             }
2360         }
2361
2362       gcc_assert (first_set);
2363       if (set == NULL_RTX)
2364         set = first_set;
2365       break;
2366
2367     default:
2368       gcc_unreachable ();
2369     }
2370
2371   if (REG_P (SET_DEST (set)))
2372     {
2373       old_reg = SET_DEST (set);
2374       /* Check if we can modify the set destination in the original insn.  */
2375       if (validate_change (insn, &SET_DEST (set), reg, 0))
2376         {
2377           new_insn = gen_move_insn (old_reg, reg);
2378           new_insn = emit_insn_after (new_insn, insn);
2379         }
2380       else
2381         {
2382           new_insn = gen_move_insn (reg, old_reg);
2383           new_insn = emit_insn_after (new_insn, insn);
2384         }
2385     }
2386   else /* This is possible only in case of a store to memory.  */
2387     {
2388       old_reg = SET_SRC (set);
2389       new_insn = gen_move_insn (reg, old_reg);
2390
2391       /* Check if we can modify the set source in the original insn.  */
2392       if (validate_change (insn, &SET_SRC (set), reg, 0))
2393         new_insn = emit_insn_before (new_insn, insn);
2394       else
2395         new_insn = emit_insn_after (new_insn, insn);
2396     }
2397
2398   gcse_create_count++;
2399
2400   if (dump_file)
2401     fprintf (dump_file,
2402              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2403               BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2404               INSN_UID (insn), regno);
2405 }
2406
2407 /* Copy available expressions that reach the redundant expression
2408    to `reaching_reg'.  */
2409
2410 static void
2411 pre_insert_copies (void)
2412 {
2413   unsigned int i, added_copy;
2414   struct expr *expr;
2415   struct occr *occr;
2416   struct occr *avail;
2417
2418   /* For each available expression in the table, copy the result to
2419      `reaching_reg' if the expression reaches a deleted one.
2420
2421      ??? The current algorithm is rather brute force.
2422      Need to do some profiling.  */
2423
2424   for (i = 0; i < expr_hash_table.size; i++)
2425     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2426       {
2427         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2428            we don't want to insert a copy here because the expression may not
2429            really be redundant.  So only insert an insn if the expression was
2430            deleted.  This test also avoids further processing if the
2431            expression wasn't deleted anywhere.  */
2432         if (expr->reaching_reg == NULL)
2433           continue;
2434
2435         /* Set when we add a copy for that expression.  */
2436         added_copy = 0;
2437
2438         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2439           {
2440             if (! occr->deleted_p)
2441               continue;
2442
2443             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2444               {
2445                 rtx insn = avail->insn;
2446
2447                 /* No need to handle this one if handled already.  */
2448                 if (avail->copied_p)
2449                   continue;
2450
2451                 /* Don't handle this one if it's a redundant one.  */
2452                 if (INSN_DELETED_P (insn))
2453                   continue;
2454
2455                 /* Or if the expression doesn't reach the deleted one.  */
2456                 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2457                                                expr,
2458                                                BLOCK_FOR_INSN (occr->insn)))
2459                   continue;
2460
2461                 added_copy = 1;
2462
2463                 /* Copy the result of avail to reaching_reg.  */
2464                 pre_insert_copy_insn (expr, insn);
2465                 avail->copied_p = 1;
2466               }
2467           }
2468
2469           if (added_copy)
2470             update_ld_motion_stores (expr);
2471       }
2472 }
2473
2474 /* Emit move from SRC to DEST noting the equivalence with expression computed
2475    in INSN.  */
2476
2477 static rtx
2478 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2479 {
2480   rtx new_rtx;
2481   rtx set = single_set (insn), set2;
2482   rtx note;
2483   rtx eqv;
2484
2485   /* This should never fail since we're creating a reg->reg copy
2486      we've verified to be valid.  */
2487
2488   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2489
2490   /* Note the equivalence for local CSE pass.  */
2491   set2 = single_set (new_rtx);
2492   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2493     return new_rtx;
2494   if ((note = find_reg_equal_equiv_note (insn)))
2495     eqv = XEXP (note, 0);
2496   else
2497     eqv = SET_SRC (set);
2498
2499   set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2500
2501   return new_rtx;
2502 }
2503
2504 /* Delete redundant computations.
2505    Deletion is done by changing the insn to copy the `reaching_reg' of
2506    the expression into the result of the SET.  It is left to later passes
2507    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2508
2509    Return nonzero if a change is made.  */
2510
2511 static int
2512 pre_delete (void)
2513 {
2514   unsigned int i;
2515   int changed;
2516   struct expr *expr;
2517   struct occr *occr;
2518
2519   changed = 0;
2520   for (i = 0; i < expr_hash_table.size; i++)
2521     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2522       {
2523         int indx = expr->bitmap_index;
2524
2525         /* We only need to search antic_occr since we require ANTLOC != 0.  */
2526         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2527           {
2528             rtx insn = occr->insn;
2529             rtx set;
2530             basic_block bb = BLOCK_FOR_INSN (insn);
2531
2532             /* We only delete insns that have a single_set.  */
2533             if (TEST_BIT (pre_delete_map[bb->index], indx)
2534                 && (set = single_set (insn)) != 0
2535                 && dbg_cnt (pre_insn))
2536               {
2537                 /* Create a pseudo-reg to store the result of reaching
2538                    expressions into.  Get the mode for the new pseudo from
2539                    the mode of the original destination pseudo.  */
2540                 if (expr->reaching_reg == NULL)
2541                   expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2542
2543                 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2544                 delete_insn (insn);
2545                 occr->deleted_p = 1;
2546                 changed = 1;
2547                 gcse_subst_count++;
2548
2549                 if (dump_file)
2550                   {
2551                     fprintf (dump_file,
2552                              "PRE: redundant insn %d (expression %d) in ",
2553                                INSN_UID (insn), indx);
2554                     fprintf (dump_file, "bb %d, reaching reg is %d\n",
2555                              bb->index, REGNO (expr->reaching_reg));
2556                   }
2557               }
2558           }
2559       }
2560
2561   return changed;
2562 }
2563
2564 /* Perform GCSE optimizations using PRE.
2565    This is called by one_pre_gcse_pass after all the dataflow analysis
2566    has been done.
2567
2568    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2569    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2570    Compiler Design and Implementation.
2571
2572    ??? A new pseudo reg is created to hold the reaching expression.  The nice
2573    thing about the classical approach is that it would try to use an existing
2574    reg.  If the register can't be adequately optimized [i.e. we introduce
2575    reload problems], one could add a pass here to propagate the new register
2576    through the block.
2577
2578    ??? We don't handle single sets in PARALLELs because we're [currently] not
2579    able to copy the rest of the parallel when we insert copies to create full
2580    redundancies from partial redundancies.  However, there's no reason why we
2581    can't handle PARALLELs in the cases where there are no partial
2582    redundancies.  */
2583
2584 static int
2585 pre_gcse (struct edge_list *edge_list)
2586 {
2587   unsigned int i;
2588   int did_insert, changed;
2589   struct expr **index_map;
2590   struct expr *expr;
2591
2592   /* Compute a mapping from expression number (`bitmap_index') to
2593      hash table entry.  */
2594
2595   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2596   for (i = 0; i < expr_hash_table.size; i++)
2597     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2598       index_map[expr->bitmap_index] = expr;
2599
2600   /* Delete the redundant insns first so that
2601      - we know what register to use for the new insns and for the other
2602        ones with reaching expressions
2603      - we know which insns are redundant when we go to create copies  */
2604
2605   changed = pre_delete ();
2606   did_insert = pre_edge_insert (edge_list, index_map);
2607
2608   /* In other places with reaching expressions, copy the expression to the
2609      specially allocated pseudo-reg that reaches the redundant expr.  */
2610   pre_insert_copies ();
2611   if (did_insert)
2612     {
2613       commit_edge_insertions ();
2614       changed = 1;
2615     }
2616
2617   free (index_map);
2618   return changed;
2619 }
2620
2621 /* Top level routine to perform one PRE GCSE pass.
2622
2623    Return nonzero if a change was made.  */
2624
2625 static int
2626 one_pre_gcse_pass (void)
2627 {
2628   int changed = 0;
2629
2630   gcse_subst_count = 0;
2631   gcse_create_count = 0;
2632
2633   /* Return if there's nothing to do, or it is too expensive.  */
2634   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2635       || is_too_expensive (_("PRE disabled")))
2636     return 0;
2637
2638   /* We need alias.  */
2639   init_alias_analysis ();
2640
2641   bytes_used = 0;
2642   gcc_obstack_init (&gcse_obstack);
2643   alloc_gcse_mem ();
2644
2645   alloc_hash_table (&expr_hash_table);
2646   add_noreturn_fake_exit_edges ();
2647   if (flag_gcse_lm)
2648     compute_ld_motion_mems ();
2649
2650   compute_hash_table (&expr_hash_table);
2651   if (flag_gcse_lm)
2652     trim_ld_motion_mems ();
2653   if (dump_file)
2654     dump_hash_table (dump_file, "Expression", &expr_hash_table);
2655
2656   if (expr_hash_table.n_elems > 0)
2657     {
2658       struct edge_list *edge_list;
2659       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2660       edge_list = compute_pre_data ();
2661       changed |= pre_gcse (edge_list);
2662       free_edge_list (edge_list);
2663       free_pre_mem ();
2664     }
2665
2666   if (flag_gcse_lm)
2667     free_ld_motion_mems ();
2668   remove_fake_exit_edges ();
2669   free_hash_table (&expr_hash_table);
2670
2671   free_gcse_mem ();
2672   obstack_free (&gcse_obstack, NULL);
2673
2674   /* We are finished with alias.  */
2675   end_alias_analysis ();
2676
2677   if (dump_file)
2678     {
2679       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2680                current_function_name (), n_basic_blocks, bytes_used);
2681       fprintf (dump_file, "%d substs, %d insns created\n",
2682                gcse_subst_count, gcse_create_count);
2683     }
2684
2685   return changed;
2686 }
2687 \f
2688 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2689    to INSN.  If such notes are added to an insn which references a
2690    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2691    that note, because the following loop optimization pass requires
2692    them.  */
2693
2694 /* ??? If there was a jump optimization pass after gcse and before loop,
2695    then we would not need to do this here, because jump would add the
2696    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2697
2698 static void
2699 add_label_notes (rtx x, rtx insn)
2700 {
2701   enum rtx_code code = GET_CODE (x);
2702   int i, j;
2703   const char *fmt;
2704
2705   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2706     {
2707       /* This code used to ignore labels that referred to dispatch tables to
2708          avoid flow generating (slightly) worse code.
2709
2710          We no longer ignore such label references (see LABEL_REF handling in
2711          mark_jump_label for additional information).  */
2712
2713       /* There's no reason for current users to emit jump-insns with
2714          such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2715          notes.  */
2716       gcc_assert (!JUMP_P (insn));
2717       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2718
2719       if (LABEL_P (XEXP (x, 0)))
2720         LABEL_NUSES (XEXP (x, 0))++;
2721
2722       return;
2723     }
2724
2725   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2726     {
2727       if (fmt[i] == 'e')
2728         add_label_notes (XEXP (x, i), insn);
2729       else if (fmt[i] == 'E')
2730         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2731           add_label_notes (XVECEXP (x, i, j), insn);
2732     }
2733 }
2734
2735 /* Code Hoisting variables and subroutines.  */
2736
2737 /* Very busy expressions.  */
2738 static sbitmap *hoist_vbein;
2739 static sbitmap *hoist_vbeout;
2740
2741 /* ??? We could compute post dominators and run this algorithm in
2742    reverse to perform tail merging, doing so would probably be
2743    more effective than the tail merging code in jump.c.
2744
2745    It's unclear if tail merging could be run in parallel with
2746    code hoisting.  It would be nice.  */
2747
2748 /* Allocate vars used for code hoisting analysis.  */
2749
2750 static void
2751 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2752 {
2753   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2754   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2755   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2756
2757   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2758   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2759 }
2760
2761 /* Free vars used for code hoisting analysis.  */
2762
2763 static void
2764 free_code_hoist_mem (void)
2765 {
2766   sbitmap_vector_free (antloc);
2767   sbitmap_vector_free (transp);
2768   sbitmap_vector_free (comp);
2769
2770   sbitmap_vector_free (hoist_vbein);
2771   sbitmap_vector_free (hoist_vbeout);
2772
2773   free_dominance_info (CDI_DOMINATORS);
2774 }
2775
2776 /* Compute the very busy expressions at entry/exit from each block.
2777
2778    An expression is very busy if all paths from a given point
2779    compute the expression.  */
2780
2781 static void
2782 compute_code_hoist_vbeinout (void)
2783 {
2784   int changed, passes;
2785   basic_block bb;
2786
2787   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2788   sbitmap_vector_zero (hoist_vbein, last_basic_block);
2789
2790   passes = 0;
2791   changed = 1;
2792
2793   while (changed)
2794     {
2795       changed = 0;
2796
2797       /* We scan the blocks in the reverse order to speed up
2798          the convergence.  */
2799       FOR_EACH_BB_REVERSE (bb)
2800         {
2801           if (bb->next_bb != EXIT_BLOCK_PTR)
2802             {
2803               sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2804                                              hoist_vbein, bb->index);
2805
2806               /* Include expressions in VBEout that are calculated
2807                  in BB and available at its end.  */
2808               sbitmap_a_or_b (hoist_vbeout[bb->index],
2809                               hoist_vbeout[bb->index], comp[bb->index]);
2810             }
2811
2812           changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2813                                               antloc[bb->index],
2814                                               hoist_vbeout[bb->index],
2815                                               transp[bb->index]);
2816         }
2817
2818       passes++;
2819     }
2820
2821   if (dump_file)
2822     {
2823       fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2824
2825       FOR_EACH_BB (bb)
2826         {
2827           fprintf (dump_file, "vbein (%d): ", bb->index);
2828           dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2829           fprintf (dump_file, "vbeout(%d): ", bb->index);
2830           dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2831         }
2832     }
2833 }
2834
2835 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
2836
2837 static void
2838 compute_code_hoist_data (void)
2839 {
2840   compute_local_properties (transp, comp, antloc, &expr_hash_table);
2841   prune_expressions (false);
2842   compute_code_hoist_vbeinout ();
2843   calculate_dominance_info (CDI_DOMINATORS);
2844   if (dump_file)
2845     fprintf (dump_file, "\n");
2846 }
2847
2848 /* Determine if the expression identified by EXPR_INDEX would
2849    reach BB unimpared if it was placed at the end of EXPR_BB.
2850    Stop the search if the expression would need to be moved more
2851    than DISTANCE instructions.
2852
2853    It's unclear exactly what Muchnick meant by "unimpared".  It seems
2854    to me that the expression must either be computed or transparent in
2855    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2856    would allow the expression to be hoisted out of loops, even if
2857    the expression wasn't a loop invariant.
2858
2859    Contrast this to reachability for PRE where an expression is
2860    considered reachable if *any* path reaches instead of *all*
2861    paths.  */
2862
2863 static int
2864 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2865                            char *visited, int distance, int *bb_size)
2866 {
2867   edge pred;
2868   edge_iterator ei;
2869   int visited_allocated_locally = 0;
2870
2871   /* Terminate the search if distance, for which EXPR is allowed to move,
2872      is exhausted.  */
2873   if (distance > 0)
2874     {
2875       distance -= bb_size[bb->index];
2876
2877       if (distance <= 0)
2878         return 0;
2879     }
2880   else
2881     gcc_assert (distance == 0);
2882
2883   if (visited == NULL)
2884     {
2885       visited_allocated_locally = 1;
2886       visited = XCNEWVEC (char, last_basic_block);
2887     }
2888
2889   FOR_EACH_EDGE (pred, ei, bb->preds)
2890     {
2891       basic_block pred_bb = pred->src;
2892
2893       if (pred->src == ENTRY_BLOCK_PTR)
2894         break;
2895       else if (pred_bb == expr_bb)
2896         continue;
2897       else if (visited[pred_bb->index])
2898         continue;
2899
2900       else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2901         break;
2902
2903       /* Not killed.  */
2904       else
2905         {
2906           visited[pred_bb->index] = 1;
2907           if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2908                                            visited, distance, bb_size))
2909             break;
2910         }
2911     }
2912   if (visited_allocated_locally)
2913     free (visited);
2914
2915   return (pred == NULL);
2916 }
2917 \f
2918 /* Find occurence in BB.  */
2919
2920 static struct occr *
2921 find_occr_in_bb (struct occr *occr, basic_block bb)
2922 {
2923   /* Find the right occurrence of this expression.  */
2924   while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2925     occr = occr->next;
2926
2927   return occr;
2928 }
2929
2930 /* Actually perform code hoisting.  */
2931
2932 static int
2933 hoist_code (void)
2934 {
2935   basic_block bb, dominated;
2936   VEC (basic_block, heap) *dom_tree_walk;
2937   unsigned int dom_tree_walk_index;
2938   VEC (basic_block, heap) *domby;
2939   unsigned int i,j;
2940   struct expr **index_map;
2941   struct expr *expr;
2942   int *to_bb_head;
2943   int *bb_size;
2944   int changed = 0;
2945
2946   /* Compute a mapping from expression number (`bitmap_index') to
2947      hash table entry.  */
2948
2949   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2950   for (i = 0; i < expr_hash_table.size; i++)
2951     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2952       index_map[expr->bitmap_index] = expr;
2953
2954   /* Calculate sizes of basic blocks and note how far
2955      each instruction is from the start of its block.  We then use this
2956      data to restrict distance an expression can travel.  */
2957
2958   to_bb_head = XCNEWVEC (int, get_max_uid ());
2959   bb_size = XCNEWVEC (int, last_basic_block);
2960
2961   FOR_EACH_BB (bb)
2962     {
2963       rtx insn;
2964       int to_head;
2965
2966       to_head = 0;
2967       FOR_BB_INSNS (bb, insn)
2968         {
2969           /* Don't count debug instructions to avoid them affecting
2970              decision choices.  */
2971           if (NONDEBUG_INSN_P (insn))
2972             to_bb_head[INSN_UID (insn)] = to_head++;
2973         }
2974
2975       bb_size[bb->index] = to_head;
2976     }
2977
2978   gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2979               && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2980                   == ENTRY_BLOCK_PTR->next_bb));
2981
2982   dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2983                                             ENTRY_BLOCK_PTR->next_bb);
2984
2985   /* Walk over each basic block looking for potentially hoistable
2986      expressions, nothing gets hoisted from the entry block.  */
2987   FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2988     {
2989       domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2990
2991       if (VEC_length (basic_block, domby) == 0)
2992         continue;
2993
2994       /* Examine each expression that is very busy at the exit of this
2995          block.  These are the potentially hoistable expressions.  */
2996       for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2997         {
2998           if (TEST_BIT (hoist_vbeout[bb->index], i))
2999             {
3000               /* Current expression.  */
3001               struct expr *expr = index_map[i];
3002               /* Number of occurences of EXPR that can be hoisted to BB.  */
3003               int hoistable = 0;
3004               /* Basic blocks that have occurences reachable from BB.  */
3005               bitmap_head _from_bbs, *from_bbs = &_from_bbs;
3006               /* Occurences reachable from BB.  */
3007               VEC (occr_t, heap) *occrs_to_hoist = NULL;
3008               /* We want to insert the expression into BB only once, so
3009                  note when we've inserted it.  */
3010               int insn_inserted_p;
3011               occr_t occr;
3012
3013               bitmap_initialize (from_bbs, 0);
3014
3015               /* If an expression is computed in BB and is available at end of
3016                  BB, hoist all occurences dominated by BB to BB.  */
3017               if (TEST_BIT (comp[bb->index], i))
3018                 {
3019                   occr = find_occr_in_bb (expr->antic_occr, bb);
3020
3021                   if (occr)
3022                     {
3023                       /* An occurence might've been already deleted
3024                          while processing a dominator of BB.  */
3025                       if (!occr->deleted_p)
3026                         {
3027                           gcc_assert (NONDEBUG_INSN_P (occr->insn));
3028                           hoistable++;
3029                         }
3030                     }
3031                   else
3032                     hoistable++;
3033                 }
3034
3035               /* We've found a potentially hoistable expression, now
3036                  we look at every block BB dominates to see if it
3037                  computes the expression.  */
3038               FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3039                 {
3040                   int max_distance;
3041
3042                   /* Ignore self dominance.  */
3043                   if (bb == dominated)
3044                     continue;
3045                   /* We've found a dominated block, now see if it computes
3046                      the busy expression and whether or not moving that
3047                      expression to the "beginning" of that block is safe.  */
3048                   if (!TEST_BIT (antloc[dominated->index], i))
3049                     continue;
3050
3051                   occr = find_occr_in_bb (expr->antic_occr, dominated);
3052                   gcc_assert (occr);
3053
3054                   /* An occurence might've been already deleted
3055                      while processing a dominator of BB.  */
3056                   if (occr->deleted_p)
3057                     continue;
3058                   gcc_assert (NONDEBUG_INSN_P (occr->insn));
3059
3060                   max_distance = expr->max_distance;
3061                   if (max_distance > 0)
3062                     /* Adjust MAX_DISTANCE to account for the fact that
3063                        OCCR won't have to travel all of DOMINATED, but
3064                        only part of it.  */
3065                     max_distance += (bb_size[dominated->index]
3066                                      - to_bb_head[INSN_UID (occr->insn)]);
3067
3068                   /* Note if the expression would reach the dominated block
3069                      unimpared if it was placed at the end of BB.
3070
3071                      Keep track of how many times this expression is hoistable
3072                      from a dominated block into BB.  */
3073                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3074                                                  max_distance, bb_size))
3075                     {
3076                       hoistable++;
3077                       VEC_safe_push (occr_t, heap,
3078                                      occrs_to_hoist, occr);
3079                       bitmap_set_bit (from_bbs, dominated->index);
3080                     }
3081                 }
3082
3083               /* If we found more than one hoistable occurrence of this
3084                  expression, then note it in the vector of expressions to
3085                  hoist.  It makes no sense to hoist things which are computed
3086                  in only one BB, and doing so tends to pessimize register
3087                  allocation.  One could increase this value to try harder
3088                  to avoid any possible code expansion due to register
3089                  allocation issues; however experiments have shown that
3090                  the vast majority of hoistable expressions are only movable
3091                  from two successors, so raising this threshold is likely
3092                  to nullify any benefit we get from code hoisting.  */
3093               if (hoistable > 1 && dbg_cnt (hoist_insn))
3094                 {
3095                   /* If (hoistable != VEC_length), then there is
3096                      an occurence of EXPR in BB itself.  Don't waste
3097                      time looking for LCA in this case.  */
3098                   if ((unsigned) hoistable
3099                       == VEC_length (occr_t, occrs_to_hoist))
3100                     {
3101                       basic_block lca;
3102
3103                       lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3104                                                               from_bbs);
3105                       if (lca != bb)
3106                         /* Punt, it's better to hoist these occurences to
3107                            LCA.  */
3108                         VEC_free (occr_t, heap, occrs_to_hoist);
3109                     }
3110                 }
3111               else
3112                 /* Punt, no point hoisting a single occurence.  */
3113                 VEC_free (occr_t, heap, occrs_to_hoist);
3114
3115               insn_inserted_p = 0;
3116
3117               /* Walk through occurences of I'th expressions we want
3118                  to hoist to BB and make the transformations.  */
3119               FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3120                 {
3121                   rtx insn;
3122                   rtx set;
3123
3124                   gcc_assert (!occr->deleted_p);
3125
3126                   insn = occr->insn;
3127                   set = single_set (insn);
3128                   gcc_assert (set);
3129
3130                   /* Create a pseudo-reg to store the result of reaching
3131                      expressions into.  Get the mode for the new pseudo
3132                      from the mode of the original destination pseudo.
3133
3134                      It is important to use new pseudos whenever we
3135                      emit a set.  This will allow reload to use
3136                      rematerialization for such registers.  */
3137                   if (!insn_inserted_p)
3138                     expr->reaching_reg
3139                       = gen_reg_rtx_and_attrs (SET_DEST (set));
3140
3141                   gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3142                                         insn);
3143                   delete_insn (insn);
3144                   occr->deleted_p = 1;
3145                   changed = 1;
3146                   gcse_subst_count++;
3147
3148                   if (!insn_inserted_p)
3149                     {
3150                       insert_insn_end_basic_block (expr, bb);
3151                       insn_inserted_p = 1;
3152                     }
3153                 }
3154
3155               VEC_free (occr_t, heap, occrs_to_hoist);
3156               bitmap_clear (from_bbs);
3157             }
3158         }
3159       VEC_free (basic_block, heap, domby);
3160     }
3161
3162   VEC_free (basic_block, heap, dom_tree_walk);
3163   free (bb_size);
3164   free (to_bb_head);
3165   free (index_map);
3166
3167   return changed;
3168 }
3169
3170 /* Top level routine to perform one code hoisting (aka unification) pass
3171
3172    Return nonzero if a change was made.  */
3173
3174 static int
3175 one_code_hoisting_pass (void)
3176 {
3177   int changed = 0;
3178
3179   gcse_subst_count = 0;
3180   gcse_create_count = 0;
3181
3182   /* Return if there's nothing to do, or it is too expensive.  */
3183   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3184       || is_too_expensive (_("GCSE disabled")))
3185     return 0;
3186
3187   doing_code_hoisting_p = true;
3188
3189   /* We need alias.  */
3190   init_alias_analysis ();
3191
3192   bytes_used = 0;
3193   gcc_obstack_init (&gcse_obstack);
3194   alloc_gcse_mem ();
3195
3196   alloc_hash_table (&expr_hash_table);
3197   compute_hash_table (&expr_hash_table);
3198   if (dump_file)
3199     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3200
3201   if (expr_hash_table.n_elems > 0)
3202     {
3203       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3204       compute_code_hoist_data ();
3205       changed = hoist_code ();
3206       free_code_hoist_mem ();
3207     }
3208
3209   free_hash_table (&expr_hash_table);
3210   free_gcse_mem ();
3211   obstack_free (&gcse_obstack, NULL);
3212
3213   /* We are finished with alias.  */
3214   end_alias_analysis ();
3215
3216   if (dump_file)
3217     {
3218       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3219                current_function_name (), n_basic_blocks, bytes_used);
3220       fprintf (dump_file, "%d substs, %d insns created\n",
3221                gcse_subst_count, gcse_create_count);
3222     }
3223
3224   doing_code_hoisting_p = false;
3225
3226   return changed;
3227 }
3228 \f
3229 /*  Here we provide the things required to do store motion towards the exit.
3230     In order for this to be effective, gcse also needed to be taught how to
3231     move a load when it is killed only by a store to itself.
3232
3233             int i;
3234             float a[10];
3235
3236             void foo(float scale)
3237             {
3238               for (i=0; i<10; i++)
3239                 a[i] *= scale;
3240             }
3241
3242     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3243     the load out since its live around the loop, and stored at the bottom
3244     of the loop.
3245
3246       The 'Load Motion' referred to and implemented in this file is
3247     an enhancement to gcse which when using edge based LCM, recognizes
3248     this situation and allows gcse to move the load out of the loop.
3249
3250       Once gcse has hoisted the load, store motion can then push this
3251     load towards the exit, and we end up with no loads or stores of 'i'
3252     in the loop.  */
3253
3254 static hashval_t
3255 pre_ldst_expr_hash (const void *p)
3256 {
3257   int do_not_record_p = 0;
3258   const struct ls_expr *const x = (const struct ls_expr *) p;
3259   return
3260     hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3261 }
3262
3263 static int
3264 pre_ldst_expr_eq (const void *p1, const void *p2)
3265 {
3266   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3267     *const ptr2 = (const struct ls_expr *) p2;
3268   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3269 }
3270
3271 /* This will search the ldst list for a matching expression. If it
3272    doesn't find one, we create one and initialize it.  */
3273
3274 static struct ls_expr *
3275 ldst_entry (rtx x)
3276 {
3277   int do_not_record_p = 0;
3278   struct ls_expr * ptr;
3279   unsigned int hash;
3280   void **slot;
3281   struct ls_expr e;
3282
3283   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3284                    NULL,  /*have_reg_qty=*/false);
3285
3286   e.pattern = x;
3287   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3288   if (*slot)
3289     return (struct ls_expr *)*slot;
3290
3291   ptr = XNEW (struct ls_expr);
3292
3293   ptr->next         = pre_ldst_mems;
3294   ptr->expr         = NULL;
3295   ptr->pattern      = x;
3296   ptr->pattern_regs = NULL_RTX;
3297   ptr->loads        = NULL_RTX;
3298   ptr->stores       = NULL_RTX;
3299   ptr->reaching_reg = NULL_RTX;
3300   ptr->invalid      = 0;
3301   ptr->index        = 0;
3302   ptr->hash_index   = hash;
3303   pre_ldst_mems     = ptr;
3304   *slot = ptr;
3305
3306   return ptr;
3307 }
3308
3309 /* Free up an individual ldst entry.  */
3310
3311 static void
3312 free_ldst_entry (struct ls_expr * ptr)
3313 {
3314   free_INSN_LIST_list (& ptr->loads);
3315   free_INSN_LIST_list (& ptr->stores);
3316
3317   free (ptr);
3318 }
3319
3320 /* Free up all memory associated with the ldst list.  */
3321
3322 static void
3323 free_ld_motion_mems (void)
3324 {
3325   if (pre_ldst_table)
3326     htab_delete (pre_ldst_table);
3327   pre_ldst_table = NULL;
3328
3329   while (pre_ldst_mems)
3330     {
3331       struct ls_expr * tmp = pre_ldst_mems;
3332
3333       pre_ldst_mems = pre_ldst_mems->next;
3334
3335       free_ldst_entry (tmp);
3336     }
3337
3338   pre_ldst_mems = NULL;
3339 }
3340
3341 /* Dump debugging info about the ldst list.  */
3342
3343 static void
3344 print_ldst_list (FILE * file)
3345 {
3346   struct ls_expr * ptr;
3347
3348   fprintf (file, "LDST list: \n");
3349
3350   for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3351     {
3352       fprintf (file, "  Pattern (%3d): ", ptr->index);
3353
3354       print_rtl (file, ptr->pattern);
3355
3356       fprintf (file, "\n         Loads : ");
3357
3358       if (ptr->loads)
3359         print_rtl (file, ptr->loads);
3360       else
3361         fprintf (file, "(nil)");
3362
3363       fprintf (file, "\n        Stores : ");
3364
3365       if (ptr->stores)
3366         print_rtl (file, ptr->stores);
3367       else
3368         fprintf (file, "(nil)");
3369
3370       fprintf (file, "\n\n");
3371     }
3372
3373   fprintf (file, "\n");
3374 }
3375
3376 /* Returns 1 if X is in the list of ldst only expressions.  */
3377
3378 static struct ls_expr *
3379 find_rtx_in_ldst (rtx x)
3380 {
3381   struct ls_expr e;
3382   void **slot;
3383   if (!pre_ldst_table)
3384     return NULL;
3385   e.pattern = x;
3386   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3387   if (!slot || ((struct ls_expr *)*slot)->invalid)
3388     return NULL;
3389   return (struct ls_expr *) *slot;
3390 }
3391 \f
3392 /* Load Motion for loads which only kill themselves.  */
3393
3394 /* Return true if x, a MEM, is a simple access with no side effects.
3395    These are the types of loads we consider for the ld_motion list,
3396    otherwise we let the usual aliasing take care of it.  */
3397
3398 static int
3399 simple_mem (const_rtx x)
3400 {
3401   if (MEM_VOLATILE_P (x))
3402     return 0;
3403
3404   if (GET_MODE (x) == BLKmode)
3405     return 0;
3406
3407   /* If we are handling exceptions, we must be careful with memory references
3408      that may trap.  If we are not, the behavior is undefined, so we may just
3409      continue.  */
3410   if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3411     return 0;
3412
3413   if (side_effects_p (x))
3414     return 0;
3415
3416   /* Do not consider function arguments passed on stack.  */
3417   if (reg_mentioned_p (stack_pointer_rtx, x))
3418     return 0;
3419
3420   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3421     return 0;
3422
3423   return 1;
3424 }
3425
3426 /* Make sure there isn't a buried reference in this pattern anywhere.
3427    If there is, invalidate the entry for it since we're not capable
3428    of fixing it up just yet.. We have to be sure we know about ALL
3429    loads since the aliasing code will allow all entries in the
3430    ld_motion list to not-alias itself.  If we miss a load, we will get
3431    the wrong value since gcse might common it and we won't know to
3432    fix it up.  */
3433
3434 static void
3435 invalidate_any_buried_refs (rtx x)
3436 {
3437   const char * fmt;
3438   int i, j;
3439   struct ls_expr * ptr;
3440
3441   /* Invalidate it in the list.  */
3442   if (MEM_P (x) && simple_mem (x))
3443     {
3444       ptr = ldst_entry (x);
3445       ptr->invalid = 1;
3446     }
3447
3448   /* Recursively process the insn.  */
3449   fmt = GET_RTX_FORMAT (GET_CODE (x));
3450
3451   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3452     {
3453       if (fmt[i] == 'e')
3454         invalidate_any_buried_refs (XEXP (x, i));
3455       else if (fmt[i] == 'E')
3456         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3457           invalidate_any_buried_refs (XVECEXP (x, i, j));
3458     }
3459 }
3460
3461 /* The maximum number of load/store motions that can be performed for one
3462    loop.  */
3463 static unsigned maximum_lsm_limit;
3464
3465 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3466    being defined as MEM loads and stores to symbols, with no side effects
3467    and no registers in the expression.  For a MEM destination, we also
3468    check that the insn is still valid if we replace the destination with a
3469    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
3470    which don't match this criteria, they are invalidated and trimmed out
3471    later.  */
3472
3473 static void
3474 compute_ld_motion_mems (void)
3475 {
3476   struct ls_expr * ptr;
3477   basic_block bb;
3478   rtx insn;
3479
3480   init_loop_lsm_limit ();
3481
3482   pre_ldst_mems = NULL;
3483   pre_ldst_table
3484     = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3485
3486   FOR_EACH_BB (bb)
3487     {
3488       struct loop *loop = bb->loop_father;
3489       unsigned ld_motion_limit = find_loop_lsm_limit (loop);
3490       unsigned ld_motion_count = 0;
3491
3492       FOR_BB_INSNS (bb, insn)
3493         {
3494           if (NONDEBUG_INSN_P (insn))
3495             {
3496               if (GET_CODE (PATTERN (insn)) == SET)
3497                 {
3498                   rtx src = SET_SRC (PATTERN (insn));
3499                   rtx dest = SET_DEST (PATTERN (insn));
3500
3501                   /* Check for a simple LOAD...  */
3502                   if (MEM_P (src) && simple_mem (src))
3503                     {
3504                       ptr = ldst_entry (src);
3505                       if (REG_P (dest))
3506                         ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3507                       else
3508                         ptr->invalid = 1;
3509                     }
3510                   else
3511                     {
3512                       /* Make sure there isn't a buried load somewhere.  */
3513                       invalidate_any_buried_refs (src);
3514                     }
3515
3516                   /* Check for stores. Don't worry about aliased ones, they
3517                      will block any movement we might do later. We only care
3518                      about this exact pattern since those are the only
3519                      circumstance that we will ignore the aliasing info.  */
3520                   if (MEM_P (dest) && simple_mem (dest))
3521                     {
3522                       ptr = ldst_entry (dest);
3523
3524                       if (! MEM_P (src)
3525                           && GET_CODE (src) != ASM_OPERANDS
3526                           /* Check for REG manually since want_to_gcse_p
3527                              returns 0 for all REGs.  */
3528                           && can_assign_to_reg_without_clobbers_p (src)
3529                           && ld_motion_count < ld_motion_limit)
3530                         {
3531                           ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3532                           ld_motion_count++;
3533                         }
3534                       else
3535                         {
3536                           ptr->invalid = 1;
3537                           if (dump_file && ld_motion_count >= ld_motion_limit)
3538                             fprintf (dump_file,
3539                                      "suppress ld_motion in loop=%d bb=%d due"
3540                                      " to reg pressure.\n",
3541                                      loop->num, bb->index);
3542
3543                         }
3544                     }
3545                 }
3546               else
3547                 invalidate_any_buried_refs (PATTERN (insn));
3548             }
3549         }
3550       adjust_loop_lsm_limit (loop, ld_motion_count);
3551     }
3552
3553   fini_loop_lsm_limit ();
3554 }
3555
3556 typedef struct
3557 {
3558   struct loop *loop;
3559   unsigned lsm_limit;
3560 } loop_lsm_limit_map;
3561
3562 /* hash_table that maps loop to lsm_limit.  */
3563 static htab_t loop_lsm_limit_map_htab = NULL;
3564
3565 /* hash function for loop_lsm_limit_map_htab.  */
3566
3567 static hashval_t
3568 loop_lsm_limit_map_hash (const void *p)
3569 {
3570   const loop_lsm_limit_map *const x = (const loop_lsm_limit_map*) p;
3571   return htab_hash_pointer (x->loop);
3572 }
3573
3574 /* hash equal function for loop_lsm_limit_map_htab.  */
3575
3576 static int
3577 loop_lsm_limit_map_eq (const void *p1, const void *p2)
3578 {
3579   const loop_lsm_limit_map *const ptr1 = (const loop_lsm_limit_map *) p1;
3580   const loop_lsm_limit_map *const ptr2 = (const loop_lsm_limit_map *) p2;
3581
3582   return htab_eq_pointer (ptr1->loop, ptr2->loop);
3583 }
3584
3585 /* free one entry in loop_lsm_limit_map_htab.  */
3586
3587 static int
3588 free_loop_lsm_limit_map (void **slot, void *data ATTRIBUTE_UNUSED)
3589 {
3590   free(*(loop_lsm_limit_map **) slot);
3591   return 1;
3592 }
3593
3594 /* use the loops strcture to find enclosing loop for each basic_block */
3595 static struct loops loops_lsm;
3596 static bool dominator_info_avail_before_lsm_limit = false;
3597
3598 /* Initalization function for suppressing execessive load/store motions.
3599    Build the loop structure so that we can count the number of motions
3600    performed. Set the maximum number of motions for a loop.  */
3601
3602 static void
3603 init_loop_lsm_limit (void)
3604 {
3605   dominator_info_avail_before_lsm_limit = dom_info_available_p(CDI_DOMINATORS);
3606   flow_loops_find (&loops_lsm);
3607
3608   loop_lsm_limit_map_htab = htab_create (59, loop_lsm_limit_map_hash,
3609                                          loop_lsm_limit_map_eq, NULL);
3610
3611   /* avail_regs minus the induction variables  */
3612   maximum_lsm_limit = target_avail_regs - 1;
3613 }
3614
3615 /* Cleanup function: destroy loop structure and free space.  */
3616
3617 static void
3618 fini_loop_lsm_limit (void)
3619 {
3620   basic_block bb;
3621
3622   flow_loops_free (&loops_lsm);
3623
3624   if (!dominator_info_avail_before_lsm_limit)
3625     free_dominance_info (CDI_DOMINATORS);
3626
3627   FOR_ALL_BB (bb)
3628     bb->loop_father = NULL;
3629
3630   if (loop_lsm_limit_map_htab)
3631     {
3632       htab_traverse (loop_lsm_limit_map_htab, free_loop_lsm_limit_map, NULL);
3633       htab_delete (loop_lsm_limit_map_htab);
3634     }
3635 }
3636
3637 /* This routine tries to find the number of registers that
3638    (1) live across the iteration, and
3639    (2) referenced in the loop.
3640    those store-motions performed in tree-lim falls into this category.  */
3641
3642 static unsigned
3643 estimate_reg_pressure_before_lsm (struct loop *loop)
3644 {
3645   basic_block *body;
3646   unsigned int i;
3647   unsigned int count;
3648   regset live_across_iteration;
3649   regset used_in_loop;
3650   basic_block header, latch;
3651
3652   if (!loop)
3653     return 0;
3654
3655   gcc_assert (loop != loops_lsm.tree_root);
3656
3657   latch = loop->latch;
3658   /* don't handle multiple latches */
3659   if (!latch)
3660     return 0;
3661
3662   header = loop->header;
3663   gcc_assert (header);
3664
3665   live_across_iteration = ALLOC_REG_SET (&reg_obstack);
3666   COPY_REG_SET (live_across_iteration, DF_LR_IN (header));
3667   AND_REG_SET (live_across_iteration, DF_LR_OUT (latch));
3668
3669   used_in_loop = ALLOC_REG_SET (&reg_obstack);
3670   CLEAR_REG_SET (used_in_loop);
3671
3672   body = get_loop_body (loop);
3673   /* this loop computes a subset of must-use */
3674   for (i=0; i<loop->num_nodes; i++)
3675     {
3676        basic_block bb = *(body + i);
3677
3678        if (dominated_by_p (CDI_DOMINATORS, latch, bb))
3679          IOR_REG_SET (used_in_loop, &(DF_LR_BB_INFO (bb)->use));
3680     }
3681
3682
3683   AND_REG_SET (live_across_iteration, used_in_loop);
3684
3685   count = bitmap_count_bits (live_across_iteration);
3686
3687   FREE_REG_SET (live_across_iteration);
3688   FREE_REG_SET (used_in_loop);
3689
3690   return count;
3691 }
3692
3693 /* Find the number of load/store motion we can perform without
3694    create excessive register pressure to loop LOOP  */
3695
3696 static unsigned
3697 find_loop_lsm_limit (struct loop *loop)
3698 {
3699   void **slot;
3700   loop_lsm_limit_map t, *ret;
3701   unsigned reg_pressure;
3702   unsigned limit = maximum_lsm_limit;
3703
3704   if (!loop || loop == loops_lsm.tree_root)
3705     return (unsigned)-1;
3706
3707   t.loop = loop;
3708   slot = htab_find_slot (loop_lsm_limit_map_htab, &t, NO_INSERT);
3709   if (slot)
3710     return (*(loop_lsm_limit_map**)slot)->lsm_limit;
3711
3712   /* insert this loop and initialize it  */
3713   reg_pressure = estimate_reg_pressure_before_lsm (loop);
3714   ret = (loop_lsm_limit_map*) gmalloc (sizeof (loop_lsm_limit_map));
3715   gcc_assert (ret);
3716
3717   limit = (limit > reg_pressure ? limit - reg_pressure : 0);
3718   ret->loop = loop;
3719   ret->lsm_limit = limit;
3720   slot = htab_find_slot (loop_lsm_limit_map_htab, ret, INSERT);
3721   gcc_assert (slot);
3722   (*slot) = (void**)ret;
3723
3724   return limit;
3725 }
3726
3727 /* Adjust the lsm limit based on the instances that have been
3728    performed.  */
3729
3730 static void
3731 adjust_loop_lsm_limit (struct loop *loop, unsigned performed)
3732 {
3733   void **slot;
3734   loop_lsm_limit_map t;
3735   unsigned limit;
3736
3737   if (performed == 0 || loop == NULL || loop == loops_lsm.tree_root)
3738     return;
3739
3740   t.loop = loop;
3741   slot = htab_find_slot (loop_lsm_limit_map_htab, &t, NO_INSERT);
3742   gcc_assert (slot);
3743   limit = (*(loop_lsm_limit_map**)slot)->lsm_limit;
3744   gcc_assert (limit >= performed);
3745   (*(loop_lsm_limit_map**)slot)->lsm_limit = limit - performed;
3746 }
3747
3748 /* Remove any references that have been either invalidated or are not in the
3749    expression list for pre gcse.  */
3750
3751 static void
3752 trim_ld_motion_mems (void)
3753 {
3754   struct ls_expr * * last = & pre_ldst_mems;
3755   struct ls_expr * ptr = pre_ldst_mems;
3756
3757   while (ptr != NULL)
3758     {
3759       struct expr * expr;
3760
3761       /* Delete if entry has been made invalid.  */
3762       if (! ptr->invalid)
3763         {
3764           /* Delete if we cannot find this mem in the expression list.  */
3765           unsigned int hash = ptr->hash_index % expr_hash_table.size;
3766
3767           for (expr = expr_hash_table.table[hash];
3768                expr != NULL;
3769                expr = expr->next_same_hash)
3770             if (expr_equiv_p (expr->expr, ptr->pattern))
3771               break;
3772         }
3773       else
3774         expr = (struct expr *) 0;
3775
3776       if (expr)
3777         {
3778           /* Set the expression field if we are keeping it.  */
3779           ptr->expr = expr;
3780           last = & ptr->next;
3781           ptr = ptr->next;
3782         }
3783       else
3784         {
3785           *last = ptr->next;
3786           htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3787           free_ldst_entry (ptr);
3788           ptr = * last;
3789         }
3790     }
3791
3792   /* Show the world what we've found.  */
3793   if (dump_file && pre_ldst_mems != NULL)
3794     print_ldst_list (dump_file);
3795 }
3796
3797 /* This routine will take an expression which we are replacing with
3798    a reaching register, and update any stores that are needed if
3799    that expression is in the ld_motion list.  Stores are updated by
3800    copying their SRC to the reaching register, and then storing
3801    the reaching register into the store location. These keeps the
3802    correct value in the reaching register for the loads.  */
3803
3804 static void
3805 update_ld_motion_stores (struct expr * expr)
3806 {
3807   struct ls_expr * mem_ptr;
3808
3809   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3810     {
3811       /* We can try to find just the REACHED stores, but is shouldn't
3812          matter to set the reaching reg everywhere...  some might be
3813          dead and should be eliminated later.  */
3814
3815       /* We replace (set mem expr) with (set reg expr) (set mem reg)
3816          where reg is the reaching reg used in the load.  We checked in
3817          compute_ld_motion_mems that we can replace (set mem expr) with
3818          (set reg expr) in that insn.  */
3819       rtx list = mem_ptr->stores;
3820
3821       for ( ; list != NULL_RTX; list = XEXP (list, 1))
3822         {
3823           rtx insn = XEXP (list, 0);
3824           rtx pat = PATTERN (insn);
3825           rtx src = SET_SRC (pat);
3826           rtx reg = expr->reaching_reg;
3827           rtx copy;
3828
3829           /* If we've already copied it, continue.  */
3830           if (expr->reaching_reg == src)
3831             continue;
3832
3833           if (dump_file)
3834             {
3835               fprintf (dump_file, "PRE:  store updated with reaching reg ");
3836               print_rtl (dump_file, reg);
3837               fprintf (dump_file, ":\n  ");
3838               print_inline_rtx (dump_file, insn, 8);
3839               fprintf (dump_file, "\n");
3840             }
3841
3842           copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3843           emit_insn_before (copy, insn);
3844           SET_SRC (pat) = reg;
3845           df_insn_rescan (insn);
3846
3847           /* un-recognize this pattern since it's probably different now.  */
3848           INSN_CODE (insn) = -1;
3849           gcse_create_count++;
3850         }
3851     }
3852 }
3853 \f
3854 /* Return true if the graph is too expensive to optimize. PASS is the
3855    optimization about to be performed.  */
3856
3857 static bool
3858 is_too_expensive (const char *pass)
3859 {
3860   /* Trying to perform global optimizations on flow graphs which have
3861      a high connectivity will take a long time and is unlikely to be
3862      particularly useful.
3863
3864      In normal circumstances a cfg should have about twice as many
3865      edges as blocks.  But we do not want to punish small functions
3866      which have a couple switch statements.  Rather than simply
3867      threshold the number of blocks, uses something with a more
3868      graceful degradation.  */
3869   if (n_edges > 20000 + n_basic_blocks * 4)
3870     {
3871       warning (OPT_Wdisabled_optimization,
3872                "%s: %d basic blocks and %d edges/basic block",
3873                pass, n_basic_blocks, n_edges / n_basic_blocks);
3874
3875       return true;
3876     }
3877
3878   /* If allocating memory for the dataflow bitmaps would take up too much
3879      storage it's better just to disable the optimization.  */
3880   if ((n_basic_blocks
3881        * SBITMAP_SET_SIZE (max_reg_num ())
3882        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3883     {
3884       warning (OPT_Wdisabled_optimization,
3885                "%s: %d basic blocks and %d registers",
3886                pass, n_basic_blocks, max_reg_num ());
3887
3888       return true;
3889     }
3890
3891   return false;
3892 }
3893 \f
3894 /* All the passes implemented in this file.  Each pass has its
3895    own gate and execute function, and at the end of the file a
3896    pass definition for passes.c.
3897
3898    We do not construct an accurate cfg in functions which call
3899    setjmp, so none of these passes runs if the function calls
3900    setjmp.
3901    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
3902
3903 static bool
3904 gate_rtl_pre (void)
3905 {
3906   return optimize > 0 && flag_gcse
3907     && !cfun->calls_setjmp
3908     && optimize_function_for_speed_p (cfun)
3909     && dbg_cnt (pre);
3910 }
3911
3912 static unsigned int
3913 execute_rtl_pre (void)
3914 {
3915   int changed;
3916   delete_unreachable_blocks ();
3917   df_analyze ();
3918   changed = one_pre_gcse_pass ();
3919   flag_rerun_cse_after_global_opts |= changed;
3920   if (changed)
3921     cleanup_cfg (0);
3922   return 0;
3923 }
3924
3925 static bool
3926 gate_rtl_hoist (void)
3927 {
3928   return optimize > 0 && flag_gcse
3929     && !cfun->calls_setjmp
3930     /* It does not make sense to run code hoisting unless we are optimizing
3931        for code size -- it rarely makes programs faster, and can make then
3932        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
3933     && optimize_function_for_size_p (cfun)
3934     && dbg_cnt (hoist);
3935 }
3936
3937 static unsigned int
3938 execute_rtl_hoist (void)
3939 {
3940   int changed;
3941   delete_unreachable_blocks ();
3942   df_analyze ();
3943   changed = one_code_hoisting_pass ();
3944   flag_rerun_cse_after_global_opts |= changed;
3945   if (changed)
3946     cleanup_cfg (0);
3947   return 0;
3948 }
3949
3950 struct rtl_opt_pass pass_rtl_pre =
3951 {
3952  {
3953   RTL_PASS,
3954   "rtl pre",                            /* name */
3955   gate_rtl_pre,                         /* gate */
3956   execute_rtl_pre,                      /* execute */
3957   NULL,                                 /* sub */
3958   NULL,                                 /* next */
3959   0,                                    /* static_pass_number */
3960   TV_PRE,                               /* tv_id */
3961   PROP_cfglayout,                       /* properties_required */
3962   0,                                    /* properties_provided */
3963   0,                                    /* properties_destroyed */
3964   0,                                    /* todo_flags_start */
3965   TODO_df_finish | TODO_verify_rtl_sharing |
3966   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3967  }
3968 };
3969
3970 struct rtl_opt_pass pass_rtl_hoist =
3971 {
3972  {
3973   RTL_PASS,
3974   "hoist",                              /* name */
3975   gate_rtl_hoist,                       /* gate */
3976   execute_rtl_hoist,                    /* execute */
3977   NULL,                                 /* sub */
3978   NULL,                                 /* next */
3979   0,                                    /* static_pass_number */
3980   TV_HOIST,                             /* tv_id */
3981   PROP_cfglayout,                       /* properties_required */
3982   0,                                    /* properties_provided */
3983   0,                                    /* properties_destroyed */
3984   0,                                    /* todo_flags_start */
3985   TODO_df_finish | TODO_verify_rtl_sharing |
3986   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3987  }
3988 };
3989
3990 #include "gt-gcse.h"