gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162 #include "params.h"
 163
 164 #include "obstack.h"
 165 #define obstack_chunk_alloc gmalloc
 166 #define obstack_chunk_free free
 167
 168 /* Maximum number of passes to perform.  */
 169 #define MAX_PASSES 1
 170
 171 /* Propagate flow information through back edges and thus enable PRE's
 172    moving loop invariant calculations out of loops.
 173
 174    Originally this tended to create worse overall code, but several
 175    improvements during the development of PRE seem to have made following
 176    back edges generally a win.
 177
 178    Note much of the loop invariant code motion done here would normally
 179    be done by loop.c, which has more heuristics for when to move invariants
 180    out of loops.  At some point we might need to move some of those
 181    heuristics into gcse.c.  */
 182 #define FOLLOW_BACK_EDGES 1
 183
 184 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 185    are a superset of those done by GCSE.
 186
 187    We perform the following steps:
 188
 189    1) Compute basic block information.
 190
 191    2) Compute table of places where registers are set.
 192
 193    3) Perform copy/constant propagation.
 194
 195    4) Perform global cse.
 196
 197    5) Perform another pass of copy/constant propagation.
 198
 199    Two passes of copy/constant propagation are done because the first one
 200    enables more GCSE and the second one helps to clean up the copies that
 201    GCSE creates.  This is needed more for PRE than for Classic because Classic
 202    GCSE will try to use an existing register containing the common
 203    subexpression rather than create a new one.  This is harder to do for PRE
 204    because of the code motion (which Classic GCSE doesn't do).
 205
 206    Expressions we are interested in GCSE-ing are of the form
 207    (set (pseudo-reg) (expression)).
 208    Function want_to_gcse_p says what these are.
 209
 210    PRE handles moving invariant expressions out of loops (by treating them as
 211    partially redundant).
 212
 213    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 214    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 215    (Rice University) on value numbering is a useful reference for this.
 216
 217    **********************
 218
 219    We used to support multiple passes but there are diminishing returns in
 220    doing so.  The first pass usually makes 90% of the changes that are doable.
 221    A second pass can make a few more changes made possible by the first pass.
 222    Experiments show any further passes don't make enough changes to justify
 223    the expense.
 224
 225    A study of spec92 using an unlimited number of passes:
 226    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 227    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 228    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 229
 230    It was found doing copy propagation between each pass enables further
 231    substitutions.
 232
 233    PRE is quite expensive in complicated functions because the DFA can take
 234    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 235    be modified if one wants to experiment.
 236
 237    **********************
 238
 239    The steps for PRE are:
 240
 241    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 242
 243    2) Perform the data flow analysis for PRE.
 244
 245    3) Delete the redundant instructions
 246
 247    4) Insert the required copies [if any] that make the partially
 248       redundant instructions fully redundant.
 249
 250    5) For other reaching expressions, insert an instruction to copy the value
 251       to a newly created pseudo that will reach the redundant instruction.
 252
 253    The deletion is done first so that when we do insertions we
 254    know which pseudo reg to use.
 255
 256    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 257    argue it is not.  The number of iterations for the algorithm to converge
 258    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 259
 260    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 261    we create.  To make an expression reach the place where it's redundant,
 262    the result of the expression is copied to a new register, and the redundant
 263    expression is deleted by replacing it with this new register.  Classic GCSE
 264    doesn't have this problem as much as it computes the reaching defs of
 265    each register in each block and thus can try to use an existing register.
 266
 267    **********************
 268
 269    A fair bit of simplicity is created by creating small functions for simple
 270    tasks, even when the function is only called in one place.  This may
 271    measurably slow things down [or may not] by creating more function call
 272    overhead than is necessary.  The source is laid out so that it's trivial
 273    to make the affected functions inline so that one can measure what speed
 274    up, if any, can be achieved, and maybe later when things settle things can
 275    be rearranged.
 276
 277    Help stamp out big monolithic functions!  */
 278 \f
 279 /* GCSE global vars.  */
 280
 281 /* -dG dump file.  */
 282 static FILE *gcse_file;
 283
 284 /* Note whether or not we should run jump optimization after gcse.  We
 285    want to do this for two cases.
 286
 287     * If we changed any jumps via cprop.
 288
 289     * If we added any labels via edge splitting.  */
 290
 291 static int run_jump_opt_after_gcse;
 292
 293 /* Bitmaps are normally not included in debugging dumps.
 294    However it's useful to be able to print them from GDB.
 295    We could create special functions for this, but it's simpler to
 296    just allow passing stderr to the dump_foo fns.  Since stderr can
 297    be a macro, we store a copy here.  */
 298 static FILE *debug_stderr;
 299
 300 /* An obstack for our working variables.  */
 301 static struct obstack gcse_obstack;
 302
 303 /* Non-zero for each mode that supports (set (reg) (reg)).
 304    This is trivially true for integer and floating point values.
 305    It may or may not be true for condition codes.  */
 306 static char can_copy_p[(int) NUM_MACHINE_MODES];
 307
 308 /* Non-zero if can_copy_p has been initialized.  */
 309 static int can_copy_init_p;
 310
 311 struct reg_use {rtx reg_rtx; };
 312
 313 /* Hash table of expressions.  */
 314
 315 struct expr
 316 {
 317   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 318   rtx expr;
 319   /* Index in the available expression bitmaps.  */
 320   int bitmap_index;
 321   /* Next entry with the same hash.  */
 322   struct expr *next_same_hash;
 323   /* List of anticipatable occurrences in basic blocks in the function.
 324      An "anticipatable occurrence" is one that is the first occurrence in the
 325      basic block, the operands are not modified in the basic block prior
 326      to the occurrence and the output is not used between the start of
 327      the block and the occurrence.  */
 328   struct occr *antic_occr;
 329   /* List of available occurrence in basic blocks in the function.
 330      An "available occurrence" is one that is the last occurrence in the
 331      basic block and the operands are not modified by following statements in
 332      the basic block [including this insn].  */
 333   struct occr *avail_occr;
 334   /* Non-null if the computation is PRE redundant.
 335      The value is the newly created pseudo-reg to record a copy of the
 336      expression in all the places that reach the redundant copy.  */
 337   rtx reaching_reg;
 338 };
 339
 340 /* Occurrence of an expression.
 341    There is one per basic block.  If a pattern appears more than once the
 342    last appearance is used [or first for anticipatable expressions].  */
 343
 344 struct occr
 345 {
 346   /* Next occurrence of this expression.  */
 347   struct occr *next;
 348   /* The insn that computes the expression.  */
 349   rtx insn;
 350   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 351   char deleted_p;
 352   /* Non-zero if this [available] occurrence has been copied to
 353      reaching_reg.  */
 354   /* ??? This is mutually exclusive with deleted_p, so they could share
 355      the same byte.  */
 356   char copied_p;
 357 };
 358
 359 /* Expression and copy propagation hash tables.
 360    Each hash table is an array of buckets.
 361    ??? It is known that if it were an array of entries, structure elements
 362    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 363    not clear whether in the final analysis a sufficient amount of memory would
 364    be saved as the size of the available expression bitmaps would be larger
 365    [one could build a mapping table without holes afterwards though].
 366    Someday I'll perform the computation and figure it out.  */
 367
 368 /* Total size of the expression hash table, in elements.  */
 369 static unsigned int expr_hash_table_size;
 370
 371 /* The table itself.
 372    This is an array of `expr_hash_table_size' elements.  */
 373 static struct expr **expr_hash_table;
 374
 375 /* Total size of the copy propagation hash table, in elements.  */
 376 static unsigned int set_hash_table_size;
 377
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #ifdef ENABLE_CHECKING
 391 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
 392 #else
 393 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 394 #endif
 395
 396 /* Number of cuids.  */
 397 static int max_cuid;
 398
 399 /* Mapping of cuids to insns.  */
 400 static rtx *cuid_insn;
 401
 402 /* Get insn from cuid.  */
 403 #define CUID_INSN(CUID) (cuid_insn[CUID])
 404
 405 /* Maximum register number in function prior to doing gcse + 1.
 406    Registers created during this pass have regno >= max_gcse_regno.
 407    This is named with "gcse" to not collide with global of same name.  */
 408 static unsigned int max_gcse_regno;
 409
 410 /* Maximum number of cse-able expressions found.  */
 411 static int n_exprs;
 412
 413 /* Maximum number of assignments for copy propagation found.  */
 414 static int n_sets;
 415
 416 /* Table of registers that are modified.
 417
 418    For each register, each element is a list of places where the pseudo-reg
 419    is set.
 420
 421    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 422    requires knowledge of which blocks kill which regs [and thus could use
 423    a bitmap instead of the lists `reg_set_table' uses].
 424
 425    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 426    num-regs) [however perhaps it may be useful to keep the data as is].  One
 427    advantage of recording things this way is that `reg_set_table' is fairly
 428    sparse with respect to pseudo regs but for hard regs could be fairly dense
 429    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 430    up functions like compute_transp since in the case of pseudo-regs we only
 431    need to iterate over the number of times a pseudo-reg is set, not over the
 432    number of basic blocks [clearly there is a bit of a slow down in the cases
 433    where a pseudo is set more than once in a block, however it is believed
 434    that the net effect is to speed things up].  This isn't done for hard-regs
 435    because recording call-clobbered hard-regs in `reg_set_table' at each
 436    function call can consume a fair bit of memory, and iterating over
 437    hard-regs stored this way in compute_transp will be more expensive.  */
 438
 439 typedef struct reg_set
 440 {
 441   /* The next setting of this register.  */
 442   struct reg_set *next;
 443   /* The insn where it was set.  */
 444   rtx insn;
 445 } reg_set;
 446
 447 static reg_set **reg_set_table;
 448
 449 /* Size of `reg_set_table'.
 450    The table starts out at max_gcse_regno + slop, and is enlarged as
 451    necessary.  */
 452 static int reg_set_table_size;
 453
 454 /* Amount to grow `reg_set_table' by when it's full.  */
 455 #define REG_SET_TABLE_SLOP 100
 456
 457 /* Bitmap containing one bit for each register in the program.
 458    Used when performing GCSE to track which registers have been set since
 459    the start of the basic block.  */
 460 static sbitmap reg_set_bitmap;
 461
 462 /* For each block, a bitmap of registers set in the block.
 463    This is used by expr_killed_p and compute_transp.
 464    It is computed during hash table computation and not by compute_sets
 465    as it includes registers added since the last pass (or between cprop and
 466    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 467 static sbitmap *reg_set_in_block;
 468
 469 /* For each block, non-zero if memory is set in that block.
 470    This is computed during hash table computation and is used by
 471    expr_killed_p and compute_transp.
 472    ??? Handling of memory is very simple, we don't make any attempt
 473    to optimize things (later).
 474    ??? This can be computed by compute_sets since the information
 475    doesn't change.  */
 476 static char *mem_set_in_block;
 477
 478 /* Various variables for statistics gathering.  */
 479
 480 /* Memory used in a pass.
 481    This isn't intended to be absolutely precise.  Its intent is only
 482    to keep an eye on memory usage.  */
 483 static int bytes_used;
 484
 485 /* GCSE substitutions made.  */
 486 static int gcse_subst_count;
 487 /* Number of copy instructions created.  */
 488 static int gcse_create_count;
 489 /* Number of constants propagated.  */
 490 static int const_prop_count;
 491 /* Number of copys propagated.  */
 492 static int copy_prop_count;
 493 \f
 494 /* These variables are used by classic GCSE.
 495    Normally they'd be defined a bit later, but `rd_gen' needs to
 496    be declared sooner.  */
 497
 498 /* Each block has a bitmap of each type.
 499    The length of each blocks bitmap is:
 500
 501        max_cuid  - for reaching definitions
 502        n_exprs - for available expressions
 503
 504    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 505    rd_kill[block_num][cuid_num]
 506    ae_kill[block_num][expr_num]                  */
 507
 508 /* For reaching defs */
 509 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 510
 511 /* for available exprs */
 512 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 513
 514 /* Objects of this type are passed around by the null-pointer check
 515    removal routines.  */
 516 struct null_pointer_info
 517 {
 518   /* The basic block being processed.  */
 519   int current_block;
 520   /* The first register to be handled in this pass.  */
 521   unsigned int min_reg;
 522   /* One greater than the last register to be handled in this pass.  */
 523   unsigned int max_reg;
 524   sbitmap *nonnull_local;
 525   sbitmap *nonnull_killed;
 526 };
 527 \f
 528 static void compute_can_copy    PARAMS ((void));
 529 static char *gmalloc            PARAMS ((unsigned int));
 530 static char *grealloc           PARAMS ((char *, unsigned int));
 531 static char *gcse_alloc         PARAMS ((unsigned long));
 532 static void alloc_gcse_mem      PARAMS ((rtx));
 533 static void free_gcse_mem       PARAMS ((void));
 534 static void alloc_reg_set_mem   PARAMS ((int));
 535 static void free_reg_set_mem    PARAMS ((void));
 536 static int get_bitmap_width     PARAMS ((int, int, int));
 537 static void record_one_set      PARAMS ((int, rtx));
 538 static void record_set_info     PARAMS ((rtx, rtx, void *));
 539 static void compute_sets        PARAMS ((rtx));
 540 static void hash_scan_insn      PARAMS ((rtx, int, int));
 541 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 542 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 543 static void hash_scan_call      PARAMS ((rtx, rtx));
 544 static int want_to_gcse_p       PARAMS ((rtx));
 545 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 546 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 547 static int oprs_available_p     PARAMS ((rtx, rtx));
 548 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 549                                           int, int));
 550 static void insert_set_in_table PARAMS ((rtx, rtx));
 551 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 552 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 553 static unsigned int hash_string_1 PARAMS ((const char *));
 554 static unsigned int hash_set    PARAMS ((int, int));
 555 static int expr_equiv_p         PARAMS ((rtx, rtx));
 556 static void record_last_reg_set_info PARAMS ((rtx, int));
 557 static void record_last_mem_set_info PARAMS ((rtx));
 558 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 559 static void compute_hash_table  PARAMS ((int));
 560 static void alloc_set_hash_table PARAMS ((int));
 561 static void free_set_hash_table PARAMS ((void));
 562 static void compute_set_hash_table PARAMS ((void));
 563 static void alloc_expr_hash_table PARAMS ((unsigned int));
 564 static void free_expr_hash_table PARAMS ((void));
 565 static void compute_expr_hash_table PARAMS ((void));
 566 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 567                                          int, int));
 568 static struct expr *lookup_expr PARAMS ((rtx));
 569 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 570 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 571 static void reset_opr_set_tables PARAMS ((void));
 572 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 573 static void mark_call           PARAMS ((rtx));
 574 static void mark_set            PARAMS ((rtx, rtx));
 575 static void mark_clobber        PARAMS ((rtx, rtx));
 576 static void mark_oprs_set       PARAMS ((rtx));
 577 static void alloc_cprop_mem     PARAMS ((int, int));
 578 static void free_cprop_mem      PARAMS ((void));
 579 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 580 static void compute_transpout   PARAMS ((void));
 581 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 582                                               int));
 583 static void compute_cprop_data  PARAMS ((void));
 584 static void find_used_regs      PARAMS ((rtx));
 585 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 586 static struct expr *find_avail_set PARAMS ((int, rtx));
 587 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 588 #ifdef HAVE_cc0
 589 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 590 #endif
 591 static int cprop_insn           PARAMS ((rtx, int));
 592 static int cprop                PARAMS ((int));
 593 static int one_cprop_pass       PARAMS ((int, int));
 594 static void alloc_pre_mem       PARAMS ((int, int));
 595 static void free_pre_mem        PARAMS ((void));
 596 static void compute_pre_data    PARAMS ((void));
 597 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 598 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 599 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 600 static void pre_insert_copies   PARAMS ((void));
 601 static int pre_delete           PARAMS ((void));
 602 static int pre_gcse             PARAMS ((void));
 603 static int one_pre_gcse_pass    PARAMS ((int));
 604 static void add_label_notes     PARAMS ((rtx, rtx));
 605 static void alloc_code_hoist_mem PARAMS ((int, int));
 606 static void free_code_hoist_mem PARAMS ((void));
 607 static void compute_code_hoist_vbeinout PARAMS ((void));
 608 static void compute_code_hoist_data PARAMS ((void));
 609 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 610 static void hoist_code          PARAMS ((void));
 611 static int one_code_hoisting_pass PARAMS ((void));
 612 static void alloc_rd_mem        PARAMS ((int, int));
 613 static void free_rd_mem         PARAMS ((void));
 614 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 615 static void compute_kill_rd     PARAMS ((void));
 616 static void compute_rd          PARAMS ((void));
 617 static void alloc_avail_expr_mem PARAMS ((int, int));
 618 static void free_avail_expr_mem PARAMS ((void));
 619 static void compute_ae_gen      PARAMS ((void));
 620 static int expr_killed_p        PARAMS ((rtx, int));
 621 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 622 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 623                                          int, int));
 624 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 625 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 626 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 627 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 628 static int classic_gcse         PARAMS ((void));
 629 static int one_classic_gcse_pass PARAMS ((int));
 630 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 631 static void delete_null_pointer_checks_1 PARAMS ((varray_type *, unsigned int *,
 632                                                   sbitmap *, sbitmap *,
 633                                                   struct null_pointer_info *));
 634 static rtx process_insert_insn  PARAMS ((struct expr *));
 635 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 636 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 637                                              int, int, char *));
 638 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 639                                                  int, char *));
 640 \f
 641 /* Entry point for global common subexpression elimination.
 642    F is the first instruction in the function.  */
 643
 644 int
 645 gcse_main (f, file)
 646      rtx f;
 647      FILE *file;
 648 {
 649   int changed, pass;
 650   /* Bytes used at start of pass.  */
 651   int initial_bytes_used;
 652   /* Maximum number of bytes used by a pass.  */
 653   int max_pass_bytes;
 654   /* Point to release obstack data from for each pass.  */
 655   char *gcse_obstack_bottom;
 656
 657   /* We do not construct an accurate cfg in functions which call
 658      setjmp, so just punt to be safe.  */
 659   if (current_function_calls_setjmp)
 660     return 0;
 661
 662   /* Assume that we do not need to run jump optimizations after gcse.  */
 663   run_jump_opt_after_gcse = 0;
 664
 665   /* For calling dump_foo fns from gdb.  */
 666   debug_stderr = stderr;
 667   gcse_file = file;
 668
 669   /* Identify the basic block information for this function, including
 670      successors and predecessors.  */
 671   max_gcse_regno = max_reg_num ();
 672
 673   if (file)
 674     dump_flow_info (file);
 675
 676   /* Return if there's nothing to do.  */
 677   if (n_basic_blocks <= 1)
 678     return 0;
 679
 680   /* Trying to perform global optimizations on flow graphs which have
 681      a high connectivity will take a long time and is unlikely to be
 682      particularly useful.
 683
 684      In normal circumstances a cfg should have about twice has many edges
 685      as blocks.  But we do not want to punish small functions which have
 686      a couple switch statements.  So we require a relatively large number
 687      of basic blocks and the ratio of edges to blocks to be high.  */
 688   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 689     {
 690       if (warn_disabled_optimization)
 691       warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
 692                n_basic_blocks, n_edges / n_basic_blocks);
 693       return 0;
 694     }
 695
 696   /* If allocating memory for the cprop bitmap would take up too much
 697      storage it's better just to disable the optimization.  */
 698   if ((n_basic_blocks
 699        * SBITMAP_SET_SIZE (max_gcse_regno)
 700        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
 701     {
 702       if (warn_disabled_optimization)
 703         warning ("GCSE disabled: %d basic blocks and %d registers",
 704                  n_basic_blocks, max_gcse_regno);
 705
 706       return 0;
 707     }
 708
 709   /* See what modes support reg/reg copy operations.  */
 710   if (! can_copy_init_p)
 711     {
 712       compute_can_copy ();
 713       can_copy_init_p = 1;
 714     }
 715
 716   gcc_obstack_init (&gcse_obstack);
 717   bytes_used = 0;
 718
 719   /* Record where pseudo-registers are set.  This data is kept accurate
 720      during each pass.  ??? We could also record hard-reg information here
 721      [since it's unchanging], however it is currently done during hash table
 722      computation.
 723
 724      It may be tempting to compute MEM set information here too, but MEM sets
 725      will be subject to code motion one day and thus we need to compute
 726      information about memory sets when we build the hash tables.  */
 727
 728   alloc_reg_set_mem (max_gcse_regno);
 729   compute_sets (f);
 730
 731   pass = 0;
 732   initial_bytes_used = bytes_used;
 733   max_pass_bytes = 0;
 734   gcse_obstack_bottom = gcse_alloc (1);
 735   changed = 1;
 736   while (changed && pass < MAX_PASSES)
 737     {
 738       changed = 0;
 739       if (file)
 740         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 741
 742       /* Initialize bytes_used to the space for the pred/succ lists,
 743          and the reg_set_table data.  */
 744       bytes_used = initial_bytes_used;
 745
 746       /* Each pass may create new registers, so recalculate each time.  */
 747       max_gcse_regno = max_reg_num ();
 748
 749       alloc_gcse_mem (f);
 750
 751       /* Don't allow constant propagation to modify jumps
 752          during this pass.  */
 753       changed = one_cprop_pass (pass + 1, 0);
 754
 755       if (optimize_size)
 756         changed |= one_classic_gcse_pass (pass + 1);
 757       else
 758         {
 759           changed |= one_pre_gcse_pass (pass + 1);
 760           free_reg_set_mem ();
 761           alloc_reg_set_mem (max_reg_num ());
 762           compute_sets (f);
 763           run_jump_opt_after_gcse = 1;
 764         }
 765
 766       if (max_pass_bytes < bytes_used)
 767         max_pass_bytes = bytes_used;
 768
 769       /* Free up memory, then reallocate for code hoisting.  We can
 770          not re-use the existing allocated memory because the tables
 771          will not have info for the insns or registers created by
 772          partial redundancy elimination.  */
 773       free_gcse_mem ();
 774
 775       /* It does not make sense to run code hoisting unless we optimizing
 776          for code size -- it rarely makes programs faster, and can make
 777          them bigger if we did partial redundancy elimination (when optimizing
 778          for space, we use a classic gcse algorithm instead of partial
 779          redundancy algorithms).  */
 780       if (optimize_size)
 781         {
 782           max_gcse_regno = max_reg_num ();
 783           alloc_gcse_mem (f);
 784           changed |= one_code_hoisting_pass ();
 785           free_gcse_mem ();
 786
 787           if (max_pass_bytes < bytes_used)
 788             max_pass_bytes = bytes_used;
 789         }
 790
 791       if (file)
 792         {
 793           fprintf (file, "\n");
 794           fflush (file);
 795         }
 796
 797       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 798       pass++;
 799     }
 800
 801   /* Do one last pass of copy propagation, including cprop into
 802      conditional jumps.  */
 803
 804   max_gcse_regno = max_reg_num ();
 805   alloc_gcse_mem (f);
 806   /* This time, go ahead and allow cprop to alter jumps.  */
 807   one_cprop_pass (pass + 1, 1);
 808   free_gcse_mem ();
 809
 810   if (file)
 811     {
 812       fprintf (file, "GCSE of %s: %d basic blocks, ",
 813                current_function_name, n_basic_blocks);
 814       fprintf (file, "%d pass%s, %d bytes\n\n",
 815                pass, pass > 1 ? "es" : "", max_pass_bytes);
 816     }
 817
 818   obstack_free (&gcse_obstack, NULL_PTR);
 819   free_reg_set_mem ();
 820   return run_jump_opt_after_gcse;
 821 }
 822 \f
 823 /* Misc. utilities.  */
 824
 825 /* Compute which modes support reg/reg copy operations.  */
 826
 827 static void
 828 compute_can_copy ()
 829 {
 830   int i;
 831 #ifndef AVOID_CCMODE_COPIES
 832   rtx reg,insn;
 833 #endif
 834   memset (can_copy_p, 0, NUM_MACHINE_MODES);
 835
 836   start_sequence ();
 837   for (i = 0; i < NUM_MACHINE_MODES; i++)
 838     if (GET_MODE_CLASS (i) == MODE_CC)
 839       {
 840 #ifdef AVOID_CCMODE_COPIES
 841         can_copy_p[i] = 0;
 842 #else
 843         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 844         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 845         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 846           can_copy_p[i] = 1;
 847 #endif
 848       }
 849     else
 850       can_copy_p[i] = 1;
 851
 852   end_sequence ();
 853 }
 854 \f
 855 /* Cover function to xmalloc to record bytes allocated.  */
 856
 857 static char *
 858 gmalloc (size)
 859      unsigned int size;
 860 {
 861   bytes_used += size;
 862   return xmalloc (size);
 863 }
 864
 865 /* Cover function to xrealloc.
 866    We don't record the additional size since we don't know it.
 867    It won't affect memory usage stats much anyway.  */
 868
 869 static char *
 870 grealloc (ptr, size)
 871      char *ptr;
 872      unsigned int size;
 873 {
 874   return xrealloc (ptr, size);
 875 }
 876
 877 /* Cover function to obstack_alloc.
 878    We don't need to record the bytes allocated here since
 879    obstack_chunk_alloc is set to gmalloc.  */
 880
 881 static char *
 882 gcse_alloc (size)
 883      unsigned long size;
 884 {
 885   return (char *) obstack_alloc (&gcse_obstack, size);
 886 }
 887
 888 /* Allocate memory for the cuid mapping array,
 889    and reg/memory set tracking tables.
 890
 891    This is called at the start of each pass.  */
 892
 893 static void
 894 alloc_gcse_mem (f)
 895      rtx f;
 896 {
 897   int i,n;
 898   rtx insn;
 899
 900   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 901      CUIDs are like UIDs except they increase monotonically, have no gaps,
 902      and only apply to real insns.  */
 903
 904   max_uid = get_max_uid ();
 905   n = (max_uid + 1) * sizeof (int);
 906   uid_cuid = (int *) gmalloc (n);
 907   memset ((char *) uid_cuid, 0, n);
 908   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 909     {
 910       if (INSN_P (insn))
 911         uid_cuid[INSN_UID (insn)] = i++;
 912       else
 913         uid_cuid[INSN_UID (insn)] = i;
 914     }
 915
 916   /* Create a table mapping cuids to insns.  */
 917
 918   max_cuid = i;
 919   n = (max_cuid + 1) * sizeof (rtx);
 920   cuid_insn = (rtx *) gmalloc (n);
 921   memset ((char *) cuid_insn, 0, n);
 922   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 923     if (INSN_P (insn))
 924       CUID_INSN (i++) = insn;
 925
 926   /* Allocate vars to track sets of regs.  */
 927   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 928
 929   /* Allocate vars to track sets of regs, memory per block.  */
 930   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 931                                                        max_gcse_regno);
 932   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 933 }
 934
 935 /* Free memory allocated by alloc_gcse_mem.  */
 936
 937 static void
 938 free_gcse_mem ()
 939 {
 940   free (uid_cuid);
 941   free (cuid_insn);
 942
 943   free (reg_set_bitmap);
 944
 945   free (reg_set_in_block);
 946   free (mem_set_in_block);
 947 }
 948
 949 /* Many of the global optimization algorithms work by solving dataflow
 950    equations for various expressions.  Initially, some local value is
 951    computed for each expression in each block.  Then, the values across the
 952    various blocks are combined (by following flow graph edges) to arrive at
 953    global values.  Conceptually, each set of equations is independent.  We
 954    may therefore solve all the equations in parallel, solve them one at a
 955    time, or pick any intermediate approach.
 956
 957    When you're going to need N two-dimensional bitmaps, each X (say, the
 958    number of blocks) by Y (say, the number of expressions), call this
 959    function.  It's not important what X and Y represent; only that Y
 960    correspond to the things that can be done in parallel.  This function will
 961    return an appropriate chunking factor C; you should solve C sets of
 962    equations in parallel.  By going through this function, we can easily
 963    trade space against time; by solving fewer equations in parallel we use
 964    less space.  */
 965
 966 static int
 967 get_bitmap_width (n, x, y)
 968      int n;
 969      int x;
 970      int y;
 971 {
 972   /* It's not really worth figuring out *exactly* how much memory will
 973      be used by a particular choice.  The important thing is to get
 974      something approximately right.  */
 975   size_t max_bitmap_memory = 10 * 1024 * 1024;
 976
 977   /* The number of bytes we'd use for a single column of minimum
 978      width.  */
 979   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 980
 981   /* Often, it's reasonable just to solve all the equations in
 982      parallel.  */
 983   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 984     return y;
 985
 986   /* Otherwise, pick the largest width we can, without going over the
 987      limit.  */
 988   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 989                              / column_size);
 990 }
 991 \f
 992 /* Compute the local properties of each recorded expression.
 993
 994    Local properties are those that are defined by the block, irrespective of
 995    other blocks.
 996
 997    An expression is transparent in a block if its operands are not modified
 998    in the block.
 999
1000    An expression is computed (locally available) in a block if it is computed
1001    at least once and expression would contain the same value if the
1002    computation was moved to the end of the block.
1003
1004    An expression is locally anticipatable in a block if it is computed at
1005    least once and expression would contain the same value if the computation
1006    was moved to the beginning of the block.
1007
1008    We call this routine for cprop, pre and code hoisting.  They all compute
1009    basically the same information and thus can easily share this code.
1010
1011    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1012    properties.  If NULL, then it is not necessary to compute or record that
1013    particular property.
1014
1015    SETP controls which hash table to look at.  If zero, this routine looks at
1016    the expr hash table; if nonzero this routine looks at the set hash table.
1017    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1018    ABSALTERED.  */
1019
1020 static void
1021 compute_local_properties (transp, comp, antloc, setp)
1022      sbitmap *transp;
1023      sbitmap *comp;
1024      sbitmap *antloc;
1025      int setp;
1026 {
1027   unsigned int i, hash_table_size;
1028   struct expr **hash_table;
1029
1030   /* Initialize any bitmaps that were passed in.  */
1031   if (transp)
1032     {
1033       if (setp)
1034         sbitmap_vector_zero (transp, n_basic_blocks);
1035       else
1036         sbitmap_vector_ones (transp, n_basic_blocks);
1037     }
1038
1039   if (comp)
1040     sbitmap_vector_zero (comp, n_basic_blocks);
1041   if (antloc)
1042     sbitmap_vector_zero (antloc, n_basic_blocks);
1043
1044   /* We use the same code for cprop, pre and hoisting.  For cprop
1045      we care about the set hash table, for pre and hoisting we
1046      care about the expr hash table.  */
1047   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1048   hash_table = setp ? set_hash_table : expr_hash_table;
1049
1050   for (i = 0; i < hash_table_size; i++)
1051     {
1052       struct expr *expr;
1053
1054       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1055         {
1056           int indx = expr->bitmap_index;
1057           struct occr *occr;
1058
1059           /* The expression is transparent in this block if it is not killed.
1060              We start by assuming all are transparent [none are killed], and
1061              then reset the bits for those that are.  */
1062           if (transp)
1063             compute_transp (expr->expr, indx, transp, setp);
1064
1065           /* The occurrences recorded in antic_occr are exactly those that
1066              we want to set to non-zero in ANTLOC.  */
1067           if (antloc)
1068             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1069               {
1070                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1071
1072                 /* While we're scanning the table, this is a good place to
1073                    initialize this.  */
1074                 occr->deleted_p = 0;
1075               }
1076
1077           /* The occurrences recorded in avail_occr are exactly those that
1078              we want to set to non-zero in COMP.  */
1079           if (comp)
1080             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1081               {
1082                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1083
1084                 /* While we're scanning the table, this is a good place to
1085                    initialize this.  */
1086                 occr->copied_p = 0;
1087               }
1088
1089           /* While we're scanning the table, this is a good place to
1090              initialize this.  */
1091           expr->reaching_reg = 0;
1092         }
1093     }
1094 }
1095 \f
1096 /* Register set information.
1097
1098    `reg_set_table' records where each register is set or otherwise
1099    modified.  */
1100
1101 static struct obstack reg_set_obstack;
1102
1103 static void
1104 alloc_reg_set_mem (n_regs)
1105      int n_regs;
1106 {
1107   unsigned int n;
1108
1109   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1110   n = reg_set_table_size * sizeof (struct reg_set *);
1111   reg_set_table = (struct reg_set **) gmalloc (n);
1112   memset ((char *) reg_set_table, 0, n);
1113
1114   gcc_obstack_init (&reg_set_obstack);
1115 }
1116
1117 static void
1118 free_reg_set_mem ()
1119 {
1120   free (reg_set_table);
1121   obstack_free (&reg_set_obstack, NULL_PTR);
1122 }
1123
1124 /* Record REGNO in the reg_set table.  */
1125
1126 static void
1127 record_one_set (regno, insn)
1128      int regno;
1129      rtx insn;
1130 {
1131   /* allocate a new reg_set element and link it onto the list */
1132   struct reg_set *new_reg_info;
1133
1134   /* If the table isn't big enough, enlarge it.  */
1135   if (regno >= reg_set_table_size)
1136     {
1137       int new_size = regno + REG_SET_TABLE_SLOP;
1138
1139       reg_set_table
1140         = (struct reg_set **) grealloc ((char *) reg_set_table,
1141                                         new_size * sizeof (struct reg_set *));
1142       memset ((char *) (reg_set_table + reg_set_table_size), 0,
1143              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1144       reg_set_table_size = new_size;
1145     }
1146
1147   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1148                                                    sizeof (struct reg_set));
1149   bytes_used += sizeof (struct reg_set);
1150   new_reg_info->insn = insn;
1151   new_reg_info->next = reg_set_table[regno];
1152   reg_set_table[regno] = new_reg_info;
1153 }
1154
1155 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1156    an insn.  The DATA is really the instruction in which the SET is
1157    occurring.  */
1158
1159 static void
1160 record_set_info (dest, setter, data)
1161      rtx dest, setter ATTRIBUTE_UNUSED;
1162      void *data;
1163 {
1164   rtx record_set_insn = (rtx) data;
1165
1166   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1167     record_one_set (REGNO (dest), record_set_insn);
1168 }
1169
1170 /* Scan the function and record each set of each pseudo-register.
1171
1172    This is called once, at the start of the gcse pass.  See the comments for
1173    `reg_set_table' for further documenation.  */
1174
1175 static void
1176 compute_sets (f)
1177      rtx f;
1178 {
1179   rtx insn;
1180
1181   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1182     if (INSN_P (insn))
1183       note_stores (PATTERN (insn), record_set_info, insn);
1184 }
1185 \f
1186 /* Hash table support.  */
1187
1188 /* For each register, the cuid of the first/last insn in the block to set it,
1189    or -1 if not set.  */
1190 #define NEVER_SET -1
1191 static int *reg_first_set;
1192 static int *reg_last_set;
1193
1194 /* While computing "first/last set" info, this is the CUID of first/last insn
1195    to set memory or -1 if not set.  `mem_last_set' is also used when
1196    performing GCSE to record whether memory has been set since the beginning
1197    of the block.
1198
1199    Note that handling of memory is very simple, we don't make any attempt
1200    to optimize things (later).  */
1201 static int mem_first_set;
1202 static int mem_last_set;
1203
1204 /* Perform a quick check whether X, the source of a set, is something
1205    we want to consider for GCSE.  */
1206
1207 static int
1208 want_to_gcse_p (x)
1209      rtx x;
1210 {
1211   switch (GET_CODE (x))
1212     {
1213     case REG:
1214     case SUBREG:
1215     case CONST_INT:
1216     case CONST_DOUBLE:
1217     case CALL:
1218       return 0;
1219
1220     default:
1221       break;
1222     }
1223
1224   return 1;
1225 }
1226
1227 /* Return non-zero if the operands of expression X are unchanged from the
1228    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1229    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1230
1231 static int
1232 oprs_unchanged_p (x, insn, avail_p)
1233      rtx x, insn;
1234      int avail_p;
1235 {
1236   int i, j;
1237   enum rtx_code code;
1238   const char *fmt;
1239
1240   if (x == 0)
1241     return 1;
1242
1243   code = GET_CODE (x);
1244   switch (code)
1245     {
1246     case REG:
1247       if (avail_p)
1248         return (reg_last_set[REGNO (x)] == NEVER_SET
1249                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1250       else
1251         return (reg_first_set[REGNO (x)] == NEVER_SET
1252                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1253
1254     case MEM:
1255       if (avail_p && mem_last_set != NEVER_SET
1256           && mem_last_set >= INSN_CUID (insn))
1257         return 0;
1258       else if (! avail_p && mem_first_set != NEVER_SET
1259                && mem_first_set < INSN_CUID (insn))
1260         return 0;
1261       else
1262         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1263
1264     case PRE_DEC:
1265     case PRE_INC:
1266     case POST_DEC:
1267     case POST_INC:
1268     case PRE_MODIFY:
1269     case POST_MODIFY:
1270       return 0;
1271
1272     case PC:
1273     case CC0: /*FIXME*/
1274     case CONST:
1275     case CONST_INT:
1276     case CONST_DOUBLE:
1277     case SYMBOL_REF:
1278     case LABEL_REF:
1279     case ADDR_VEC:
1280     case ADDR_DIFF_VEC:
1281       return 1;
1282
1283     default:
1284       break;
1285     }
1286
1287   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1288     {
1289       if (fmt[i] == 'e')
1290         {
1291           /* If we are about to do the last recursive call needed at this
1292              level, change it into iteration.  This function is called enough
1293              to be worth it.  */
1294           if (i == 0)
1295             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1296
1297           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1298             return 0;
1299         }
1300       else if (fmt[i] == 'E')
1301         for (j = 0; j < XVECLEN (x, i); j++)
1302           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1303             return 0;
1304     }
1305
1306   return 1;
1307 }
1308
1309 /* Return non-zero if the operands of expression X are unchanged from
1310    the start of INSN's basic block up to but not including INSN.  */
1311
1312 static int
1313 oprs_anticipatable_p (x, insn)
1314      rtx x, insn;
1315 {
1316   return oprs_unchanged_p (x, insn, 0);
1317 }
1318
1319 /* Return non-zero if the operands of expression X are unchanged from
1320    INSN to the end of INSN's basic block.  */
1321
1322 static int
1323 oprs_available_p (x, insn)
1324      rtx x, insn;
1325 {
1326   return oprs_unchanged_p (x, insn, 1);
1327 }
1328
1329 /* Hash expression X.
1330
1331    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1332    indicating if a volatile operand is found or if the expression contains
1333    something we don't want to insert in the table.
1334
1335    ??? One might want to merge this with canon_hash.  Later.  */
1336
1337 static unsigned int
1338 hash_expr (x, mode, do_not_record_p, hash_table_size)
1339      rtx x;
1340      enum machine_mode mode;
1341      int *do_not_record_p;
1342      int hash_table_size;
1343 {
1344   unsigned int hash;
1345
1346   *do_not_record_p = 0;
1347
1348   hash = hash_expr_1 (x, mode, do_not_record_p);
1349   return hash % hash_table_size;
1350 }
1351 /* Hash a string.  Just add its bytes up.  */
1352 static inline unsigned
1353 hash_string_1 (ps)
1354      const char *ps;
1355 {
1356   unsigned hash = 0;
1357   const unsigned char *p = (const unsigned char *)ps;
1358
1359   if (p)
1360     while (*p)
1361       hash += *p++;
1362
1363   return hash;
1364 }
1365
1366 /* Subroutine of hash_expr to do the actual work.  */
1367
1368 static unsigned int
1369 hash_expr_1 (x, mode, do_not_record_p)
1370      rtx x;
1371      enum machine_mode mode;
1372      int *do_not_record_p;
1373 {
1374   int i, j;
1375   unsigned hash = 0;
1376   enum rtx_code code;
1377   const char *fmt;
1378
1379   /* Used to turn recursion into iteration.  We can't rely on GCC's
1380      tail-recursion eliminatio since we need to keep accumulating values
1381      in HASH.  */
1382
1383   if (x == 0)
1384     return hash;
1385
1386  repeat:
1387   code = GET_CODE (x);
1388   switch (code)
1389     {
1390     case REG:
1391       hash += ((unsigned int) REG << 7) + REGNO (x);
1392       return hash;
1393
1394     case CONST_INT:
1395       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1396                + (unsigned int) INTVAL (x));
1397       return hash;
1398
1399     case CONST_DOUBLE:
1400       /* This is like the general case, except that it only counts
1401          the integers representing the constant.  */
1402       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1403       if (GET_MODE (x) != VOIDmode)
1404         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1405           hash += (unsigned int) XWINT (x, i);
1406       else
1407         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1408                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1409       return hash;
1410
1411       /* Assume there is only one rtx object for any given label.  */
1412     case LABEL_REF:
1413       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1414          differences and differences between each stage's debugging dumps.  */
1415       hash += (((unsigned int) LABEL_REF << 7)
1416                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1417       return hash;
1418
1419     case SYMBOL_REF:
1420       {
1421         /* Don't hash on the symbol's address to avoid bootstrap differences.
1422            Different hash values may cause expressions to be recorded in
1423            different orders and thus different registers to be used in the
1424            final assembler.  This also avoids differences in the dump files
1425            between various stages.  */
1426         unsigned int h = 0;
1427         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1428
1429         while (*p)
1430           h += (h << 7) + *p++; /* ??? revisit */
1431
1432         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1433         return hash;
1434       }
1435
1436     case MEM:
1437       if (MEM_VOLATILE_P (x))
1438         {
1439           *do_not_record_p = 1;
1440           return 0;
1441         }
1442
1443       hash += (unsigned int) MEM;
1444       hash += MEM_ALIAS_SET (x);
1445       x = XEXP (x, 0);
1446       goto repeat;
1447
1448     case PRE_DEC:
1449     case PRE_INC:
1450     case POST_DEC:
1451     case POST_INC:
1452     case PC:
1453     case CC0:
1454     case CALL:
1455     case UNSPEC_VOLATILE:
1456       *do_not_record_p = 1;
1457       return 0;
1458
1459     case ASM_OPERANDS:
1460       if (MEM_VOLATILE_P (x))
1461         {
1462           *do_not_record_p = 1;
1463           return 0;
1464         }
1465       else
1466         {
1467           /* We don't want to take the filename and line into account.  */
1468           hash += (unsigned) code + (unsigned) GET_MODE (x)
1469             + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
1470             + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
1471             + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);
1472
1473           if (ASM_OPERANDS_INPUT_LENGTH (x))
1474             {
1475               for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
1476                 {
1477                   hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
1478                                         GET_MODE (ASM_OPERANDS_INPUT (x, i)),
1479                                         do_not_record_p)
1480                            + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1481                                             (x, i)));
1482                 }
1483
1484               hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
1485               x = ASM_OPERANDS_INPUT (x, 0);
1486               mode = GET_MODE (x);
1487               goto repeat;
1488             }
1489           return hash;
1490         }
1491
1492     default:
1493       break;
1494     }
1495
1496   hash += (unsigned) code + (unsigned) GET_MODE (x);
1497   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1498     {
1499       if (fmt[i] == 'e')
1500         {
1501           /* If we are about to do the last recursive call
1502              needed at this level, change it into iteration.
1503              This function is called enough to be worth it.  */
1504           if (i == 0)
1505             {
1506               x = XEXP (x, i);
1507               goto repeat;
1508             }
1509
1510           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1511           if (*do_not_record_p)
1512             return 0;
1513         }
1514
1515       else if (fmt[i] == 'E')
1516         for (j = 0; j < XVECLEN (x, i); j++)
1517           {
1518             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1519             if (*do_not_record_p)
1520               return 0;
1521           }
1522
1523       else if (fmt[i] == 's')
1524         hash += hash_string_1 (XSTR (x, i));
1525       else if (fmt[i] == 'i')
1526         hash += (unsigned int) XINT (x, i);
1527       else
1528         abort ();
1529     }
1530
1531   return hash;
1532 }
1533
1534 /* Hash a set of register REGNO.
1535
1536    Sets are hashed on the register that is set.  This simplifies the PRE copy
1537    propagation code.
1538
1539    ??? May need to make things more elaborate.  Later, as necessary.  */
1540
1541 static unsigned int
1542 hash_set (regno, hash_table_size)
1543      int regno;
1544      int hash_table_size;
1545 {
1546   unsigned int hash;
1547
1548   hash = regno;
1549   return hash % hash_table_size;
1550 }
1551
1552 /* Return non-zero if exp1 is equivalent to exp2.
1553    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1554
1555 static int
1556 expr_equiv_p (x, y)
1557      rtx x, y;
1558 {
1559   register int i, j;
1560   register enum rtx_code code;
1561   register const char *fmt;
1562
1563   if (x == y)
1564     return 1;
1565
1566   if (x == 0 || y == 0)
1567     return x == y;
1568
1569   code = GET_CODE (x);
1570   if (code != GET_CODE (y))
1571     return 0;
1572
1573   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1574   if (GET_MODE (x) != GET_MODE (y))
1575     return 0;
1576
1577   switch (code)
1578     {
1579     case PC:
1580     case CC0:
1581       return x == y;
1582
1583     case CONST_INT:
1584       return INTVAL (x) == INTVAL (y);
1585
1586     case LABEL_REF:
1587       return XEXP (x, 0) == XEXP (y, 0);
1588
1589     case SYMBOL_REF:
1590       return XSTR (x, 0) == XSTR (y, 0);
1591
1592     case REG:
1593       return REGNO (x) == REGNO (y);
1594
1595     case MEM:
1596       /* Can't merge two expressions in different alias sets, since we can
1597          decide that the expression is transparent in a block when it isn't,
1598          due to it being set with the different alias set.  */
1599       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1600         return 0;
1601       break;
1602
1603     /*  For commutative operations, check both orders.  */
1604     case PLUS:
1605     case MULT:
1606     case AND:
1607     case IOR:
1608     case XOR:
1609     case NE:
1610     case EQ:
1611       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1612                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1613               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1614                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1615
1616     case ASM_OPERANDS:
1617       /* We don't use the generic code below because we want to
1618          disregard filename and line numbers.  */
1619
1620       /* A volatile asm isn't equivalent to any other.  */
1621       if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
1622         return 0;
1623
1624       if (GET_MODE (x) != GET_MODE (y)
1625           || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
1626           || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
1627                      ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
1628           || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
1629           || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
1630         return 0;
1631
1632       if (ASM_OPERANDS_INPUT_LENGTH (x))
1633         {
1634           for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
1635             if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
1636                                 ASM_OPERANDS_INPUT (y, i))
1637                 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
1638                            ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
1639               return 0;
1640         }
1641
1642       return 1;
1643
1644     default:
1645       break;
1646     }
1647
1648   /* Compare the elements.  If any pair of corresponding elements
1649      fail to match, return 0 for the whole thing.  */
1650
1651   fmt = GET_RTX_FORMAT (code);
1652   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1653     {
1654       switch (fmt[i])
1655         {
1656         case 'e':
1657           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1658             return 0;
1659           break;
1660
1661         case 'E':
1662           if (XVECLEN (x, i) != XVECLEN (y, i))
1663             return 0;
1664           for (j = 0; j < XVECLEN (x, i); j++)
1665             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1666               return 0;
1667           break;
1668
1669         case 's':
1670           if (strcmp (XSTR (x, i), XSTR (y, i)))
1671             return 0;
1672           break;
1673
1674         case 'i':
1675           if (XINT (x, i) != XINT (y, i))
1676             return 0;
1677           break;
1678
1679         case 'w':
1680           if (XWINT (x, i) != XWINT (y, i))
1681             return 0;
1682         break;
1683
1684         case '0':
1685           break;
1686
1687         default:
1688           abort ();
1689         }
1690       }
1691
1692   return 1;
1693 }
1694
1695 /* Insert expression X in INSN in the hash table.
1696    If it is already present, record it as the last occurrence in INSN's
1697    basic block.
1698
1699    MODE is the mode of the value X is being stored into.
1700    It is only used if X is a CONST_INT.
1701
1702    ANTIC_P is non-zero if X is an anticipatable expression.
1703    AVAIL_P is non-zero if X is an available expression.  */
1704
1705 static void
1706 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1707      rtx x;
1708      enum machine_mode mode;
1709      rtx insn;
1710      int antic_p, avail_p;
1711 {
1712   int found, do_not_record_p;
1713   unsigned int hash;
1714   struct expr *cur_expr, *last_expr = NULL;
1715   struct occr *antic_occr, *avail_occr;
1716   struct occr *last_occr = NULL;
1717
1718   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1719
1720   /* Do not insert expression in table if it contains volatile operands,
1721      or if hash_expr determines the expression is something we don't want
1722      to or can't handle.  */
1723   if (do_not_record_p)
1724     return;
1725
1726   cur_expr = expr_hash_table[hash];
1727   found = 0;
1728
1729   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1730     {
1731       /* If the expression isn't found, save a pointer to the end of
1732          the list.  */
1733       last_expr = cur_expr;
1734       cur_expr = cur_expr->next_same_hash;
1735     }
1736
1737   if (! found)
1738     {
1739       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1740       bytes_used += sizeof (struct expr);
1741       if (expr_hash_table[hash] == NULL)
1742         /* This is the first pattern that hashed to this index.  */
1743         expr_hash_table[hash] = cur_expr;
1744       else
1745         /* Add EXPR to end of this hash chain.  */
1746         last_expr->next_same_hash = cur_expr;
1747
1748       /* Set the fields of the expr element.  */
1749       cur_expr->expr = x;
1750       cur_expr->bitmap_index = n_exprs++;
1751       cur_expr->next_same_hash = NULL;
1752       cur_expr->antic_occr = NULL;
1753       cur_expr->avail_occr = NULL;
1754     }
1755
1756   /* Now record the occurrence(s).  */
1757   if (antic_p)
1758     {
1759       antic_occr = cur_expr->antic_occr;
1760
1761       /* Search for another occurrence in the same basic block.  */
1762       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1763         {
1764           /* If an occurrence isn't found, save a pointer to the end of
1765              the list.  */
1766           last_occr = antic_occr;
1767           antic_occr = antic_occr->next;
1768         }
1769
1770       if (antic_occr)
1771         /* Found another instance of the expression in the same basic block.
1772            Prefer the currently recorded one.  We want the first one in the
1773            block and the block is scanned from start to end.  */
1774         ; /* nothing to do */
1775       else
1776         {
1777           /* First occurrence of this expression in this basic block.  */
1778           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1779           bytes_used += sizeof (struct occr);
1780           /* First occurrence of this expression in any block?  */
1781           if (cur_expr->antic_occr == NULL)
1782             cur_expr->antic_occr = antic_occr;
1783           else
1784             last_occr->next = antic_occr;
1785
1786           antic_occr->insn = insn;
1787           antic_occr->next = NULL;
1788         }
1789     }
1790
1791   if (avail_p)
1792     {
1793       avail_occr = cur_expr->avail_occr;
1794
1795       /* Search for another occurrence in the same basic block.  */
1796       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1797         {
1798           /* If an occurrence isn't found, save a pointer to the end of
1799              the list.  */
1800           last_occr = avail_occr;
1801           avail_occr = avail_occr->next;
1802         }
1803
1804       if (avail_occr)
1805         /* Found another instance of the expression in the same basic block.
1806            Prefer this occurrence to the currently recorded one.  We want
1807            the last one in the block and the block is scanned from start
1808            to end.  */
1809         avail_occr->insn = insn;
1810       else
1811         {
1812           /* First occurrence of this expression in this basic block.  */
1813           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1814           bytes_used += sizeof (struct occr);
1815
1816           /* First occurrence of this expression in any block?  */
1817           if (cur_expr->avail_occr == NULL)
1818             cur_expr->avail_occr = avail_occr;
1819           else
1820             last_occr->next = avail_occr;
1821
1822           avail_occr->insn = insn;
1823           avail_occr->next = NULL;
1824         }
1825     }
1826 }
1827
1828 /* Insert pattern X in INSN in the hash table.
1829    X is a SET of a reg to either another reg or a constant.
1830    If it is already present, record it as the last occurrence in INSN's
1831    basic block.  */
1832
1833 static void
1834 insert_set_in_table (x, insn)
1835      rtx x;
1836      rtx insn;
1837 {
1838   int found;
1839   unsigned int hash;
1840   struct expr *cur_expr, *last_expr = NULL;
1841   struct occr *cur_occr, *last_occr = NULL;
1842
1843   if (GET_CODE (x) != SET
1844       || GET_CODE (SET_DEST (x)) != REG)
1845     abort ();
1846
1847   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1848
1849   cur_expr = set_hash_table[hash];
1850   found = 0;
1851
1852   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1853     {
1854       /* If the expression isn't found, save a pointer to the end of
1855          the list.  */
1856       last_expr = cur_expr;
1857       cur_expr = cur_expr->next_same_hash;
1858     }
1859
1860   if (! found)
1861     {
1862       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1863       bytes_used += sizeof (struct expr);
1864       if (set_hash_table[hash] == NULL)
1865         /* This is the first pattern that hashed to this index.  */
1866         set_hash_table[hash] = cur_expr;
1867       else
1868         /* Add EXPR to end of this hash chain.  */
1869         last_expr->next_same_hash = cur_expr;
1870
1871       /* Set the fields of the expr element.
1872          We must copy X because it can be modified when copy propagation is
1873          performed on its operands.  */
1874       /* ??? Should this go in a different obstack?  */
1875       cur_expr->expr = copy_rtx (x);
1876       cur_expr->bitmap_index = n_sets++;
1877       cur_expr->next_same_hash = NULL;
1878       cur_expr->antic_occr = NULL;
1879       cur_expr->avail_occr = NULL;
1880     }
1881
1882   /* Now record the occurrence.  */
1883   cur_occr = cur_expr->avail_occr;
1884
1885   /* Search for another occurrence in the same basic block.  */
1886   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1887     {
1888       /* If an occurrence isn't found, save a pointer to the end of
1889          the list.  */
1890       last_occr = cur_occr;
1891       cur_occr = cur_occr->next;
1892     }
1893
1894   if (cur_occr)
1895     /* Found another instance of the expression in the same basic block.
1896        Prefer this occurrence to the currently recorded one.  We want the
1897        last one in the block and the block is scanned from start to end.  */
1898     cur_occr->insn = insn;
1899   else
1900     {
1901       /* First occurrence of this expression in this basic block.  */
1902       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1903       bytes_used += sizeof (struct occr);
1904
1905       /* First occurrence of this expression in any block?  */
1906       if (cur_expr->avail_occr == NULL)
1907         cur_expr->avail_occr = cur_occr;
1908       else
1909         last_occr->next = cur_occr;
1910
1911       cur_occr->insn = insn;
1912       cur_occr->next = NULL;
1913     }
1914 }
1915
1916 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1917    non-zero, this is for the assignment hash table, otherwise it is for the
1918    expression hash table.  */
1919
1920 static void
1921 hash_scan_set (pat, insn, set_p)
1922      rtx pat, insn;
1923      int set_p;
1924 {
1925   rtx src = SET_SRC (pat);
1926   rtx dest = SET_DEST (pat);
1927
1928   if (GET_CODE (src) == CALL)
1929     hash_scan_call (src, insn);
1930
1931   if (GET_CODE (dest) == REG)
1932     {
1933       int regno = REGNO (dest);
1934       rtx tmp;
1935
1936       /* Only record sets of pseudo-regs in the hash table.  */
1937       if (! set_p
1938           && regno >= FIRST_PSEUDO_REGISTER
1939           /* Don't GCSE something if we can't do a reg/reg copy.  */
1940           && can_copy_p [GET_MODE (dest)]
1941           /* Is SET_SRC something we want to gcse?  */
1942           && want_to_gcse_p (src))
1943         {
1944           /* An expression is not anticipatable if its operands are
1945              modified before this insn.  */
1946           int antic_p = oprs_anticipatable_p (src, insn);
1947           /* An expression is not available if its operands are
1948              subsequently modified, including this insn.  */
1949           int avail_p = oprs_available_p (src, insn);
1950
1951           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1952         }
1953
1954       /* Record sets for constant/copy propagation.  */
1955       else if (set_p
1956                && regno >= FIRST_PSEUDO_REGISTER
1957                && ((GET_CODE (src) == REG
1958                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1959                     && can_copy_p [GET_MODE (dest)])
1960                    || GET_CODE (src) == CONST_INT
1961                    || GET_CODE (src) == SYMBOL_REF
1962                    || GET_CODE (src) == CONST_DOUBLE)
1963                /* A copy is not available if its src or dest is subsequently
1964                   modified.  Here we want to search from INSN+1 on, but
1965                   oprs_available_p searches from INSN on.  */
1966                && (insn == BLOCK_END (BLOCK_NUM (insn))
1967                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1968                        && oprs_available_p (pat, tmp))))
1969         insert_set_in_table (pat, insn);
1970     }
1971 }
1972
1973 static void
1974 hash_scan_clobber (x, insn)
1975      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1976 {
1977   /* Currently nothing to do.  */
1978 }
1979
1980 static void
1981 hash_scan_call (x, insn)
1982      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1983 {
1984   /* Currently nothing to do.  */
1985 }
1986
1987 /* Process INSN and add hash table entries as appropriate.
1988
1989    Only available expressions that set a single pseudo-reg are recorded.
1990
1991    Single sets in a PARALLEL could be handled, but it's an extra complication
1992    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1993    are also in the PARALLEL.  Later.
1994
1995    If SET_P is non-zero, this is for the assignment hash table,
1996    otherwise it is for the expression hash table.
1997    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1998    not record any expressions.  */
1999
2000 static void
2001 hash_scan_insn (insn, set_p, in_libcall_block)
2002      rtx insn;
2003      int set_p;
2004      int in_libcall_block;
2005 {
2006   rtx pat = PATTERN (insn);
2007   int i;
2008
2009   /* Pick out the sets of INSN and for other forms of instructions record
2010      what's been modified.  */
2011
2012   if (GET_CODE (pat) == SET && ! in_libcall_block)
2013     {
2014       /* Ignore obvious no-ops.  */
2015       if (SET_SRC (pat) != SET_DEST (pat))
2016         hash_scan_set (pat, insn, set_p);
2017     }
2018   else if (GET_CODE (pat) == PARALLEL)
2019     for (i = 0; i < XVECLEN (pat, 0); i++)
2020       {
2021         rtx x = XVECEXP (pat, 0, i);
2022
2023         if (GET_CODE (x) == SET)
2024           {
2025             if (GET_CODE (SET_SRC (x)) == CALL)
2026               hash_scan_call (SET_SRC (x), insn);
2027           }
2028         else if (GET_CODE (x) == CLOBBER)
2029           hash_scan_clobber (x, insn);
2030         else if (GET_CODE (x) == CALL)
2031           hash_scan_call (x, insn);
2032       }
2033
2034   else if (GET_CODE (pat) == CLOBBER)
2035     hash_scan_clobber (pat, insn);
2036   else if (GET_CODE (pat) == CALL)
2037     hash_scan_call (pat, insn);
2038 }
2039
2040 static void
2041 dump_hash_table (file, name, table, table_size, total_size)
2042      FILE *file;
2043      const char *name;
2044      struct expr **table;
2045      int table_size, total_size;
2046 {
2047   int i;
2048   /* Flattened out table, so it's printed in proper order.  */
2049   struct expr **flat_table;
2050   unsigned int *hash_val;
2051   struct expr *expr;
2052
2053   flat_table
2054     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2055   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2056
2057   for (i = 0; i < table_size; i++)
2058     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2059       {
2060         flat_table[expr->bitmap_index] = expr;
2061         hash_val[expr->bitmap_index] = i;
2062       }
2063
2064   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2065            name, table_size, total_size);
2066
2067   for (i = 0; i < total_size; i++)
2068     if (flat_table[i] != 0)
2069       {
2070         expr = flat_table[i];
2071         fprintf (file, "Index %d (hash value %d)\n  ",
2072                  expr->bitmap_index, hash_val[i]);
2073         print_rtl (file, expr->expr);
2074         fprintf (file, "\n");
2075       }
2076
2077   fprintf (file, "\n");
2078
2079   free (flat_table);
2080   free (hash_val);
2081 }
2082
2083 /* Record register first/last/block set information for REGNO in INSN.
2084
2085    reg_first_set records the first place in the block where the register
2086    is set and is used to compute "anticipatability".
2087
2088    reg_last_set records the last place in the block where the register
2089    is set and is used to compute "availability".
2090
2091    reg_set_in_block records whether the register is set in the block
2092    and is used to compute "transparency".  */
2093
2094 static void
2095 record_last_reg_set_info (insn, regno)
2096      rtx insn;
2097      int regno;
2098 {
2099   if (reg_first_set[regno] == NEVER_SET)
2100     reg_first_set[regno] = INSN_CUID (insn);
2101
2102   reg_last_set[regno] = INSN_CUID (insn);
2103   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2104 }
2105
2106 /* Record memory first/last/block set information for INSN.  */
2107
2108 static void
2109 record_last_mem_set_info (insn)
2110      rtx insn;
2111 {
2112   if (mem_first_set == NEVER_SET)
2113     mem_first_set = INSN_CUID (insn);
2114
2115   mem_last_set = INSN_CUID (insn);
2116   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2117 }
2118
2119 /* Called from compute_hash_table via note_stores to handle one
2120    SET or CLOBBER in an insn.  DATA is really the instruction in which
2121    the SET is taking place.  */
2122
2123 static void
2124 record_last_set_info (dest, setter, data)
2125      rtx dest, setter ATTRIBUTE_UNUSED;
2126      void *data;
2127 {
2128   rtx last_set_insn = (rtx) data;
2129
2130   if (GET_CODE (dest) == SUBREG)
2131     dest = SUBREG_REG (dest);
2132
2133   if (GET_CODE (dest) == REG)
2134     record_last_reg_set_info (last_set_insn, REGNO (dest));
2135   else if (GET_CODE (dest) == MEM
2136            /* Ignore pushes, they clobber nothing.  */
2137            && ! push_operand (dest, GET_MODE (dest)))
2138     record_last_mem_set_info (last_set_insn);
2139 }
2140
2141 /* Top level function to create an expression or assignment hash table.
2142
2143    Expression entries are placed in the hash table if
2144    - they are of the form (set (pseudo-reg) src),
2145    - src is something we want to perform GCSE on,
2146    - none of the operands are subsequently modified in the block
2147
2148    Assignment entries are placed in the hash table if
2149    - they are of the form (set (pseudo-reg) src),
2150    - src is something we want to perform const/copy propagation on,
2151    - none of the operands or target are subsequently modified in the block
2152
2153    Currently src must be a pseudo-reg or a const_int.
2154
2155    F is the first insn.
2156    SET_P is non-zero for computing the assignment hash table.  */
2157
2158 static void
2159 compute_hash_table (set_p)
2160      int set_p;
2161 {
2162   int bb;
2163
2164   /* While we compute the hash table we also compute a bit array of which
2165      registers are set in which blocks.
2166      We also compute which blocks set memory, in the absence of aliasing
2167      support [which is TODO].
2168      ??? This isn't needed during const/copy propagation, but it's cheap to
2169      compute.  Later.  */
2170   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2171   memset ((char *) mem_set_in_block, 0, n_basic_blocks);
2172
2173   /* Some working arrays used to track first and last set in each block.  */
2174   /* ??? One could use alloca here, but at some size a threshold is crossed
2175      beyond which one should use malloc.  Are we at that threshold here?  */
2176   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2177   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2178
2179   for (bb = 0; bb < n_basic_blocks; bb++)
2180     {
2181       rtx insn;
2182       unsigned int regno;
2183       int in_libcall_block;
2184       unsigned int i;
2185
2186       /* First pass over the instructions records information used to
2187          determine when registers and memory are first and last set.
2188          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2189          could be moved to compute_sets since they currently don't change.  */
2190
2191       for (i = 0; i < max_gcse_regno; i++)
2192         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2193
2194       mem_first_set = NEVER_SET;
2195       mem_last_set = NEVER_SET;
2196
2197       for (insn = BLOCK_HEAD (bb);
2198            insn && insn != NEXT_INSN (BLOCK_END (bb));
2199            insn = NEXT_INSN (insn))
2200         {
2201 #ifdef NON_SAVING_SETJMP
2202           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2203               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2204             {
2205               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2206                 record_last_reg_set_info (insn, regno);
2207               continue;
2208             }
2209 #endif
2210
2211           if (! INSN_P (insn))
2212             continue;
2213
2214           if (GET_CODE (insn) == CALL_INSN)
2215             {
2216               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2217                 if ((call_used_regs[regno]
2218                      && regno != STACK_POINTER_REGNUM
2219 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2220                      && regno != HARD_FRAME_POINTER_REGNUM
2221 #endif
2222 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2223                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2224 #endif
2225 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2226                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2227 #endif
2228
2229                      && regno != FRAME_POINTER_REGNUM)
2230                     || global_regs[regno])
2231                   record_last_reg_set_info (insn, regno);
2232
2233               if (! CONST_CALL_P (insn))
2234                 record_last_mem_set_info (insn);
2235             }
2236
2237           note_stores (PATTERN (insn), record_last_set_info, insn);
2238         }
2239
2240       /* The next pass builds the hash table.  */
2241
2242       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2243            insn && insn != NEXT_INSN (BLOCK_END (bb));
2244            insn = NEXT_INSN (insn))
2245         if (INSN_P (insn))
2246           {
2247             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2248               in_libcall_block = 1;
2249             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2250               in_libcall_block = 0;
2251             hash_scan_insn (insn, set_p, in_libcall_block);
2252         }
2253     }
2254
2255   free (reg_first_set);
2256   free (reg_last_set);
2257
2258   /* Catch bugs early.  */
2259   reg_first_set = reg_last_set = 0;
2260 }
2261
2262 /* Allocate space for the set hash table.
2263    N_INSNS is the number of instructions in the function.
2264    It is used to determine the number of buckets to use.  */
2265
2266 static void
2267 alloc_set_hash_table (n_insns)
2268      int n_insns;
2269 {
2270   int n;
2271
2272   set_hash_table_size = n_insns / 4;
2273   if (set_hash_table_size < 11)
2274     set_hash_table_size = 11;
2275
2276   /* Attempt to maintain efficient use of hash table.
2277      Making it an odd number is simplest for now.
2278      ??? Later take some measurements.  */
2279   set_hash_table_size |= 1;
2280   n = set_hash_table_size * sizeof (struct expr *);
2281   set_hash_table = (struct expr **) gmalloc (n);
2282 }
2283
2284 /* Free things allocated by alloc_set_hash_table.  */
2285
2286 static void
2287 free_set_hash_table ()
2288 {
2289   free (set_hash_table);
2290 }
2291
2292 /* Compute the hash table for doing copy/const propagation.  */
2293
2294 static void
2295 compute_set_hash_table ()
2296 {
2297   /* Initialize count of number of entries in hash table.  */
2298   n_sets = 0;
2299   memset ((char *) set_hash_table, 0,
2300          set_hash_table_size * sizeof (struct expr *));
2301
2302   compute_hash_table (1);
2303 }
2304
2305 /* Allocate space for the expression hash table.
2306    N_INSNS is the number of instructions in the function.
2307    It is used to determine the number of buckets to use.  */
2308
2309 static void
2310 alloc_expr_hash_table (n_insns)
2311      unsigned int n_insns;
2312 {
2313   int n;
2314
2315   expr_hash_table_size = n_insns / 2;
2316   /* Make sure the amount is usable.  */
2317   if (expr_hash_table_size < 11)
2318     expr_hash_table_size = 11;
2319
2320   /* Attempt to maintain efficient use of hash table.
2321      Making it an odd number is simplest for now.
2322      ??? Later take some measurements.  */
2323   expr_hash_table_size |= 1;
2324   n = expr_hash_table_size * sizeof (struct expr *);
2325   expr_hash_table = (struct expr **) gmalloc (n);
2326 }
2327
2328 /* Free things allocated by alloc_expr_hash_table.  */
2329
2330 static void
2331 free_expr_hash_table ()
2332 {
2333   free (expr_hash_table);
2334 }
2335
2336 /* Compute the hash table for doing GCSE.  */
2337
2338 static void
2339 compute_expr_hash_table ()
2340 {
2341   /* Initialize count of number of entries in hash table.  */
2342   n_exprs = 0;
2343   memset ((char *) expr_hash_table, 0,
2344          expr_hash_table_size * sizeof (struct expr *));
2345
2346   compute_hash_table (0);
2347 }
2348 \f
2349 /* Expression tracking support.  */
2350
2351 /* Lookup pattern PAT in the expression table.
2352    The result is a pointer to the table entry, or NULL if not found.  */
2353
2354 static struct expr *
2355 lookup_expr (pat)
2356      rtx pat;
2357 {
2358   int do_not_record_p;
2359   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2360                                  expr_hash_table_size);
2361   struct expr *expr;
2362
2363   if (do_not_record_p)
2364     return NULL;
2365
2366   expr = expr_hash_table[hash];
2367
2368   while (expr && ! expr_equiv_p (expr->expr, pat))
2369     expr = expr->next_same_hash;
2370
2371   return expr;
2372 }
2373
2374 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2375    matches it, otherwise return the first entry for REGNO.  The result is a
2376    pointer to the table entry, or NULL if not found.  */
2377
2378 static struct expr *
2379 lookup_set (regno, pat)
2380      unsigned int regno;
2381      rtx pat;
2382 {
2383   unsigned int hash = hash_set (regno, set_hash_table_size);
2384   struct expr *expr;
2385
2386   expr = set_hash_table[hash];
2387
2388   if (pat)
2389     {
2390       while (expr && ! expr_equiv_p (expr->expr, pat))
2391         expr = expr->next_same_hash;
2392     }
2393   else
2394     {
2395       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2396         expr = expr->next_same_hash;
2397     }
2398
2399   return expr;
2400 }
2401
2402 /* Return the next entry for REGNO in list EXPR.  */
2403
2404 static struct expr *
2405 next_set (regno, expr)
2406      unsigned int regno;
2407      struct expr *expr;
2408 {
2409   do
2410     expr = expr->next_same_hash;
2411   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2412
2413   return expr;
2414 }
2415
2416 /* Reset tables used to keep track of what's still available [since the
2417    start of the block].  */
2418
2419 static void
2420 reset_opr_set_tables ()
2421 {
2422   /* Maintain a bitmap of which regs have been set since beginning of
2423      the block.  */
2424   sbitmap_zero (reg_set_bitmap);
2425
2426   /* Also keep a record of the last instruction to modify memory.
2427      For now this is very trivial, we only record whether any memory
2428      location has been modified.  */
2429   mem_last_set = 0;
2430 }
2431
2432 /* Return non-zero if the operands of X are not set before INSN in
2433    INSN's basic block.  */
2434
2435 static int
2436 oprs_not_set_p (x, insn)
2437      rtx x, insn;
2438 {
2439   int i, j;
2440   enum rtx_code code;
2441   const char *fmt;
2442
2443   if (x == 0)
2444     return 1;
2445
2446   code = GET_CODE (x);
2447   switch (code)
2448     {
2449     case PC:
2450     case CC0:
2451     case CONST:
2452     case CONST_INT:
2453     case CONST_DOUBLE:
2454     case SYMBOL_REF:
2455     case LABEL_REF:
2456     case ADDR_VEC:
2457     case ADDR_DIFF_VEC:
2458       return 1;
2459
2460     case MEM:
2461       if (mem_last_set != 0)
2462         return 0;
2463       else
2464         return oprs_not_set_p (XEXP (x, 0), insn);
2465
2466     case REG:
2467       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2468
2469     default:
2470       break;
2471     }
2472
2473   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2474     {
2475       if (fmt[i] == 'e')
2476         {
2477           /* If we are about to do the last recursive call
2478              needed at this level, change it into iteration.
2479              This function is called enough to be worth it.  */
2480           if (i == 0)
2481             return oprs_not_set_p (XEXP (x, i), insn);
2482
2483           if (! oprs_not_set_p (XEXP (x, i), insn))
2484             return 0;
2485         }
2486       else if (fmt[i] == 'E')
2487         for (j = 0; j < XVECLEN (x, i); j++)
2488           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2489             return 0;
2490     }
2491
2492   return 1;
2493 }
2494
2495 /* Mark things set by a CALL.  */
2496
2497 static void
2498 mark_call (insn)
2499      rtx insn;
2500 {
2501   mem_last_set = INSN_CUID (insn);
2502 }
2503
2504 /* Mark things set by a SET.  */
2505
2506 static void
2507 mark_set (pat, insn)
2508      rtx pat, insn;
2509 {
2510   rtx dest = SET_DEST (pat);
2511
2512   while (GET_CODE (dest) == SUBREG
2513          || GET_CODE (dest) == ZERO_EXTRACT
2514          || GET_CODE (dest) == SIGN_EXTRACT
2515          || GET_CODE (dest) == STRICT_LOW_PART)
2516     dest = XEXP (dest, 0);
2517
2518   if (GET_CODE (dest) == REG)
2519     SET_BIT (reg_set_bitmap, REGNO (dest));
2520   else if (GET_CODE (dest) == MEM)
2521     mem_last_set = INSN_CUID (insn);
2522
2523   if (GET_CODE (SET_SRC (pat)) == CALL)
2524     mark_call (insn);
2525 }
2526
2527 /* Record things set by a CLOBBER.  */
2528
2529 static void
2530 mark_clobber (pat, insn)
2531      rtx pat, insn;
2532 {
2533   rtx clob = XEXP (pat, 0);
2534
2535   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2536     clob = XEXP (clob, 0);
2537
2538   if (GET_CODE (clob) == REG)
2539     SET_BIT (reg_set_bitmap, REGNO (clob));
2540   else
2541     mem_last_set = INSN_CUID (insn);
2542 }
2543
2544 /* Record things set by INSN.
2545    This data is used by oprs_not_set_p.  */
2546
2547 static void
2548 mark_oprs_set (insn)
2549      rtx insn;
2550 {
2551   rtx pat = PATTERN (insn);
2552   int i;
2553
2554   if (GET_CODE (pat) == SET)
2555     mark_set (pat, insn);
2556   else if (GET_CODE (pat) == PARALLEL)
2557     for (i = 0; i < XVECLEN (pat, 0); i++)
2558       {
2559         rtx x = XVECEXP (pat, 0, i);
2560
2561         if (GET_CODE (x) == SET)
2562           mark_set (x, insn);
2563         else if (GET_CODE (x) == CLOBBER)
2564           mark_clobber (x, insn);
2565         else if (GET_CODE (x) == CALL)
2566           mark_call (insn);
2567       }
2568
2569   else if (GET_CODE (pat) == CLOBBER)
2570     mark_clobber (pat, insn);
2571   else if (GET_CODE (pat) == CALL)
2572     mark_call (insn);
2573 }
2574
2575 \f
2576 /* Classic GCSE reaching definition support.  */
2577
2578 /* Allocate reaching def variables.  */
2579
2580 static void
2581 alloc_rd_mem (n_blocks, n_insns)
2582      int n_blocks, n_insns;
2583 {
2584   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2585   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2586
2587   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2588   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2589
2590   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2591   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2592
2593   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2594   sbitmap_vector_zero (rd_out, n_basic_blocks);
2595 }
2596
2597 /* Free reaching def variables.  */
2598
2599 static void
2600 free_rd_mem ()
2601 {
2602   free (rd_kill);
2603   free (rd_gen);
2604   free (reaching_defs);
2605   free (rd_out);
2606 }
2607
2608 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2609
2610 static void
2611 handle_rd_kill_set (insn, regno, bb)
2612      rtx insn;
2613      int regno, bb;
2614 {
2615   struct reg_set *this_reg;
2616
2617   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2618     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2619       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2620 }
2621
2622 /* Compute the set of kill's for reaching definitions.  */
2623
2624 static void
2625 compute_kill_rd ()
2626 {
2627   int bb, cuid;
2628   int regno, i;
2629
2630   /* For each block
2631        For each set bit in `gen' of the block (i.e each insn which
2632            generates a definition in the block)
2633          Call the reg set by the insn corresponding to that bit regx
2634          Look at the linked list starting at reg_set_table[regx]
2635          For each setting of regx in the linked list, which is not in
2636              this block
2637            Set the bit in `kill' corresponding to that insn.   */
2638   for (bb = 0; bb < n_basic_blocks; bb++)
2639     for (cuid = 0; cuid < max_cuid; cuid++)
2640       if (TEST_BIT (rd_gen[bb], cuid))
2641         {
2642           rtx insn = CUID_INSN (cuid);
2643           rtx pat = PATTERN (insn);
2644
2645           if (GET_CODE (insn) == CALL_INSN)
2646             {
2647               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2648                 {
2649                   if ((call_used_regs[regno]
2650                        && regno != STACK_POINTER_REGNUM
2651 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2652                        && regno != HARD_FRAME_POINTER_REGNUM
2653 #endif
2654 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2655                        && ! (regno == ARG_POINTER_REGNUM
2656                              && fixed_regs[regno])
2657 #endif
2658 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2659                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2660 #endif
2661                        && regno != FRAME_POINTER_REGNUM)
2662                       || global_regs[regno])
2663                     handle_rd_kill_set (insn, regno, bb);
2664                 }
2665             }
2666
2667           if (GET_CODE (pat) == PARALLEL)
2668             {
2669               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2670                 {
2671                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2672
2673                   if ((code == SET || code == CLOBBER)
2674                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2675                     handle_rd_kill_set (insn,
2676                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2677                                         bb);
2678                 }
2679             }
2680           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2681             /* Each setting of this register outside of this block
2682                must be marked in the set of kills in this block.  */
2683             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2684         }
2685 }
2686
2687 /* Compute the reaching definitions as in
2688    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2689    Chapter 10.  It is the same algorithm as used for computing available
2690    expressions but applied to the gens and kills of reaching definitions.  */
2691
2692 static void
2693 compute_rd ()
2694 {
2695   int bb, changed, passes;
2696
2697   for (bb = 0; bb < n_basic_blocks; bb++)
2698     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2699
2700   passes = 0;
2701   changed = 1;
2702   while (changed)
2703     {
2704       changed = 0;
2705       for (bb = 0; bb < n_basic_blocks; bb++)
2706         {
2707           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2708           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2709                                             reaching_defs[bb], rd_kill[bb]);
2710         }
2711       passes++;
2712     }
2713
2714   if (gcse_file)
2715     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2716 }
2717 \f
2718 /* Classic GCSE available expression support.  */
2719
2720 /* Allocate memory for available expression computation.  */
2721
2722 static void
2723 alloc_avail_expr_mem (n_blocks, n_exprs)
2724      int n_blocks, n_exprs;
2725 {
2726   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2727   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2728
2729   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2730   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2731
2732   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2733   sbitmap_vector_zero (ae_in, n_basic_blocks);
2734
2735   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2736   sbitmap_vector_zero (ae_out, n_basic_blocks);
2737 }
2738
2739 static void
2740 free_avail_expr_mem ()
2741 {
2742   free (ae_kill);
2743   free (ae_gen);
2744   free (ae_in);
2745   free (ae_out);
2746 }
2747
2748 /* Compute the set of available expressions generated in each basic block.  */
2749
2750 static void
2751 compute_ae_gen ()
2752 {
2753   unsigned int i;
2754   struct expr *expr;
2755   struct occr *occr;
2756
2757   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2758      This is all we have to do because an expression is not recorded if it
2759      is not available, and the only expressions we want to work with are the
2760      ones that are recorded.  */
2761   for (i = 0; i < expr_hash_table_size; i++)
2762     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2763       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2764         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2765 }
2766
2767 /* Return non-zero if expression X is killed in BB.  */
2768
2769 static int
2770 expr_killed_p (x, bb)
2771      rtx x;
2772      int bb;
2773 {
2774   int i, j;
2775   enum rtx_code code;
2776   const char *fmt;
2777
2778   if (x == 0)
2779     return 1;
2780
2781   code = GET_CODE (x);
2782   switch (code)
2783     {
2784     case REG:
2785       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2786
2787     case MEM:
2788       if (mem_set_in_block[bb])
2789         return 1;
2790       else
2791         return expr_killed_p (XEXP (x, 0), bb);
2792
2793     case PC:
2794     case CC0: /*FIXME*/
2795     case CONST:
2796     case CONST_INT:
2797     case CONST_DOUBLE:
2798     case SYMBOL_REF:
2799     case LABEL_REF:
2800     case ADDR_VEC:
2801     case ADDR_DIFF_VEC:
2802       return 0;
2803
2804     default:
2805       break;
2806     }
2807
2808   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2809     {
2810       if (fmt[i] == 'e')
2811         {
2812           /* If we are about to do the last recursive call
2813              needed at this level, change it into iteration.
2814              This function is called enough to be worth it.  */
2815           if (i == 0)
2816             return expr_killed_p (XEXP (x, i), bb);
2817           else if (expr_killed_p (XEXP (x, i), bb))
2818             return 1;
2819         }
2820       else if (fmt[i] == 'E')
2821         for (j = 0; j < XVECLEN (x, i); j++)
2822           if (expr_killed_p (XVECEXP (x, i, j), bb))
2823             return 1;
2824     }
2825
2826   return 0;
2827 }
2828
2829 /* Compute the set of available expressions killed in each basic block.  */
2830
2831 static void
2832 compute_ae_kill (ae_gen, ae_kill)
2833      sbitmap *ae_gen, *ae_kill;
2834 {
2835   int bb;
2836   unsigned int i;
2837   struct expr *expr;
2838
2839   for (bb = 0; bb < n_basic_blocks; bb++)
2840     for (i = 0; i < expr_hash_table_size; i++)
2841       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2842         {
2843           /* Skip EXPR if generated in this block.  */
2844           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2845             continue;
2846
2847           if (expr_killed_p (expr->expr, bb))
2848             SET_BIT (ae_kill[bb], expr->bitmap_index);
2849         }
2850 }
2851 \f
2852 /* Actually perform the Classic GCSE optimizations.  */
2853
2854 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2855
2856    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2857    as a positive reach.  We want to do this when there are two computations
2858    of the expression in the block.
2859
2860    VISITED is a pointer to a working buffer for tracking which BB's have
2861    been visited.  It is NULL for the top-level call.
2862
2863    We treat reaching expressions that go through blocks containing the same
2864    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2865    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2866    2 as not reaching.  The intent is to improve the probability of finding
2867    only one reaching expression and to reduce register lifetimes by picking
2868    the closest such expression.  */
2869
2870 static int
2871 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2872      struct occr *occr;
2873      struct expr *expr;
2874      int bb;
2875      int check_self_loop;
2876      char *visited;
2877 {
2878   edge pred;
2879
2880   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2881     {
2882       int pred_bb = pred->src->index;
2883
2884       if (visited[pred_bb])
2885         /* This predecessor has already been visited. Nothing to do.  */
2886           ;
2887       else if (pred_bb == bb)
2888         {
2889           /* BB loops on itself.  */
2890           if (check_self_loop
2891               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2892               && BLOCK_NUM (occr->insn) == pred_bb)
2893             return 1;
2894
2895           visited[pred_bb] = 1;
2896         }
2897
2898       /* Ignore this predecessor if it kills the expression.  */
2899       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2900         visited[pred_bb] = 1;
2901
2902       /* Does this predecessor generate this expression?  */
2903       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2904         {
2905           /* Is this the occurrence we're looking for?
2906              Note that there's only one generating occurrence per block
2907              so we just need to check the block number.  */
2908           if (BLOCK_NUM (occr->insn) == pred_bb)
2909             return 1;
2910
2911           visited[pred_bb] = 1;
2912         }
2913
2914       /* Neither gen nor kill.  */
2915       else
2916         {
2917           visited[pred_bb] = 1;
2918           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2919               visited))
2920
2921             return 1;
2922         }
2923     }
2924
2925   /* All paths have been checked.  */
2926   return 0;
2927 }
2928
2929 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2930    memory allocated for that function is returned. */
2931
2932 static int
2933 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2934      struct occr *occr;
2935      struct expr *expr;
2936      int bb;
2937      int check_self_loop;
2938 {
2939   int rval;
2940   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2941
2942   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2943
2944   free (visited);
2945   return rval;
2946 }
2947
2948 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2949    If there is more than one such instruction, return NULL.
2950
2951    Called only by handle_avail_expr.  */
2952
2953 static rtx
2954 computing_insn (expr, insn)
2955      struct expr *expr;
2956      rtx insn;
2957 {
2958   int bb = BLOCK_NUM (insn);
2959
2960   if (expr->avail_occr->next == NULL)
2961     {
2962       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2963         /* The available expression is actually itself
2964            (i.e. a loop in the flow graph) so do nothing.  */
2965         return NULL;
2966
2967       /* (FIXME) Case that we found a pattern that was created by
2968          a substitution that took place.  */
2969       return expr->avail_occr->insn;
2970     }
2971   else
2972     {
2973       /* Pattern is computed more than once.
2974          Search backwards from this insn to see how many of these
2975          computations actually reach this insn.  */
2976       struct occr *occr;
2977       rtx insn_computes_expr = NULL;
2978       int can_reach = 0;
2979
2980       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2981         {
2982           if (BLOCK_NUM (occr->insn) == bb)
2983             {
2984               /* The expression is generated in this block.
2985                  The only time we care about this is when the expression
2986                  is generated later in the block [and thus there's a loop].
2987                  We let the normal cse pass handle the other cases.  */
2988               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2989                   && expr_reaches_here_p (occr, expr, bb, 1))
2990                 {
2991                   can_reach++;
2992                   if (can_reach > 1)
2993                     return NULL;
2994
2995                   insn_computes_expr = occr->insn;
2996                 }
2997             }
2998           else if (expr_reaches_here_p (occr, expr, bb, 0))
2999             {
3000               can_reach++;
3001               if (can_reach > 1)
3002                 return NULL;
3003
3004               insn_computes_expr = occr->insn;
3005             }
3006         }
3007
3008       if (insn_computes_expr == NULL)
3009         abort ();
3010
3011       return insn_computes_expr;
3012     }
3013 }
3014
3015 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3016    Only called by can_disregard_other_sets.  */
3017
3018 static int
3019 def_reaches_here_p (insn, def_insn)
3020      rtx insn, def_insn;
3021 {
3022   rtx reg;
3023
3024   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3025     return 1;
3026
3027   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3028     {
3029       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3030         {
3031           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3032             return 1;
3033           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3034             reg = XEXP (PATTERN (def_insn), 0);
3035           else if (GET_CODE (PATTERN (def_insn)) == SET)
3036             reg = SET_DEST (PATTERN (def_insn));
3037           else
3038             abort ();
3039
3040           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3041         }
3042       else
3043         return 0;
3044     }
3045
3046   return 0;
3047 }
3048
3049 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
3050    value returned is the number of definitions that reach INSN.  Returning a
3051    value of zero means that [maybe] more than one definition reaches INSN and
3052    the caller can't perform whatever optimization it is trying.  i.e. it is
3053    always safe to return zero.  */
3054
3055 static int
3056 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3057      struct reg_set **addr_this_reg;
3058      rtx insn;
3059      int for_combine;
3060 {
3061   int number_of_reaching_defs = 0;
3062   struct reg_set *this_reg;
3063
3064   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3065     if (def_reaches_here_p (insn, this_reg->insn))
3066       {
3067         number_of_reaching_defs++;
3068         /* Ignore parallels for now.  */
3069         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3070           return 0;
3071
3072         if (!for_combine
3073             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3074                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3075                                   SET_SRC (PATTERN (insn)))))
3076           /* A setting of the reg to a different value reaches INSN.  */
3077           return 0;
3078
3079         if (number_of_reaching_defs > 1)
3080           {
3081             /* If in this setting the value the register is being set to is
3082                equal to the previous value the register was set to and this
3083                setting reaches the insn we are trying to do the substitution
3084                on then we are ok.  */
3085             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3086               return 0;
3087             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3088                                     SET_SRC (PATTERN (insn))))
3089               return 0;
3090           }
3091
3092         *addr_this_reg = this_reg;
3093       }
3094
3095   return number_of_reaching_defs;
3096 }
3097
3098 /* Expression computed by insn is available and the substitution is legal,
3099    so try to perform the substitution.
3100
3101    The result is non-zero if any changes were made.  */
3102
3103 static int
3104 handle_avail_expr (insn, expr)
3105      rtx insn;
3106      struct expr *expr;
3107 {
3108   rtx pat, insn_computes_expr;
3109   rtx to;
3110   struct reg_set *this_reg;
3111   int found_setting, use_src;
3112   int changed = 0;
3113
3114   /* We only handle the case where one computation of the expression
3115      reaches this instruction.  */
3116   insn_computes_expr = computing_insn (expr, insn);
3117   if (insn_computes_expr == NULL)
3118     return 0;
3119
3120   found_setting = 0;
3121   use_src = 0;
3122
3123   /* At this point we know only one computation of EXPR outside of this
3124      block reaches this insn.  Now try to find a register that the
3125      expression is computed into.  */
3126   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3127     {
3128       /* This is the case when the available expression that reaches
3129          here has already been handled as an available expression.  */
3130       unsigned int regnum_for_replacing
3131         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3132
3133       /* If the register was created by GCSE we can't use `reg_set_table',
3134          however we know it's set only once.  */
3135       if (regnum_for_replacing >= max_gcse_regno
3136           /* If the register the expression is computed into is set only once,
3137              or only one set reaches this insn, we can use it.  */
3138           || (((this_reg = reg_set_table[regnum_for_replacing]),
3139                this_reg->next == NULL)
3140               || can_disregard_other_sets (&this_reg, insn, 0)))
3141        {
3142          use_src = 1;
3143          found_setting = 1;
3144        }
3145     }
3146
3147   if (!found_setting)
3148     {
3149       unsigned int regnum_for_replacing
3150         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3151
3152       /* This shouldn't happen.  */
3153       if (regnum_for_replacing >= max_gcse_regno)
3154         abort ();
3155
3156       this_reg = reg_set_table[regnum_for_replacing];
3157
3158       /* If the register the expression is computed into is set only once,
3159          or only one set reaches this insn, use it.  */
3160       if (this_reg->next == NULL
3161           || can_disregard_other_sets (&this_reg, insn, 0))
3162         found_setting = 1;
3163     }
3164
3165   if (found_setting)
3166     {
3167       pat = PATTERN (insn);
3168       if (use_src)
3169         to = SET_SRC (PATTERN (insn_computes_expr));
3170       else
3171         to = SET_DEST (PATTERN (insn_computes_expr));
3172       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3173
3174       /* We should be able to ignore the return code from validate_change but
3175          to play it safe we check.  */
3176       if (changed)
3177         {
3178           gcse_subst_count++;
3179           if (gcse_file != NULL)
3180             {
3181               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3182                        INSN_UID (insn));
3183               fprintf (gcse_file, " reg %d %s insn %d\n",
3184                        REGNO (to), use_src ? "from" : "set in",
3185                        INSN_UID (insn_computes_expr));
3186             }
3187         }
3188     }
3189
3190   /* The register that the expr is computed into is set more than once.  */
3191   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3192     {
3193       /* Insert an insn after insnx that copies the reg set in insnx
3194          into a new pseudo register call this new register REGN.
3195          From insnb until end of basic block or until REGB is set
3196          replace all uses of REGB with REGN.  */
3197       rtx new_insn;
3198
3199       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3200
3201       /* Generate the new insn.  */
3202       /* ??? If the change fails, we return 0, even though we created
3203          an insn.  I think this is ok.  */
3204       new_insn
3205         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3206                                         SET_DEST (PATTERN
3207                                                   (insn_computes_expr))),
3208                            insn_computes_expr);
3209
3210       /* Keep block number table up to date.  */
3211       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3212
3213       /* Keep register set table up to date.  */
3214       record_one_set (REGNO (to), new_insn);
3215
3216       gcse_create_count++;
3217       if (gcse_file != NULL)
3218         {
3219           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3220                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3221                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3222           fprintf (gcse_file, ", computed in insn %d,\n",
3223                    INSN_UID (insn_computes_expr));
3224           fprintf (gcse_file, "      into newly allocated reg %d\n",
3225                    REGNO (to));
3226         }
3227
3228       pat = PATTERN (insn);
3229
3230       /* Do register replacement for INSN.  */
3231       changed = validate_change (insn, &SET_SRC (pat),
3232                                  SET_DEST (PATTERN
3233                                            (NEXT_INSN (insn_computes_expr))),
3234                                  0);
3235
3236       /* We should be able to ignore the return code from validate_change but
3237          to play it safe we check.  */
3238       if (changed)
3239         {
3240           gcse_subst_count++;
3241           if (gcse_file != NULL)
3242             {
3243               fprintf (gcse_file,
3244                        "GCSE: Replacing the source in insn %d with reg %d ",
3245                        INSN_UID (insn),
3246                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3247                                                  (insn_computes_expr)))));
3248               fprintf (gcse_file, "set in insn %d\n",
3249                        INSN_UID (insn_computes_expr));
3250             }
3251         }
3252     }
3253
3254   return changed;
3255 }
3256
3257 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3258    the dataflow analysis has been done.
3259
3260    The result is non-zero if a change was made.  */
3261
3262 static int
3263 classic_gcse ()
3264 {
3265   int bb, changed;
3266   rtx insn;
3267
3268   /* Note we start at block 1.  */
3269
3270   changed = 0;
3271   for (bb = 1; bb < n_basic_blocks; bb++)
3272     {
3273       /* Reset tables used to keep track of what's still valid [since the
3274          start of the block].  */
3275       reset_opr_set_tables ();
3276
3277       for (insn = BLOCK_HEAD (bb);
3278            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3279            insn = NEXT_INSN (insn))
3280         {
3281           /* Is insn of form (set (pseudo-reg) ...)?  */
3282           if (GET_CODE (insn) == INSN
3283               && GET_CODE (PATTERN (insn)) == SET
3284               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3285               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3286             {
3287               rtx pat = PATTERN (insn);
3288               rtx src = SET_SRC (pat);
3289               struct expr *expr;
3290
3291               if (want_to_gcse_p (src)
3292                   /* Is the expression recorded?  */
3293                   && ((expr = lookup_expr (src)) != NULL)
3294                   /* Is the expression available [at the start of the
3295                      block]?  */
3296                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3297                   /* Are the operands unchanged since the start of the
3298                      block?  */
3299                   && oprs_not_set_p (src, insn))
3300                 changed |= handle_avail_expr (insn, expr);
3301             }
3302
3303           /* Keep track of everything modified by this insn.  */
3304           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3305           if (INSN_P (insn))
3306             mark_oprs_set (insn);
3307         }
3308     }
3309
3310   return changed;
3311 }
3312
3313 /* Top level routine to perform one classic GCSE pass.
3314
3315    Return non-zero if a change was made.  */
3316
3317 static int
3318 one_classic_gcse_pass (pass)
3319      int pass;
3320 {
3321   int changed = 0;
3322
3323   gcse_subst_count = 0;
3324   gcse_create_count = 0;
3325
3326   alloc_expr_hash_table (max_cuid);
3327   alloc_rd_mem (n_basic_blocks, max_cuid);
3328   compute_expr_hash_table ();
3329   if (gcse_file)
3330     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3331                      expr_hash_table_size, n_exprs);
3332
3333   if (n_exprs > 0)
3334     {
3335       compute_kill_rd ();
3336       compute_rd ();
3337       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3338       compute_ae_gen ();
3339       compute_ae_kill (ae_gen, ae_kill);
3340       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3341       changed = classic_gcse ();
3342       free_avail_expr_mem ();
3343     }
3344
3345   free_rd_mem ();
3346   free_expr_hash_table ();
3347
3348   if (gcse_file)
3349     {
3350       fprintf (gcse_file, "\n");
3351       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3352                current_function_name, pass, bytes_used, gcse_subst_count);
3353       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3354     }
3355
3356   return changed;
3357 }
3358 \f
3359 /* Compute copy/constant propagation working variables.  */
3360
3361 /* Local properties of assignments.  */
3362 static sbitmap *cprop_pavloc;
3363 static sbitmap *cprop_absaltered;
3364
3365 /* Global properties of assignments (computed from the local properties).  */
3366 static sbitmap *cprop_avin;
3367 static sbitmap *cprop_avout;
3368
3369 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3370    basic blocks.  N_SETS is the number of sets.  */
3371
3372 static void
3373 alloc_cprop_mem (n_blocks, n_sets)
3374      int n_blocks, n_sets;
3375 {
3376   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3377   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3378
3379   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3380   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3381 }
3382
3383 /* Free vars used by copy/const propagation.  */
3384
3385 static void
3386 free_cprop_mem ()
3387 {
3388   free (cprop_pavloc);
3389   free (cprop_absaltered);
3390   free (cprop_avin);
3391   free (cprop_avout);
3392 }
3393
3394 /* For each block, compute whether X is transparent.  X is either an
3395    expression or an assignment [though we don't care which, for this context
3396    an assignment is treated as an expression].  For each block where an
3397    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3398    bit in BMAP.  */
3399
3400 static void
3401 compute_transp (x, indx, bmap, set_p)
3402      rtx x;
3403      int indx;
3404      sbitmap *bmap;
3405      int set_p;
3406 {
3407   int bb, i, j;
3408   enum rtx_code code;
3409   reg_set *r;
3410   const char *fmt;
3411
3412   /* repeat is used to turn tail-recursion into iteration since GCC
3413      can't do it when there's no return value.  */
3414  repeat:
3415
3416   if (x == 0)
3417     return;
3418
3419   code = GET_CODE (x);
3420   switch (code)
3421     {
3422     case REG:
3423       if (set_p)
3424         {
3425           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3426             {
3427               for (bb = 0; bb < n_basic_blocks; bb++)
3428                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3429                   SET_BIT (bmap[bb], indx);
3430             }
3431           else
3432             {
3433               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3434                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3435             }
3436         }
3437       else
3438         {
3439           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3440             {
3441               for (bb = 0; bb < n_basic_blocks; bb++)
3442                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3443                   RESET_BIT (bmap[bb], indx);
3444             }
3445           else
3446             {
3447               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3448                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3449             }
3450         }
3451
3452       return;
3453
3454     case MEM:
3455       if (set_p)
3456         {
3457           for (bb = 0; bb < n_basic_blocks; bb++)
3458             if (mem_set_in_block[bb])
3459               SET_BIT (bmap[bb], indx);
3460         }
3461       else
3462         {
3463           for (bb = 0; bb < n_basic_blocks; bb++)
3464             if (mem_set_in_block[bb])
3465               RESET_BIT (bmap[bb], indx);
3466         }
3467
3468       x = XEXP (x, 0);
3469       goto repeat;
3470
3471     case PC:
3472     case CC0: /*FIXME*/
3473     case CONST:
3474     case CONST_INT:
3475     case CONST_DOUBLE:
3476     case SYMBOL_REF:
3477     case LABEL_REF:
3478     case ADDR_VEC:
3479     case ADDR_DIFF_VEC:
3480       return;
3481
3482     default:
3483       break;
3484     }
3485
3486   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3487     {
3488       if (fmt[i] == 'e')
3489         {
3490           /* If we are about to do the last recursive call
3491              needed at this level, change it into iteration.
3492              This function is called enough to be worth it.  */
3493           if (i == 0)
3494             {
3495               x = XEXP (x, i);
3496               goto repeat;
3497             }
3498
3499           compute_transp (XEXP (x, i), indx, bmap, set_p);
3500         }
3501       else if (fmt[i] == 'E')
3502         for (j = 0; j < XVECLEN (x, i); j++)
3503           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3504     }
3505 }
3506
3507 /* Top level routine to do the dataflow analysis needed by copy/const
3508    propagation.  */
3509
3510 static void
3511 compute_cprop_data ()
3512 {
3513   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3514   compute_available (cprop_pavloc, cprop_absaltered,
3515                      cprop_avout, cprop_avin);
3516 }
3517 \f
3518 /* Copy/constant propagation.  */
3519
3520 /* Maximum number of register uses in an insn that we handle.  */
3521 #define MAX_USES 8
3522
3523 /* Table of uses found in an insn.
3524    Allocated statically to avoid alloc/free complexity and overhead.  */
3525 static struct reg_use reg_use_table[MAX_USES];
3526
3527 /* Index into `reg_use_table' while building it.  */
3528 static int reg_use_count;
3529
3530 /* Set up a list of register numbers used in INSN.  The found uses are stored
3531    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3532    and contains the number of uses in the table upon exit.
3533
3534    ??? If a register appears multiple times we will record it multiple times.
3535    This doesn't hurt anything but it will slow things down.  */
3536
3537 static void
3538 find_used_regs (x)
3539      rtx x;
3540 {
3541   int i, j;
3542   enum rtx_code code;
3543   const char *fmt;
3544
3545   /* repeat is used to turn tail-recursion into iteration since GCC
3546      can't do it when there's no return value.  */
3547  repeat:
3548
3549   if (x == 0)
3550     return;
3551
3552   code = GET_CODE (x);
3553   switch (code)
3554     {
3555     case REG:
3556       if (reg_use_count == MAX_USES)
3557         return;
3558
3559       reg_use_table[reg_use_count].reg_rtx = x;
3560       reg_use_count++;
3561       return;
3562
3563     case MEM:
3564       x = XEXP (x, 0);
3565       goto repeat;
3566
3567     case PC:
3568     case CC0:
3569     case CONST:
3570     case CONST_INT:
3571     case CONST_DOUBLE:
3572     case SYMBOL_REF:
3573     case LABEL_REF:
3574     case CLOBBER:
3575     case ADDR_VEC:
3576     case ADDR_DIFF_VEC:
3577     case ASM_INPUT: /*FIXME*/
3578       return;
3579
3580     case SET:
3581       if (GET_CODE (SET_DEST (x)) == MEM)
3582         find_used_regs (SET_DEST (x));
3583       x = SET_SRC (x);
3584       goto repeat;
3585
3586     default:
3587       break;
3588     }
3589
3590   /* Recursively scan the operands of this expression.  */
3591
3592   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3593     {
3594       if (fmt[i] == 'e')
3595         {
3596           /* If we are about to do the last recursive call
3597              needed at this level, change it into iteration.
3598              This function is called enough to be worth it.  */
3599           if (i == 0)
3600             {
3601               x = XEXP (x, 0);
3602               goto repeat;
3603             }
3604
3605           find_used_regs (XEXP (x, i));
3606         }
3607       else if (fmt[i] == 'E')
3608         for (j = 0; j < XVECLEN (x, i); j++)
3609           find_used_regs (XVECEXP (x, i, j));
3610     }
3611 }
3612
3613 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3614    Returns non-zero is successful.  */
3615
3616 static int
3617 try_replace_reg (from, to, insn)
3618      rtx from, to, insn;
3619 {
3620   rtx note;
3621   rtx src;
3622   int success;
3623   rtx set;
3624
3625   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3626
3627   if (!note)
3628     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3629
3630   /* If this fails we could try to simplify the result of the
3631      replacement and attempt to recognize the simplified insn.
3632
3633      But we need a general simplify_rtx that doesn't have pass
3634      specific state variables.  I'm not aware of one at the moment.  */
3635
3636   success = validate_replace_src (from, to, insn);
3637   set = single_set (insn);
3638
3639   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3640      information.  */
3641   if (!success && !note)
3642     {
3643       if (!set)
3644         return 0;
3645
3646       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3647                                                    copy_rtx (SET_SRC (set)),
3648                                                    REG_NOTES (insn));
3649     }
3650
3651   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3652      try to simplify them.  */
3653   if (note)
3654     {
3655       rtx simplified;
3656
3657       if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
3658         abort();
3659
3660       src = XEXP (note, 0);
3661
3662       /* Try to simplify resulting note. */
3663       simplified = simplify_rtx (src);
3664       if (simplified)
3665         {
3666           src = simplified;
3667           XEXP (note, 0) = src;
3668         }
3669
3670       /* REG_EQUAL may get simplified into register.
3671          We don't allow that. Remove that note. This code ought
3672          not to hapen, because previous code ought to syntetize
3673          reg-reg move, but be on the safe side.  */
3674       else if (REG_P (src))
3675         remove_note (insn, note);
3676     }
3677   return success;
3678 }
3679
3680 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3681    NULL no such set is found.  */
3682
3683 static struct expr *
3684 find_avail_set (regno, insn)
3685      int regno;
3686      rtx insn;
3687 {
3688   /* SET1 contains the last set found that can be returned to the caller for
3689      use in a substitution.  */
3690   struct expr *set1 = 0;
3691
3692   /* Loops are not possible here.  To get a loop we would need two sets
3693      available at the start of the block containing INSN.  ie we would
3694      need two sets like this available at the start of the block:
3695
3696        (set (reg X) (reg Y))
3697        (set (reg Y) (reg X))
3698
3699      This can not happen since the set of (reg Y) would have killed the
3700      set of (reg X) making it unavailable at the start of this block.  */
3701   while (1)
3702      {
3703       rtx src;
3704       struct expr *set = lookup_set (regno, NULL_RTX);
3705
3706       /* Find a set that is available at the start of the block
3707          which contains INSN.  */
3708       while (set)
3709         {
3710           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3711             break;
3712           set = next_set (regno, set);
3713         }
3714
3715       /* If no available set was found we've reached the end of the
3716          (possibly empty) copy chain.  */
3717       if (set == 0)
3718         break;
3719
3720       if (GET_CODE (set->expr) != SET)
3721         abort ();
3722
3723       src = SET_SRC (set->expr);
3724
3725       /* We know the set is available.
3726          Now check that SRC is ANTLOC (i.e. none of the source operands
3727          have changed since the start of the block).
3728
3729          If the source operand changed, we may still use it for the next
3730          iteration of this loop, but we may not use it for substitutions.  */
3731
3732       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3733         set1 = set;
3734
3735       /* If the source of the set is anything except a register, then
3736          we have reached the end of the copy chain.  */
3737       if (GET_CODE (src) != REG)
3738         break;
3739
3740       /* Follow the copy chain, ie start another iteration of the loop
3741          and see if we have an available copy into SRC.  */
3742       regno = REGNO (src);
3743      }
3744
3745   /* SET1 holds the last set that was available and anticipatable at
3746      INSN.  */
3747   return set1;
3748 }
3749
3750 /* Subroutine of cprop_insn that tries to propagate constants into
3751    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3752    that we can use for substitutions.
3753    REG_USED is the use we will try to replace, SRC is the constant we
3754    will try to substitute for it.
3755    Returns nonzero if a change was made.  */
3756
3757 static int
3758 cprop_jump (insn, copy, reg_used, src)
3759      rtx insn, copy;
3760      struct reg_use *reg_used;
3761      rtx src;
3762 {
3763   rtx set = PATTERN (copy);
3764   rtx temp;
3765
3766   /* Replace the register with the appropriate constant.  */
3767   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3768
3769   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3770                                      GET_MODE (SET_SRC (set)),
3771                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3772                                      XEXP (SET_SRC (set), 0),
3773                                      XEXP (SET_SRC (set), 1),
3774                                      XEXP (SET_SRC (set), 2));
3775
3776   /* If no simplification can be made, then try the next
3777      register.  */
3778   if (temp == 0)
3779     return 0;
3780
3781   SET_SRC (set) = temp;
3782
3783   /* That may have changed the structure of TEMP, so
3784      force it to be rerecognized if it has not turned
3785      into a nop or unconditional jump.  */
3786
3787   INSN_CODE (copy) = -1;
3788   if ((SET_DEST (set) == pc_rtx
3789        && (SET_SRC (set) == pc_rtx
3790            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3791       || recog (PATTERN (copy), copy, NULL) >= 0)
3792     {
3793       /* This has either become an unconditional jump
3794          or a nop-jump.  We'd like to delete nop jumps
3795          here, but doing so confuses gcse.  So we just
3796          make the replacement and let later passes
3797          sort things out.  */
3798       PATTERN (insn) = set;
3799       INSN_CODE (insn) = -1;
3800
3801       /* One less use of the label this insn used to jump to
3802          if we turned this into a NOP jump.  */
3803       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3804         --LABEL_NUSES (JUMP_LABEL (insn));
3805
3806       /* If this has turned into an unconditional jump,
3807          then put a barrier after it so that the unreachable
3808          code will be deleted.  */
3809       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3810         emit_barrier_after (insn);
3811
3812       run_jump_opt_after_gcse = 1;
3813
3814       const_prop_count++;
3815       if (gcse_file != NULL)
3816         {
3817           fprintf (gcse_file,
3818                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3819                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3820           print_rtl (gcse_file, src);
3821           fprintf (gcse_file, "\n");
3822         }
3823
3824       return 1;
3825     }
3826   return 0;
3827 }
3828
3829 #ifdef HAVE_cc0
3830
3831 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3832    for machines that have CC0.  INSN is a single set that stores into CC0;
3833    the insn following it is a conditional jump.  REG_USED is the use we will
3834    try to replace, SRC is the constant we will try to substitute for it.
3835    Returns nonzero if a change was made.  */
3836
3837 static int
3838 cprop_cc0_jump (insn, reg_used, src)
3839      rtx insn;
3840      struct reg_use *reg_used;
3841      rtx src;
3842 {
3843   rtx jump = NEXT_INSN (insn);
3844   rtx copy = copy_rtx (jump);
3845   rtx set = PATTERN (copy);
3846
3847   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3848      substitute into it.  */
3849   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3850   if (! cprop_jump (jump, copy, reg_used, src))
3851     return 0;
3852
3853   /* If we succeeded, delete the cc0 setter.  */
3854   PUT_CODE (insn, NOTE);
3855   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3856   NOTE_SOURCE_FILE (insn) = 0;
3857   return 1;
3858  }
3859 #endif
3860
3861 /* Perform constant and copy propagation on INSN.
3862    The result is non-zero if a change was made.  */
3863
3864 static int
3865 cprop_insn (insn, alter_jumps)
3866      rtx insn;
3867      int alter_jumps;
3868 {
3869   struct reg_use *reg_used;
3870   int changed = 0;
3871   rtx note;
3872
3873   /* Only propagate into SETs.  Note that a conditional jump is a
3874      SET with pc_rtx as the destination.  */
3875   if ((GET_CODE (insn) != INSN
3876        && GET_CODE (insn) != JUMP_INSN)
3877       || GET_CODE (PATTERN (insn)) != SET)
3878     return 0;
3879
3880   reg_use_count = 0;
3881   find_used_regs (PATTERN (insn));
3882
3883   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3884   if (!note)
3885     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3886
3887   /* We may win even when propagating constants into notes. */
3888   if (note)
3889     find_used_regs (XEXP (note, 0));
3890
3891   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3892        reg_used++, reg_use_count--)
3893     {
3894       unsigned int regno = REGNO (reg_used->reg_rtx);
3895       rtx pat, src;
3896       struct expr *set;
3897
3898       /* Ignore registers created by GCSE.
3899          We do this because ... */
3900       if (regno >= max_gcse_regno)
3901         continue;
3902
3903       /* If the register has already been set in this block, there's
3904          nothing we can do.  */
3905       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3906         continue;
3907
3908       /* Find an assignment that sets reg_used and is available
3909          at the start of the block.  */
3910       set = find_avail_set (regno, insn);
3911       if (! set)
3912         continue;
3913
3914       pat = set->expr;
3915       /* ??? We might be able to handle PARALLELs.  Later.  */
3916       if (GET_CODE (pat) != SET)
3917         abort ();
3918
3919       src = SET_SRC (pat);
3920
3921       /* Constant propagation.  */
3922       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3923           || GET_CODE (src) == SYMBOL_REF)
3924         {
3925           /* Handle normal insns first.  */
3926           if (GET_CODE (insn) == INSN
3927               && try_replace_reg (reg_used->reg_rtx, src, insn))
3928             {
3929               changed = 1;
3930               const_prop_count++;
3931               if (gcse_file != NULL)
3932                 {
3933                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3934                            regno);
3935                   fprintf (gcse_file, "insn %d with constant ",
3936                            INSN_UID (insn));
3937                   print_rtl (gcse_file, src);
3938                   fprintf (gcse_file, "\n");
3939                 }
3940
3941               /* The original insn setting reg_used may or may not now be
3942                  deletable.  We leave the deletion to flow.  */
3943             }
3944
3945           /* Try to propagate a CONST_INT into a conditional jump.
3946              We're pretty specific about what we will handle in this
3947              code, we can extend this as necessary over time.
3948
3949              Right now the insn in question must look like
3950              (set (pc) (if_then_else ...))  */
3951           else if (alter_jumps
3952                    && GET_CODE (insn) == JUMP_INSN
3953                    && condjump_p (insn)
3954                    && ! simplejump_p (insn))
3955             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3956 #ifdef HAVE_cc0
3957           /* Similar code for machines that use a pair of CC0 setter and
3958              conditional jump insn.  */
3959           else if (alter_jumps
3960                    && GET_CODE (PATTERN (insn)) == SET
3961                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3962                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3963                    && condjump_p (NEXT_INSN (insn))
3964                    && ! simplejump_p (NEXT_INSN (insn)))
3965             {
3966               if (cprop_cc0_jump (insn, reg_used, src))
3967                 {
3968                   changed = 1;
3969                   break;
3970                 }
3971             }
3972 #endif
3973         }
3974       else if (GET_CODE (src) == REG
3975                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3976                && REGNO (src) != regno)
3977         {
3978           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3979             {
3980               changed = 1;
3981               copy_prop_count++;
3982               if (gcse_file != NULL)
3983                 {
3984                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3985                            regno, INSN_UID (insn));
3986                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3987                 }
3988
3989               /* The original insn setting reg_used may or may not now be
3990                  deletable.  We leave the deletion to flow.  */
3991               /* FIXME: If it turns out that the insn isn't deletable,
3992                  then we may have unnecessarily extended register lifetimes
3993                  and made things worse.  */
3994             }
3995         }
3996     }
3997
3998   return changed;
3999 }
4000
4001 /* Forward propagate copies.  This includes copies and constants.  Return
4002    non-zero if a change was made.  */
4003
4004 static int
4005 cprop (alter_jumps)
4006      int alter_jumps;
4007 {
4008   int bb, changed;
4009   rtx insn;
4010
4011   /* Note we start at block 1.  */
4012
4013   changed = 0;
4014   for (bb = 1; bb < n_basic_blocks; bb++)
4015     {
4016       /* Reset tables used to keep track of what's still valid [since the
4017          start of the block].  */
4018       reset_opr_set_tables ();
4019
4020       for (insn = BLOCK_HEAD (bb);
4021            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4022            insn = NEXT_INSN (insn))
4023         {
4024           if (INSN_P (insn))
4025             {
4026               changed |= cprop_insn (insn, alter_jumps);
4027
4028               /* Keep track of everything modified by this insn.  */
4029               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4030                  call mark_oprs_set if we turned the insn into a NOTE.  */
4031               if (GET_CODE (insn) != NOTE)
4032                 mark_oprs_set (insn);
4033             }
4034         }
4035     }
4036
4037   if (gcse_file != NULL)
4038     fprintf (gcse_file, "\n");
4039
4040   return changed;
4041 }
4042
4043 /* Perform one copy/constant propagation pass.
4044    F is the first insn in the function.
4045    PASS is the pass count.  */
4046
4047 static int
4048 one_cprop_pass (pass, alter_jumps)
4049      int pass;
4050      int alter_jumps;
4051 {
4052   int changed = 0;
4053
4054   const_prop_count = 0;
4055   copy_prop_count = 0;
4056
4057   alloc_set_hash_table (max_cuid);
4058   compute_set_hash_table ();
4059   if (gcse_file)
4060     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4061                      n_sets);
4062   if (n_sets > 0)
4063     {
4064       alloc_cprop_mem (n_basic_blocks, n_sets);
4065       compute_cprop_data ();
4066       changed = cprop (alter_jumps);
4067       free_cprop_mem ();
4068     }
4069
4070   free_set_hash_table ();
4071
4072   if (gcse_file)
4073     {
4074       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4075                current_function_name, pass, bytes_used);
4076       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4077                const_prop_count, copy_prop_count);
4078     }
4079
4080   return changed;
4081 }
4082 \f
4083 /* Compute PRE+LCM working variables.  */
4084
4085 /* Local properties of expressions.  */
4086 /* Nonzero for expressions that are transparent in the block.  */
4087 static sbitmap *transp;
4088
4089 /* Nonzero for expressions that are transparent at the end of the block.
4090    This is only zero for expressions killed by abnormal critical edge
4091    created by a calls.  */
4092 static sbitmap *transpout;
4093
4094 /* Nonzero for expressions that are computed (available) in the block.  */
4095 static sbitmap *comp;
4096
4097 /* Nonzero for expressions that are locally anticipatable in the block.  */
4098 static sbitmap *antloc;
4099
4100 /* Nonzero for expressions where this block is an optimal computation
4101    point.  */
4102 static sbitmap *pre_optimal;
4103
4104 /* Nonzero for expressions which are redundant in a particular block.  */
4105 static sbitmap *pre_redundant;
4106
4107 /* Nonzero for expressions which should be inserted on a specific edge.  */
4108 static sbitmap *pre_insert_map;
4109
4110 /* Nonzero for expressions which should be deleted in a specific block.  */
4111 static sbitmap *pre_delete_map;
4112
4113 /* Contains the edge_list returned by pre_edge_lcm.  */
4114 static struct edge_list *edge_list;
4115
4116 /* Redundant insns.  */
4117 static sbitmap pre_redundant_insns;
4118
4119 /* Allocate vars used for PRE analysis.  */
4120
4121 static void
4122 alloc_pre_mem (n_blocks, n_exprs)
4123      int n_blocks, n_exprs;
4124 {
4125   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4126   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4127   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4128
4129   pre_optimal = NULL;
4130   pre_redundant = NULL;
4131   pre_insert_map = NULL;
4132   pre_delete_map = NULL;
4133   ae_in = NULL;
4134   ae_out = NULL;
4135   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4136
4137   /* pre_insert and pre_delete are allocated later.  */
4138 }
4139
4140 /* Free vars used for PRE analysis.  */
4141
4142 static void
4143 free_pre_mem ()
4144 {
4145   free (transp);
4146   free (comp);
4147
4148   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
4149
4150   if (pre_optimal)
4151     free (pre_optimal);
4152   if (pre_redundant)
4153     free (pre_redundant);
4154   if (pre_insert_map)
4155     free (pre_insert_map);
4156   if (pre_delete_map)
4157     free (pre_delete_map);
4158
4159   if (ae_in)
4160     free (ae_in);
4161   if (ae_out)
4162     free (ae_out);
4163
4164   transp = comp = NULL;
4165   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4166   ae_in = ae_out = NULL;
4167 }
4168
4169 /* Top level routine to do the dataflow analysis needed by PRE.  */
4170
4171 static void
4172 compute_pre_data ()
4173 {
4174   sbitmap trapping_expr;
4175   int i;
4176   unsigned int ui;
4177
4178   compute_local_properties (transp, comp, antloc, 0);
4179   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4180
4181   /* Collect expressions which might trap.  */
4182   trapping_expr = sbitmap_alloc (n_exprs);
4183   sbitmap_zero (trapping_expr);
4184   for (ui = 0; ui < expr_hash_table_size; ui++)
4185     {
4186       struct expr *e;
4187       for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4188         if (may_trap_p (e->expr))
4189           SET_BIT (trapping_expr, e->bitmap_index);
4190     }
4191
4192   /* Compute ae_kill for each basic block using:
4193
4194      ~(TRANSP | COMP)
4195
4196      This is significantly faster than compute_ae_kill.  */
4197
4198   for (i = 0; i < n_basic_blocks; i++)
4199     {
4200       edge e;
4201
4202       /* If the current block is the destination of an abnormal edge, we
4203          kill all trapping expressions because we won't be able to properly
4204          place the instruction on the edge.  So make them neither
4205          anticipatable nor transparent.  This is fairly conservative.  */
4206       for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4207         if (e->flags & EDGE_ABNORMAL)
4208           {
4209             sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4210             sbitmap_difference (transp[i], transp[i], trapping_expr);
4211             break;
4212           }
4213
4214       sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4215       sbitmap_not (ae_kill[i], ae_kill[i]);
4216     }
4217
4218   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4219                             ae_kill, &pre_insert_map, &pre_delete_map);
4220   free (antloc);
4221   antloc = NULL;
4222   free (ae_kill);
4223   ae_kill = NULL;
4224   free (trapping_expr);
4225 }
4226 \f
4227 /* PRE utilities */
4228
4229 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4230    block BB.
4231
4232    VISITED is a pointer to a working buffer for tracking which BB's have
4233    been visited.  It is NULL for the top-level call.
4234
4235    We treat reaching expressions that go through blocks containing the same
4236    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4237    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4238    2 as not reaching.  The intent is to improve the probability of finding
4239    only one reaching expression and to reduce register lifetimes by picking
4240    the closest such expression.  */
4241
4242 static int
4243 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4244      int occr_bb;
4245      struct expr *expr;
4246      int bb;
4247      char *visited;
4248 {
4249   edge pred;
4250
4251   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4252     {
4253       int pred_bb = pred->src->index;
4254
4255       if (pred->src == ENTRY_BLOCK_PTR
4256           /* Has predecessor has already been visited?  */
4257           || visited[pred_bb])
4258         ;/* Nothing to do.  */
4259
4260       /* Does this predecessor generate this expression?  */
4261       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4262         {
4263           /* Is this the occurrence we're looking for?
4264              Note that there's only one generating occurrence per block
4265              so we just need to check the block number.  */
4266           if (occr_bb == pred_bb)
4267             return 1;
4268
4269           visited[pred_bb] = 1;
4270         }
4271       /* Ignore this predecessor if it kills the expression.  */
4272       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4273         visited[pred_bb] = 1;
4274
4275       /* Neither gen nor kill.  */
4276       else
4277         {
4278           visited[pred_bb] = 1;
4279           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4280             return 1;
4281         }
4282     }
4283
4284   /* All paths have been checked.  */
4285   return 0;
4286 }
4287
4288 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4289    memory allocated for that function is returned. */
4290
4291 static int
4292 pre_expr_reaches_here_p (occr_bb, expr, bb)
4293      int occr_bb;
4294      struct expr *expr;
4295      int bb;
4296 {
4297   int rval;
4298   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4299
4300   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4301
4302   free (visited);
4303   return rval;
4304 }
4305 \f
4306
4307 /* Given an expr, generate RTL which we can insert at the end of a BB,
4308    or on an edge.  Set the block number of any insns generated to
4309    the value of BB.  */
4310
4311 static rtx
4312 process_insert_insn (expr)
4313      struct expr *expr;
4314 {
4315   rtx reg = expr->reaching_reg;
4316   rtx pat, copied_expr;
4317   rtx first_new_insn;
4318
4319   start_sequence ();
4320   copied_expr = copy_rtx (expr->expr);
4321   emit_move_insn (reg, copied_expr);
4322   first_new_insn = get_insns ();
4323   pat = gen_sequence ();
4324   end_sequence ();
4325
4326   return pat;
4327 }
4328
4329 /* Add EXPR to the end of basic block BB.
4330
4331    This is used by both the PRE and code hoisting.
4332
4333    For PRE, we want to verify that the expr is either transparent
4334    or locally anticipatable in the target block.  This check makes
4335    no sense for code hoisting.  */
4336
4337 static void
4338 insert_insn_end_bb (expr, bb, pre)
4339      struct expr *expr;
4340      int bb;
4341      int pre;
4342 {
4343   rtx insn = BLOCK_END (bb);
4344   rtx new_insn;
4345   rtx reg = expr->reaching_reg;
4346   int regno = REGNO (reg);
4347   rtx pat;
4348   int i;
4349
4350   pat = process_insert_insn (expr);
4351
4352   /* If the last insn is a jump, insert EXPR in front [taking care to
4353      handle cc0, etc. properly].  */
4354
4355   if (GET_CODE (insn) == JUMP_INSN)
4356     {
4357 #ifdef HAVE_cc0
4358       rtx note;
4359 #endif
4360
4361       /* If this is a jump table, then we can't insert stuff here.  Since
4362          we know the previous real insn must be the tablejump, we insert
4363          the new instruction just before the tablejump.  */
4364       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4365           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4366         insn = prev_real_insn (insn);
4367
4368 #ifdef HAVE_cc0
4369       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4370          if cc0 isn't set.  */
4371       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4372       if (note)
4373         insn = XEXP (note, 0);
4374       else
4375         {
4376           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4377           if (maybe_cc0_setter
4378               && INSN_P (maybe_cc0_setter)
4379               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4380             insn = maybe_cc0_setter;
4381         }
4382 #endif
4383       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4384       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4385     }
4386
4387   /* Likewise if the last insn is a call, as will happen in the presence
4388      of exception handling.  */
4389   else if (GET_CODE (insn) == CALL_INSN)
4390     {
4391       HARD_REG_SET parm_regs;
4392       int nparm_regs;
4393       rtx p;
4394
4395       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4396          we search backward and place the instructions before the first
4397          parameter is loaded.  Do this for everyone for consistency and a
4398          presumtion that we'll get better code elsewhere as well.
4399
4400          It should always be the case that we can put these instructions
4401          anywhere in the basic block with performing PRE optimizations.
4402          Check this.  */
4403
4404       if (pre
4405           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4406           && !TEST_BIT (transp[bb], expr->bitmap_index))
4407         abort ();
4408
4409       /* Since different machines initialize their parameter registers
4410          in different orders, assume nothing.  Collect the set of all
4411          parameter registers.  */
4412       CLEAR_HARD_REG_SET (parm_regs);
4413       nparm_regs = 0;
4414       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4415         if (GET_CODE (XEXP (p, 0)) == USE
4416             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4417           {
4418             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4419               abort ();
4420
4421             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4422             nparm_regs++;
4423           }
4424
4425       /* Search backward for the first set of a register in this set.  */
4426       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4427         {
4428           insn = PREV_INSN (insn);
4429           p = single_set (insn);
4430           if (p && GET_CODE (SET_DEST (p)) == REG
4431               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4432               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4433             {
4434               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4435               nparm_regs--;
4436             }
4437         }
4438
4439       /* If we found all the parameter loads, then we want to insert
4440          before the first parameter load.
4441
4442          If we did not find all the parameter loads, then we might have
4443          stopped on the head of the block, which could be a CODE_LABEL.
4444          If we inserted before the CODE_LABEL, then we would be putting
4445          the insn in the wrong basic block.  In that case, put the insn
4446          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4447       while (GET_CODE (insn) == CODE_LABEL
4448              || NOTE_INSN_BASIC_BLOCK_P (insn))
4449         insn = NEXT_INSN (insn);
4450
4451       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4452     }
4453   else
4454     {
4455       new_insn = emit_insn_after (pat, insn);
4456       BLOCK_END (bb) = new_insn;
4457     }
4458
4459   /* Keep block number table up to date.
4460      Note, PAT could be a multiple insn sequence, we have to make
4461      sure that each insn in the sequence is handled.  */
4462   if (GET_CODE (pat) == SEQUENCE)
4463     {
4464       for (i = 0; i < XVECLEN (pat, 0); i++)
4465         {
4466           rtx insn = XVECEXP (pat, 0, i);
4467
4468           set_block_num (insn, bb);
4469           if (INSN_P (insn))
4470             add_label_notes (PATTERN (insn), new_insn);
4471
4472           note_stores (PATTERN (insn), record_set_info, insn);
4473         }
4474     }
4475   else
4476     {
4477       add_label_notes (SET_SRC (pat), new_insn);
4478       set_block_num (new_insn, bb);
4479
4480       /* Keep register set table up to date.  */
4481       record_one_set (regno, new_insn);
4482     }
4483
4484   gcse_create_count++;
4485
4486   if (gcse_file)
4487     {
4488       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4489                bb, INSN_UID (new_insn));
4490       fprintf (gcse_file, "copying expression %d to reg %d\n",
4491                expr->bitmap_index, regno);
4492     }
4493 }
4494
4495 /* Insert partially redundant expressions on edges in the CFG to make
4496    the expressions fully redundant.  */
4497
4498 static int
4499 pre_edge_insert (edge_list, index_map)
4500      struct edge_list *edge_list;
4501      struct expr **index_map;
4502 {
4503   int e, i, j, num_edges, set_size, did_insert = 0;
4504   sbitmap *inserted;
4505
4506   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4507      if it reaches any of the deleted expressions.  */
4508
4509   set_size = pre_insert_map[0]->size;
4510   num_edges = NUM_EDGES (edge_list);
4511   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4512   sbitmap_vector_zero (inserted, num_edges);
4513
4514   for (e = 0; e < num_edges; e++)
4515     {
4516       int indx;
4517       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4518       int bb = pred->index;
4519
4520       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4521         {
4522           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4523
4524           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4525             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4526               {
4527                 struct expr *expr = index_map[j];
4528                 struct occr *occr;
4529
4530                 /* Now look at each deleted occurence of this expression.  */
4531                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4532                   {
4533                     if (! occr->deleted_p)
4534                       continue;
4535
4536                     /* Insert this expression on this edge if if it would
4537                        reach the deleted occurence in BB.  */
4538                     if (!TEST_BIT (inserted[e], j))
4539                       {
4540                         rtx insn;
4541                         edge eg = INDEX_EDGE (edge_list, e);
4542
4543                         /* We can't insert anything on an abnormal and
4544                            critical edge, so we insert the insn at the end of
4545                            the previous block. There are several alternatives
4546                            detailed in Morgans book P277 (sec 10.5) for
4547                            handling this situation.  This one is easiest for
4548                            now.  */
4549
4550                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4551                           insert_insn_end_bb (index_map[j], bb, 0);
4552                         else
4553                           {
4554                             insn = process_insert_insn (index_map[j]);
4555                             insert_insn_on_edge (insn, eg);
4556                           }
4557
4558                         if (gcse_file)
4559                           {
4560                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4561                                      bb,
4562                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4563                             fprintf (gcse_file, "copy expression %d\n",
4564                                      expr->bitmap_index);
4565                           }
4566
4567                         SET_BIT (inserted[e], j);
4568                         did_insert = 1;
4569                         gcse_create_count++;
4570                       }
4571                   }
4572               }
4573         }
4574     }
4575
4576   free (inserted);
4577   return did_insert;
4578 }
4579
4580 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4581
4582 static void
4583 pre_insert_copy_insn (expr, insn)
4584      struct expr *expr;
4585      rtx insn;
4586 {
4587   rtx reg = expr->reaching_reg;
4588   int regno = REGNO (reg);
4589   int indx = expr->bitmap_index;
4590   rtx set = single_set (insn);
4591   rtx new_insn;
4592   int bb = BLOCK_NUM (insn);
4593
4594   if (!set)
4595     abort ();
4596
4597   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4598                               insn);
4599
4600   /* Keep block number table up to date.  */
4601   set_block_num (new_insn, bb);
4602
4603   /* Keep register set table up to date.  */
4604   record_one_set (regno, new_insn);
4605   if (insn == BLOCK_END (bb))
4606     BLOCK_END (bb) = new_insn;
4607
4608   gcse_create_count++;
4609
4610   if (gcse_file)
4611     fprintf (gcse_file,
4612              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4613               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4614               INSN_UID (insn), regno);
4615 }
4616
4617 /* Copy available expressions that reach the redundant expression
4618    to `reaching_reg'.  */
4619
4620 static void
4621 pre_insert_copies ()
4622 {
4623   unsigned int i;
4624   struct expr *expr;
4625   struct occr *occr;
4626   struct occr *avail;
4627
4628   /* For each available expression in the table, copy the result to
4629      `reaching_reg' if the expression reaches a deleted one.
4630
4631      ??? The current algorithm is rather brute force.
4632      Need to do some profiling.  */
4633
4634   for (i = 0; i < expr_hash_table_size; i++)
4635     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4636       {
4637         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4638            we don't want to insert a copy here because the expression may not
4639            really be redundant.  So only insert an insn if the expression was
4640            deleted.  This test also avoids further processing if the
4641            expression wasn't deleted anywhere.  */
4642         if (expr->reaching_reg == NULL)
4643           continue;
4644
4645         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4646           {
4647             if (! occr->deleted_p)
4648               continue;
4649
4650             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4651               {
4652                 rtx insn = avail->insn;
4653
4654                 /* No need to handle this one if handled already.  */
4655                 if (avail->copied_p)
4656                   continue;
4657
4658                 /* Don't handle this one if it's a redundant one.  */
4659                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4660                   continue;
4661
4662                 /* Or if the expression doesn't reach the deleted one.  */
4663                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4664                                                BLOCK_NUM (occr->insn)))
4665                   continue;
4666
4667                 /* Copy the result of avail to reaching_reg.  */
4668                 pre_insert_copy_insn (expr, insn);
4669                 avail->copied_p = 1;
4670               }
4671           }
4672       }
4673 }
4674
4675 /* Delete redundant computations.
4676    Deletion is done by changing the insn to copy the `reaching_reg' of
4677    the expression into the result of the SET.  It is left to later passes
4678    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4679
4680    Returns non-zero if a change is made.  */
4681
4682 static int
4683 pre_delete ()
4684 {
4685   unsigned int i;
4686   int changed;
4687   struct expr *expr;
4688   struct occr *occr;
4689
4690   changed = 0;
4691   for (i = 0; i < expr_hash_table_size; i++)
4692     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4693       {
4694         int indx = expr->bitmap_index;
4695
4696         /* We only need to search antic_occr since we require
4697            ANTLOC != 0.  */
4698
4699         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4700           {
4701             rtx insn = occr->insn;
4702             rtx set;
4703             int bb = BLOCK_NUM (insn);
4704
4705             if (TEST_BIT (pre_delete_map[bb], indx))
4706               {
4707                 set = single_set (insn);
4708                 if (! set)
4709                   abort ();
4710
4711                 /* Create a pseudo-reg to store the result of reaching
4712                    expressions into.  Get the mode for the new pseudo from
4713                    the mode of the original destination pseudo.  */
4714                 if (expr->reaching_reg == NULL)
4715                   expr->reaching_reg
4716                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4717
4718                 /* In theory this should never fail since we're creating
4719                    a reg->reg copy.
4720
4721                    However, on the x86 some of the movXX patterns actually
4722                    contain clobbers of scratch regs.  This may cause the
4723                    insn created by validate_change to not match any pattern
4724                    and thus cause validate_change to fail.   */
4725                 if (validate_change (insn, &SET_SRC (set),
4726                                      expr->reaching_reg, 0))
4727                   {
4728                     occr->deleted_p = 1;
4729                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4730                     changed = 1;
4731                     gcse_subst_count++;
4732                   }
4733
4734                 if (gcse_file)
4735                   {
4736                     fprintf (gcse_file,
4737                              "PRE: redundant insn %d (expression %d) in ",
4738                                INSN_UID (insn), indx);
4739                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4740                              bb, REGNO (expr->reaching_reg));
4741                   }
4742               }
4743           }
4744       }
4745
4746   return changed;
4747 }
4748
4749 /* Perform GCSE optimizations using PRE.
4750    This is called by one_pre_gcse_pass after all the dataflow analysis
4751    has been done.
4752
4753    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4754    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4755    Compiler Design and Implementation.
4756
4757    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4758    thing about the classical approach is that it would try to use an existing
4759    reg.  If the register can't be adequately optimized [i.e. we introduce
4760    reload problems], one could add a pass here to propagate the new register
4761    through the block.
4762
4763    ??? We don't handle single sets in PARALLELs because we're [currently] not
4764    able to copy the rest of the parallel when we insert copies to create full
4765    redundancies from partial redundancies.  However, there's no reason why we
4766    can't handle PARALLELs in the cases where there are no partial
4767    redundancies.  */
4768
4769 static int
4770 pre_gcse ()
4771 {
4772   unsigned int i;
4773   int did_insert, changed;
4774   struct expr **index_map;
4775   struct expr *expr;
4776
4777   /* Compute a mapping from expression number (`bitmap_index') to
4778      hash table entry.  */
4779
4780   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4781   for (i = 0; i < expr_hash_table_size; i++)
4782     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4783       index_map[expr->bitmap_index] = expr;
4784
4785   /* Reset bitmap used to track which insns are redundant.  */
4786   pre_redundant_insns = sbitmap_alloc (max_cuid);
4787   sbitmap_zero (pre_redundant_insns);
4788
4789   /* Delete the redundant insns first so that
4790      - we know what register to use for the new insns and for the other
4791        ones with reaching expressions
4792      - we know which insns are redundant when we go to create copies  */
4793
4794   changed = pre_delete ();
4795
4796   did_insert = pre_edge_insert (edge_list, index_map);
4797
4798   /* In other places with reaching expressions, copy the expression to the
4799      specially allocated pseudo-reg that reaches the redundant expr.  */
4800   pre_insert_copies ();
4801   if (did_insert)
4802     {
4803       commit_edge_insertions ();
4804       changed = 1;
4805     }
4806
4807   free (index_map);
4808   free (pre_redundant_insns);
4809   return changed;
4810 }
4811
4812 /* Top level routine to perform one PRE GCSE pass.
4813
4814    Return non-zero if a change was made.  */
4815
4816 static int
4817 one_pre_gcse_pass (pass)
4818      int pass;
4819 {
4820   int changed = 0;
4821
4822   gcse_subst_count = 0;
4823   gcse_create_count = 0;
4824
4825   alloc_expr_hash_table (max_cuid);
4826   add_noreturn_fake_exit_edges ();
4827   compute_expr_hash_table ();
4828   if (gcse_file)
4829     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4830                      expr_hash_table_size, n_exprs);
4831
4832   if (n_exprs > 0)
4833     {
4834       alloc_pre_mem (n_basic_blocks, n_exprs);
4835       compute_pre_data ();
4836       changed |= pre_gcse ();
4837       free_edge_list (edge_list);
4838       free_pre_mem ();
4839     }
4840
4841   remove_fake_edges ();
4842   free_expr_hash_table ();
4843
4844   if (gcse_file)
4845     {
4846       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4847                current_function_name, pass, bytes_used);
4848       fprintf (gcse_file, "%d substs, %d insns created\n",
4849                gcse_subst_count, gcse_create_count);
4850     }
4851
4852   return changed;
4853 }
4854 \f
4855 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4856    If notes are added to an insn which references a CODE_LABEL, the
4857    LABEL_NUSES count is incremented.  We have to add REG_LABEL notes,
4858    because the following loop optimization pass requires them.  */
4859
4860 /* ??? This is very similar to the loop.c add_label_notes function.  We
4861    could probably share code here.  */
4862
4863 /* ??? If there was a jump optimization pass after gcse and before loop,
4864    then we would not need to do this here, because jump would add the
4865    necessary REG_LABEL notes.  */
4866
4867 static void
4868 add_label_notes (x, insn)
4869      rtx x;
4870      rtx insn;
4871 {
4872   enum rtx_code code = GET_CODE (x);
4873   int i, j;
4874   const char *fmt;
4875
4876   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4877     {
4878       /* This code used to ignore labels that referred to dispatch tables to
4879          avoid flow generating (slighly) worse code.
4880
4881          We no longer ignore such label references (see LABEL_REF handling in
4882          mark_jump_label for additional information).  */
4883
4884       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4885                                             REG_NOTES (insn));
4886       if (LABEL_P (XEXP (x, 0)))
4887         LABEL_NUSES (XEXP (x, 0))++;
4888       return;
4889     }
4890
4891   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4892     {
4893       if (fmt[i] == 'e')
4894         add_label_notes (XEXP (x, i), insn);
4895       else if (fmt[i] == 'E')
4896         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4897           add_label_notes (XVECEXP (x, i, j), insn);
4898     }
4899 }
4900
4901 /* Compute transparent outgoing information for each block.
4902
4903    An expression is transparent to an edge unless it is killed by
4904    the edge itself.  This can only happen with abnormal control flow,
4905    when the edge is traversed through a call.  This happens with
4906    non-local labels and exceptions.
4907
4908    This would not be necessary if we split the edge.  While this is
4909    normally impossible for abnormal critical edges, with some effort
4910    it should be possible with exception handling, since we still have
4911    control over which handler should be invoked.  But due to increased
4912    EH table sizes, this may not be worthwhile.  */
4913
4914 static void
4915 compute_transpout ()
4916 {
4917   int bb;
4918   unsigned int i;
4919   struct expr *expr;
4920
4921   sbitmap_vector_ones (transpout, n_basic_blocks);
4922
4923   for (bb = 0; bb < n_basic_blocks; ++bb)
4924     {
4925       /* Note that flow inserted a nop a the end of basic blocks that
4926          end in call instructions for reasons other than abnormal
4927          control flow.  */
4928       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4929         continue;
4930
4931       for (i = 0; i < expr_hash_table_size; i++)
4932         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4933           if (GET_CODE (expr->expr) == MEM)
4934             {
4935               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4936                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4937                 continue;
4938
4939               /* ??? Optimally, we would use interprocedural alias
4940                  analysis to determine if this mem is actually killed
4941                  by this call.  */
4942               RESET_BIT (transpout[bb], expr->bitmap_index);
4943             }
4944     }
4945 }
4946
4947 /* Removal of useless null pointer checks */
4948
4949 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4950    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4951    `null_pointer_info *'.
4952
4953    We ignore hard registers.  */
4954
4955 static void
4956 invalidate_nonnull_info (x, setter, data)
4957      rtx x;
4958      rtx setter ATTRIBUTE_UNUSED;
4959      void *data;
4960 {
4961   unsigned int regno;
4962   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4963
4964   while (GET_CODE (x) == SUBREG)
4965     x = SUBREG_REG (x);
4966
4967   /* Ignore anything that is not a register or is a hard register.  */
4968   if (GET_CODE (x) != REG
4969       || REGNO (x) < npi->min_reg
4970       || REGNO (x) >= npi->max_reg)
4971     return;
4972
4973   regno = REGNO (x) - npi->min_reg;
4974
4975   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4976   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4977 }
4978
4979 /* Do null-pointer check elimination for the registers indicated in
4980    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4981    they are not our responsibility to free.  */
4982
4983 static void
4984 delete_null_pointer_checks_1 (delete_list, block_reg, nonnull_avin,
4985                               nonnull_avout, npi)
4986      varray_type *delete_list;
4987      unsigned int *block_reg;
4988      sbitmap *nonnull_avin;
4989      sbitmap *nonnull_avout;
4990      struct null_pointer_info *npi;
4991 {
4992   int bb;
4993   int current_block;
4994   sbitmap *nonnull_local = npi->nonnull_local;
4995   sbitmap *nonnull_killed = npi->nonnull_killed;
4996
4997   /* Compute local properties, nonnull and killed.  A register will have
4998      the nonnull property if at the end of the current block its value is
4999      known to be nonnull.  The killed property indicates that somewhere in
5000      the block any information we had about the register is killed.
5001
5002      Note that a register can have both properties in a single block.  That
5003      indicates that it's killed, then later in the block a new value is
5004      computed.  */
5005   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5006   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5007
5008   for (current_block = 0; current_block < n_basic_blocks; current_block++)
5009     {
5010       rtx insn, stop_insn;
5011
5012       /* Set the current block for invalidate_nonnull_info.  */
5013       npi->current_block = current_block;
5014
5015       /* Scan each insn in the basic block looking for memory references and
5016          register sets.  */
5017       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5018       for (insn = BLOCK_HEAD (current_block);
5019            insn != stop_insn;
5020            insn = NEXT_INSN (insn))
5021         {
5022           rtx set;
5023           rtx reg;
5024
5025           /* Ignore anything that is not a normal insn.  */
5026           if (! INSN_P (insn))
5027             continue;
5028
5029           /* Basically ignore anything that is not a simple SET.  We do have
5030              to make sure to invalidate nonnull_local and set nonnull_killed
5031              for such insns though.  */
5032           set = single_set (insn);
5033           if (!set)
5034             {
5035               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5036               continue;
5037             }
5038
5039           /* See if we've got a useable memory load.  We handle it first
5040              in case it uses its address register as a dest (which kills
5041              the nonnull property).  */
5042           if (GET_CODE (SET_SRC (set)) == MEM
5043               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5044               && REGNO (reg) >= npi->min_reg
5045               && REGNO (reg) < npi->max_reg)
5046             SET_BIT (nonnull_local[current_block],
5047                      REGNO (reg) - npi->min_reg);
5048
5049           /* Now invalidate stuff clobbered by this insn.  */
5050           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5051
5052           /* And handle stores, we do these last since any sets in INSN can
5053              not kill the nonnull property if it is derived from a MEM
5054              appearing in a SET_DEST.  */
5055           if (GET_CODE (SET_DEST (set)) == MEM
5056               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5057               && REGNO (reg) >= npi->min_reg
5058               && REGNO (reg) < npi->max_reg)
5059             SET_BIT (nonnull_local[current_block],
5060                      REGNO (reg) - npi->min_reg);
5061         }
5062     }
5063
5064   /* Now compute global properties based on the local properties.   This
5065      is a classic global availablity algorithm.  */
5066   compute_available (nonnull_local, nonnull_killed,
5067                      nonnull_avout, nonnull_avin);
5068
5069   /* Now look at each bb and see if it ends with a compare of a value
5070      against zero.  */
5071   for (bb = 0; bb < n_basic_blocks; bb++)
5072     {
5073       rtx last_insn = BLOCK_END (bb);
5074       rtx condition, earliest;
5075       int compare_and_branch;
5076
5077       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5078          since BLOCK_REG[BB] is zero if this block did not end with a
5079          comparison against zero, this condition works.  */
5080       if (block_reg[bb] < npi->min_reg
5081           || block_reg[bb] >= npi->max_reg)
5082         continue;
5083
5084       /* LAST_INSN is a conditional jump.  Get its condition.  */
5085       condition = get_condition (last_insn, &earliest);
5086
5087       /* If we can't determine the condition then skip.  */
5088       if (! condition)
5089         continue;
5090
5091       /* Is the register known to have a nonzero value?  */
5092       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5093         continue;
5094
5095       /* Try to compute whether the compare/branch at the loop end is one or
5096          two instructions.  */
5097       if (earliest == last_insn)
5098         compare_and_branch = 1;
5099       else if (earliest == prev_nonnote_insn (last_insn))
5100         compare_and_branch = 2;
5101       else
5102         continue;
5103
5104       /* We know the register in this comparison is nonnull at exit from
5105          this block.  We can optimize this comparison.  */
5106       if (GET_CODE (condition) == NE)
5107         {
5108           rtx new_jump;
5109
5110           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5111                                             last_insn);
5112           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5113           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5114           emit_barrier_after (new_jump);
5115         }
5116       if (!*delete_list)
5117         VARRAY_RTX_INIT (*delete_list, 10, "delete_list");
5118
5119       VARRAY_PUSH_RTX (*delete_list, last_insn);
5120       if (compare_and_branch == 2)
5121         VARRAY_PUSH_RTX (*delete_list, earliest);
5122
5123       /* Don't check this block again.  (Note that BLOCK_END is
5124          invalid here; we deleted the last instruction in the
5125          block.)  */
5126       block_reg[bb] = 0;
5127     }
5128 }
5129
5130 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5131    at compile time.
5132
5133    This is conceptually similar to global constant/copy propagation and
5134    classic global CSE (it even uses the same dataflow equations as cprop).
5135
5136    If a register is used as memory address with the form (mem (reg)), then we
5137    know that REG can not be zero at that point in the program.  Any instruction
5138    which sets REG "kills" this property.
5139
5140    So, if every path leading to a conditional branch has an available memory
5141    reference of that form, then we know the register can not have the value
5142    zero at the conditional branch.
5143
5144    So we merely need to compute the local properies and propagate that data
5145    around the cfg, then optimize where possible.
5146
5147    We run this pass two times.  Once before CSE, then again after CSE.  This
5148    has proven to be the most profitable approach.  It is rare for new
5149    optimization opportunities of this nature to appear after the first CSE
5150    pass.
5151
5152    This could probably be integrated with global cprop with a little work.  */
5153
5154 void
5155 delete_null_pointer_checks (f)
5156      rtx f ATTRIBUTE_UNUSED;
5157 {
5158   sbitmap *nonnull_avin, *nonnull_avout;
5159   unsigned int *block_reg;
5160   varray_type delete_list = NULL;
5161   int bb;
5162   int reg;
5163   int regs_per_pass;
5164   int max_reg;
5165   unsigned int i;
5166   struct null_pointer_info npi;
5167
5168   /* If we have only a single block, then there's nothing to do.  */
5169   if (n_basic_blocks <= 1)
5170     return;
5171
5172   /* Trying to perform global optimizations on flow graphs which have
5173      a high connectivity will take a long time and is unlikely to be
5174      particularly useful.
5175
5176      In normal circumstances a cfg should have about twice has many edges
5177      as blocks.  But we do not want to punish small functions which have
5178      a couple switch statements.  So we require a relatively large number
5179      of basic blocks and the ratio of edges to blocks to be high.  */
5180   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5181     return;
5182
5183   /* We need four bitmaps, each with a bit for each register in each
5184      basic block.  */
5185   max_reg = max_reg_num ();
5186   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5187
5188   /* Allocate bitmaps to hold local and global properties.  */
5189   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5190   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5191   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5192   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5193
5194   /* Go through the basic blocks, seeing whether or not each block
5195      ends with a conditional branch whose condition is a comparison
5196      against zero.  Record the register compared in BLOCK_REG.  */
5197   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5198   for (bb = 0; bb < n_basic_blocks; bb++)
5199     {
5200       rtx last_insn = BLOCK_END (bb);
5201       rtx condition, earliest, reg;
5202
5203       /* We only want conditional branches.  */
5204       if (GET_CODE (last_insn) != JUMP_INSN
5205           || !any_condjump_p (last_insn)
5206           || !onlyjump_p (last_insn))
5207         continue;
5208
5209       /* LAST_INSN is a conditional jump.  Get its condition.  */
5210       condition = get_condition (last_insn, &earliest);
5211
5212       /* If we were unable to get the condition, or it is not a equality
5213          comparison against zero then there's nothing we can do.  */
5214       if (!condition
5215           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5216           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5217           || (XEXP (condition, 1)
5218               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5219         continue;
5220
5221       /* We must be checking a register against zero.  */
5222       reg = XEXP (condition, 0);
5223       if (GET_CODE (reg) != REG)
5224         continue;
5225
5226       block_reg[bb] = REGNO (reg);
5227     }
5228
5229   /* Go through the algorithm for each block of registers.  */
5230   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5231     {
5232       npi.min_reg = reg;
5233       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5234       delete_null_pointer_checks_1 (&delete_list, block_reg, nonnull_avin,
5235                                     nonnull_avout, &npi);
5236     }
5237
5238   /* Now delete the instructions all at once.  This breaks the CFG.  */
5239   if (delete_list)
5240     {
5241       for (i = 0; i < VARRAY_ACTIVE_SIZE (delete_list); i++)
5242         delete_insn (VARRAY_RTX (delete_list, i));
5243       VARRAY_FREE (delete_list);
5244     }
5245
5246   /* Free the table of registers compared at the end of every block.  */
5247   free (block_reg);
5248
5249   /* Free bitmaps.  */
5250   free (npi.nonnull_local);
5251   free (npi.nonnull_killed);
5252   free (nonnull_avin);
5253   free (nonnull_avout);
5254 }
5255
5256 /* Code Hoisting variables and subroutines.  */
5257
5258 /* Very busy expressions.  */
5259 static sbitmap *hoist_vbein;
5260 static sbitmap *hoist_vbeout;
5261
5262 /* Hoistable expressions.  */
5263 static sbitmap *hoist_exprs;
5264
5265 /* Dominator bitmaps.  */
5266 static sbitmap *dominators;
5267
5268 /* ??? We could compute post dominators and run this algorithm in
5269    reverse to to perform tail merging, doing so would probably be
5270    more effective than the tail merging code in jump.c.
5271
5272    It's unclear if tail merging could be run in parallel with
5273    code hoisting.  It would be nice.  */
5274
5275 /* Allocate vars used for code hoisting analysis.  */
5276
5277 static void
5278 alloc_code_hoist_mem (n_blocks, n_exprs)
5279      int n_blocks, n_exprs;
5280 {
5281   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5282   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5283   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5284
5285   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5286   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5287   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5288   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5289
5290   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5291 }
5292
5293 /* Free vars used for code hoisting analysis.  */
5294
5295 static void
5296 free_code_hoist_mem ()
5297 {
5298   free (antloc);
5299   free (transp);
5300   free (comp);
5301
5302   free (hoist_vbein);
5303   free (hoist_vbeout);
5304   free (hoist_exprs);
5305   free (transpout);
5306
5307   free (dominators);
5308 }
5309
5310 /* Compute the very busy expressions at entry/exit from each block.
5311
5312    An expression is very busy if all paths from a given point
5313    compute the expression.  */
5314
5315 static void
5316 compute_code_hoist_vbeinout ()
5317 {
5318   int bb, changed, passes;
5319
5320   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5321   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5322
5323   passes = 0;
5324   changed = 1;
5325
5326   while (changed)
5327     {
5328       changed = 0;
5329
5330       /* We scan the blocks in the reverse order to speed up
5331          the convergence.  */
5332       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5333         {
5334           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5335                                            hoist_vbeout[bb], transp[bb]);
5336           if (bb != n_basic_blocks - 1)
5337             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5338         }
5339
5340       passes++;
5341     }
5342
5343   if (gcse_file)
5344     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5345 }
5346
5347 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5348
5349 static void
5350 compute_code_hoist_data ()
5351 {
5352   compute_local_properties (transp, comp, antloc, 0);
5353   compute_transpout ();
5354   compute_code_hoist_vbeinout ();
5355   calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5356   if (gcse_file)
5357     fprintf (gcse_file, "\n");
5358 }
5359
5360 /* Determine if the expression identified by EXPR_INDEX would
5361    reach BB unimpared if it was placed at the end of EXPR_BB.
5362
5363    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5364    to me that the expression must either be computed or transparent in
5365    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5366    would allow the expression to be hoisted out of loops, even if
5367    the expression wasn't a loop invariant.
5368
5369    Contrast this to reachability for PRE where an expression is
5370    considered reachable if *any* path reaches instead of *all*
5371    paths.  */
5372
5373 static int
5374 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5375      int expr_bb;
5376      int expr_index;
5377      int bb;
5378      char *visited;
5379 {
5380   edge pred;
5381   int visited_allocated_locally = 0;
5382
5383
5384   if (visited == NULL)
5385     {
5386        visited_allocated_locally = 1;
5387        visited = xcalloc (n_basic_blocks, 1);
5388     }
5389
5390   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5391     {
5392       int pred_bb = pred->src->index;
5393
5394       if (pred->src == ENTRY_BLOCK_PTR)
5395         break;
5396       else if (visited[pred_bb])
5397         continue;
5398
5399       /* Does this predecessor generate this expression?  */
5400       else if (TEST_BIT (comp[pred_bb], expr_index))
5401         break;
5402       else if (! TEST_BIT (transp[pred_bb], expr_index))
5403         break;
5404
5405       /* Not killed.  */
5406       else
5407         {
5408           visited[pred_bb] = 1;
5409           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5410                                            pred_bb, visited))
5411             break;
5412         }
5413     }
5414   if (visited_allocated_locally)
5415     free (visited);
5416
5417   return (pred == NULL);
5418 }
5419 \f
5420 /* Actually perform code hoisting.  */
5421
5422 static void
5423 hoist_code ()
5424 {
5425   int bb, dominated;
5426   unsigned int i;
5427   struct expr **index_map;
5428   struct expr *expr;
5429
5430   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5431
5432   /* Compute a mapping from expression number (`bitmap_index') to
5433      hash table entry.  */
5434
5435   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5436   for (i = 0; i < expr_hash_table_size; i++)
5437     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5438       index_map[expr->bitmap_index] = expr;
5439
5440   /* Walk over each basic block looking for potentially hoistable
5441      expressions, nothing gets hoisted from the entry block.  */
5442   for (bb = 0; bb < n_basic_blocks; bb++)
5443     {
5444       int found = 0;
5445       int insn_inserted_p;
5446
5447       /* Examine each expression that is very busy at the exit of this
5448          block.  These are the potentially hoistable expressions.  */
5449       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5450         {
5451           int hoistable = 0;
5452
5453           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5454             {
5455               /* We've found a potentially hoistable expression, now
5456                  we look at every block BB dominates to see if it
5457                  computes the expression.  */
5458               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5459                 {
5460                   /* Ignore self dominance.  */
5461                   if (bb == dominated
5462                       || ! TEST_BIT (dominators[dominated], bb))
5463                     continue;
5464
5465                   /* We've found a dominated block, now see if it computes
5466                      the busy expression and whether or not moving that
5467                      expression to the "beginning" of that block is safe.  */
5468                   if (!TEST_BIT (antloc[dominated], i))
5469                     continue;
5470
5471                   /* Note if the expression would reach the dominated block
5472                      unimpared if it was placed at the end of BB.
5473
5474                      Keep track of how many times this expression is hoistable
5475                      from a dominated block into BB.  */
5476                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5477                     hoistable++;
5478                 }
5479
5480               /* If we found more than one hoistable occurence of this
5481                  expression, then note it in the bitmap of expressions to
5482                  hoist.  It makes no sense to hoist things which are computed
5483                  in only one BB, and doing so tends to pessimize register
5484                  allocation.  One could increase this value to try harder
5485                  to avoid any possible code expansion due to register
5486                  allocation issues; however experiments have shown that
5487                  the vast majority of hoistable expressions are only movable
5488                  from two successors, so raising this threshhold is likely
5489                  to nullify any benefit we get from code hoisting.  */
5490               if (hoistable > 1)
5491                 {
5492                   SET_BIT (hoist_exprs[bb], i);
5493                   found = 1;
5494                 }
5495             }
5496         }
5497
5498       /* If we found nothing to hoist, then quit now.  */
5499       if (! found)
5500         continue;
5501
5502       /* Loop over all the hoistable expressions.  */
5503       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5504         {
5505           /* We want to insert the expression into BB only once, so
5506              note when we've inserted it.  */
5507           insn_inserted_p = 0;
5508
5509           /* These tests should be the same as the tests above.  */
5510           if (TEST_BIT (hoist_vbeout[bb], i))
5511             {
5512               /* We've found a potentially hoistable expression, now
5513                  we look at every block BB dominates to see if it
5514                  computes the expression.  */
5515               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5516                 {
5517                   /* Ignore self dominance.  */
5518                   if (bb == dominated
5519                       || ! TEST_BIT (dominators[dominated], bb))
5520                     continue;
5521
5522                   /* We've found a dominated block, now see if it computes
5523                      the busy expression and whether or not moving that
5524                      expression to the "beginning" of that block is safe.  */
5525                   if (!TEST_BIT (antloc[dominated], i))
5526                     continue;
5527
5528                   /* The expression is computed in the dominated block and
5529                      it would be safe to compute it at the start of the
5530                      dominated block.  Now we have to determine if the
5531                      expresion would reach the dominated block if it was
5532                      placed at the end of BB.  */
5533                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5534                     {
5535                       struct expr *expr = index_map[i];
5536                       struct occr *occr = expr->antic_occr;
5537                       rtx insn;
5538                       rtx set;
5539
5540                       /* Find the right occurence of this expression.  */
5541                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5542                         occr = occr->next;
5543
5544                       /* Should never happen.  */
5545                       if (!occr)
5546                         abort ();
5547
5548                       insn = occr->insn;
5549
5550                       set = single_set (insn);
5551                       if (! set)
5552                         abort ();
5553
5554                       /* Create a pseudo-reg to store the result of reaching
5555                          expressions into.  Get the mode for the new pseudo
5556                          from the mode of the original destination pseudo.  */
5557                       if (expr->reaching_reg == NULL)
5558                         expr->reaching_reg
5559                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5560
5561                       /* In theory this should never fail since we're creating
5562                          a reg->reg copy.
5563
5564                          However, on the x86 some of the movXX patterns
5565                          actually contain clobbers of scratch regs.  This may
5566                          cause the insn created by validate_change to not
5567                          match any pattern and thus cause validate_change to
5568                          fail.  */
5569                       if (validate_change (insn, &SET_SRC (set),
5570                                            expr->reaching_reg, 0))
5571                         {
5572                           occr->deleted_p = 1;
5573                           if (!insn_inserted_p)
5574                             {
5575                               insert_insn_end_bb (index_map[i], bb, 0);
5576                               insn_inserted_p = 1;
5577                             }
5578                         }
5579                     }
5580                 }
5581             }
5582         }
5583     }
5584
5585     free (index_map);
5586 }
5587
5588 /* Top level routine to perform one code hoisting (aka unification) pass
5589
5590    Return non-zero if a change was made.  */
5591
5592 static int
5593 one_code_hoisting_pass ()
5594 {
5595   int changed = 0;
5596
5597   alloc_expr_hash_table (max_cuid);
5598   compute_expr_hash_table ();
5599   if (gcse_file)
5600     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5601                      expr_hash_table_size, n_exprs);
5602
5603   if (n_exprs > 0)
5604     {
5605       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5606       compute_code_hoist_data ();
5607       hoist_code ();
5608       free_code_hoist_mem ();
5609     }
5610
5611   free_expr_hash_table ();
5612
5613   return changed;
5614 }