/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - dead store elimination
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass
*/
/* References searched while implementing this.

   Compilers: Principles, Techniques, and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/
#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "params.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free
/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).
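
   For example [an illustrative sketch, not taken from this file]: if PRE
   rewrites a redundant computation

       (set (reg 200) (plus (reg 100) (reg 101)))

   into the copy (set (reg 200) (reg 300)), where 300 is the new pseudo
   holding the expression's value, the second cprop pass can propagate
   reg 300 into the uses of reg 200, often leaving the copy dead.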
   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  Macro MAX_PASSES
   can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.
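
   An illustrative example (hypothetical, not from a real dump): if "a + b"
   is computed on only one of two paths into a block that computes "a + b"
   again, the second computation is partially redundant.  Step 3 rewrites it
   as a copy from a new pseudo T, step 4 inserts T = a + b on the path that
   lacked the computation, and step 5 inserts a copy to T after the
   occurrence that already reaches it.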
   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   Much of the code's simplicity comes from using small functions for simple
   tasks, even when a function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;
struct reg_use { rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;
/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#ifdef ENABLE_CHECKING
#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
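
/* Because cuids increase monotonically and only real insns get them, two
   insns in the same function can be ordered simply by comparing cuids,
   e.g. INSN_CUID (i1) < INSN_CUID (i2); the first/last-set bookkeeping
   below relies on exactly this.  */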
/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */
typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;
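
/* So, to visit every place pseudo REGNO is set, one walks its chain
   (an illustrative sketch, not a function from this file):

       struct reg_set *r;
       for (r = reg_set_table[regno]; r != NULL; r = r->next)
         ... examine r->insn, e.g. BLOCK_NUM (r->insn) ...

   iterating over the sets of the pseudo rather than over all basic blocks,
   as described above.  */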
static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;
/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.
       rd_kill[block_num][cuid_num]
       ae_kill[block_num][expr_num]  */

/* For reaching defs.  */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* For available exprs.  */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
static void compute_can_copy PARAMS ((void));
static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));
static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
                                          int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_string_1 PARAMS ((const char *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
                                     int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((unsigned int, rtx));
static struct expr *next_set PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));
static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
                                              int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((rtx, rtx, struct reg_use *, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((rtx, struct reg_use *, rtx));
#endif
static int cprop_insn PARAMS ((rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));
static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
static void insert_insn_end_bb PARAMS ((struct expr *, int, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));
static void add_label_notes PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, int));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, int));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
                                        int, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((varray_type *, unsigned int *,
                                                  sbitmap *, sbitmap *,
                                                  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
                                             int, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
                                                 int, char *));
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
        warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
                 n_basic_blocks, n_edges / n_basic_blocks);
      return 0;
    }

  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
        warning ("GCSE disabled: %d basic blocks and %d registers",
                 n_basic_blocks, max_gcse_regno);

      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
        fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
         and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
         during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
        changed |= one_classic_gcse_pass (pass + 1);
      else
        {
          changed |= one_pre_gcse_pass (pass + 1);
          free_reg_set_mem ();
          alloc_reg_set_mem (max_reg_num ());
          compute_sets (f);
          run_jump_opt_after_gcse = 1;
        }

      if (max_pass_bytes < bytes_used)
        max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
         not re-use the existing allocated memory because the tables
         will not have info for the insns or registers created by
         partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
         for code size -- it rarely makes programs faster, and can make
         them bigger if we did partial redundancy elimination (when optimizing
         for space, we use a classic gcse algorithm instead of partial
         redundancy algorithms).  */
      if (optimize_size)
        {
          max_gcse_regno = max_reg_num ();
          alloc_gcse_mem (f);
          changed |= one_code_hoisting_pass ();
          free_gcse_mem ();

          if (max_pass_bytes < bytes_used)
            max_pass_bytes = bytes_used;
        }

      if (file)
        {
          fprintf (file, "\n");
          fflush (file);
        }

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);
  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
               current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
               pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL_PTR);
  free_reg_set_mem ();
  return run_jump_opt_after_gcse;
}
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
        can_copy_p[i] = 0;
#else
        reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
        insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
        if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
          can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
        uid_cuid[INSN_UID (insn)] = i++;
      else
        uid_cuid[INSN_UID (insn)] = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
                                                       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}
/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
                             / column_size);
}
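
/* A sketch of the intended use (hypothetical caller, not code from this
   file): to solve four equation systems over n_basic_blocks blocks and
   n_exprs expressions within the memory budget, one would do

       int width = get_bitmap_width (4, n_basic_blocks, n_exprs);
       for (first = 0; first < n_exprs; first += width)
         ... allocate n_basic_blocks x width bitmaps and solve the
             equations for expressions [first, first + width) only ...

   trading extra solver passes for fewer simultaneously live bitmaps.  */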
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
        sbitmap_vector_zero (transp, n_basic_blocks);
      else
        sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
        {
          int indx = expr->bitmap_index;
          struct occr *occr;

          /* The expression is transparent in this block if it is not killed.
             We start by assuming all are transparent [none are killed], and
             then reset the bits for those that are.  */
          if (transp)
            compute_transp (expr->expr, indx, transp, setp);

          /* The occurrences recorded in antic_occr are exactly those that
             we want to set to non-zero in ANTLOC.  */
          if (antloc)
            for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->deleted_p = 0;
              }

          /* The occurrences recorded in avail_occr are exactly those that
             we want to set to non-zero in COMP.  */
          if (comp)
            for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->copied_p = 0;
              }

          /* While we're scanning the table, this is a good place to
             initialize this.  */
          expr->reaching_reg = 0;
        }
    }
}
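
/* For instance, sketching the callers described above: PRE and code
   hoisting call compute_local_properties (transp, comp, antloc, 0) to scan
   the expr hash table, while cprop passes SETP == 1 to scan the set hash
   table; in that case the vector passed as TRANSP starts out all-zero and
   accumulates the blocks that alter the set, i.e. cprop's ABSALTERED.  */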
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  unsigned int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  memset ((char *) reg_set_table, 0, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
        = (struct reg_set **) grealloc ((char *) reg_set_table,
                                        new_size * sizeof (struct reg_set *));
      memset ((char *) (reg_set_table + reg_set_table_size), 0,
              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
                                                   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = reg_set_table[regno];
  reg_set_table[regno] = new_reg_info;
}
/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}
/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
#define NEVER_SET -1
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or -1 if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.

   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;
/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
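
/* As illustrative examples, (set (reg 100) (plus (reg 101) (reg 102)))
   has a source worth entering in the expression hash table, while
   (set (reg 100) (reg 101)) and (set (reg 100) (const_int 4)) are
   rejected above -- plain copies and constant loads are left to
   copy/constant propagation instead.  */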
/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
        return (reg_last_set[REGNO (x)] == NEVER_SET
                || reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
        return (reg_first_set[REGNO (x)] == NEVER_SET
                || reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p && mem_last_set != NEVER_SET
          && mem_last_set >= INSN_CUID (insn))
        return 0;
      else if (! avail_p && mem_first_set != NEVER_SET
               && mem_first_set < INSN_CUID (insn))
        return 0;
      else
        return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call needed at this
             level, change it into iteration.  This function is called enough
             to be worth it.  */
          if (i == 0)
            return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

          else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
            return 0;
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
            return 0;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
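
/* These two wrappers correspond to the two local properties: for an insn I
   computing expression X in block B, oprs_anticipatable_p (X, I) asks
   whether X's operands survive from the start of B down to I (could X move
   to the block's beginning?), while oprs_available_p (X, I) asks whether
   they survive from I to the end of B (could X move to the block's end?).  */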
/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Hash a string.  Just add its bytes up.  */

static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *) ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}
/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */

  if (x == 0)
    return hash;

 repeat:
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
               + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
         the integers representing the constant.  */
      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
        for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
          hash += (unsigned int) XWINT (x, i);
      else
        hash += ((unsigned int) CONST_DOUBLE_LOW (x)
                 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
         differences and differences between each stage's debugging dumps.  */
      hash += (((unsigned int) LABEL_REF << 7)
               + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
        /* Don't hash on the symbol's address to avoid bootstrap differences.
           Different hash values may cause expressions to be recorded in
           different orders and thus different registers to be used in the
           final assembler.  This also avoids differences in the dump files
           between various stages.  */
        unsigned int h = 0;
        const unsigned char *p = (const unsigned char *) XSTR (x, 0);

        while (*p)
          h += (h << 7) + *p++; /* ??? revisit */

        hash += ((unsigned int) SYMBOL_REF << 7) + h;
        return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }

      hash += (unsigned int) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }
      else
        {
          /* We don't want to take the filename and line into account.  */
          hash += (unsigned) code + (unsigned) GET_MODE (x)
            + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
            + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
            + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);

          if (ASM_OPERANDS_INPUT_LENGTH (x))
            {
              for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
                {
                  hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
                                        GET_MODE (ASM_OPERANDS_INPUT (x, i)),
                                        do_not_record_p)
                           + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
                                            (x, i)));
                }

              hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
              x = ASM_OPERANDS_INPUT (x, 0);
              mode = GET_MODE (x);
              goto repeat;
            }
          return hash;
        }

    default:
      break;
    }

  hash += (unsigned) code + (unsigned) GET_MODE (x);
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = XEXP (x, i);
              goto repeat;
            }

          hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
          if (*do_not_record_p)
            return 0;
        }

      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          {
            hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
            if (*do_not_record_p)
              return 0;
          }

      else if (fmt[i] == 's')
        hash += hash_string_1 (XSTR (x, i));
      else if (fmt[i] == 'i')
        hash += (unsigned int) XINT (x, i);
      else
        abort ();
    }

  return hash;
}
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == y)
    return 1;

  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
      /* Can't merge two expressions in different alias sets, since we can
         decide that the expression is transparent in a block when it isn't,
         due to it being set with the different alias set.  */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
        return 0;
      break;

      /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
               && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
              || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
                  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    case ASM_OPERANDS:
      /* We don't use the generic code below because we want to
         disregard filename and line numbers.  */

      /* A volatile asm isn't equivalent to any other.  */
      if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
        return 0;

      if (GET_MODE (x) != GET_MODE (y)
          || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
          || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
                     ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
          || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
          || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
        return 0;

      if (ASM_OPERANDS_INPUT_LENGTH (x))
        {
          for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
            if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
                                ASM_OPERANDS_INPUT (y, i))
                || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
                           ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
              return 0;
        }

      return 1;

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
        {
        case 'e':
          if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
            return 0;
          break;

        case 'E':
          if (XVECLEN (x, i) != XVECLEN (y, i))
            return 0;
          for (j = 0; j < XVECLEN (x, i); j++)
            if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
              return 0;
          break;

        case 's':
          if (strcmp (XSTR (x, i), XSTR (y, i)))
            return 0;
          break;

        case 'i':
          if (XINT (x, i) != XINT (y, i))
            return 0;
          break;

        case 'w':
          if (XWINT (x, i) != XWINT (y, i))
            return 0;
          break;

        case '0':
          break;

        default:
          abort ();
        }
    }

  return 1;
}
/* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is non-zero if X is an anticipatable expression.
   AVAIL_P is non-zero if X is an available expression.  */

static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
     enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;
  struct occr *last_occr = NULL;

  hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = expr_hash_table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (expr_hash_table[hash] == NULL)
        /* This is the first pattern that hashed to this index.  */
        expr_hash_table[hash] = cur_expr;
      else
        /* Add EXPR to end of this hash chain.  */
        last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = n_exprs++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* Search for another occurrence in the same basic block.  */
      while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = antic_occr;
          antic_occr = antic_occr->next;
        }

      if (antic_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer the currently recorded one.  We want the first one in the
           block and the block is scanned from start to end.  */
        ; /* nothing to do */
      else
        {
          /* First occurrence of this expression in this basic block.  */
          antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);
          /* First occurrence of this expression in any block?  */
          if (cur_expr->antic_occr == NULL)
            cur_expr->antic_occr = antic_occr;
          else
            last_occr->next = antic_occr;

          antic_occr->insn = insn;
          antic_occr->next = NULL;
        }
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      /* Search for another occurrence in the same basic block.  */
      while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = avail_occr;
          avail_occr = avail_occr->next;
        }

      if (avail_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer this occurrence to the currently recorded one.  We want
           the last one in the block and the block is scanned from start
           to end.  */
        avail_occr->insn = insn;
      else
        {
          /* First occurrence of this expression in this basic block.  */
          avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);

          /* First occurrence of this expression in any block?  */
          if (cur_expr->avail_occr == NULL)
            cur_expr->avail_occr = avail_occr;
          else
            last_occr->next = avail_occr;

          avail_occr->insn = insn;
          avail_occr->next = NULL;
        }
    }
}
1828 /* Insert pattern X in INSN in the hash table.
1829 X is a SET of a reg to either another reg or a constant.
1830 If it is already present, record it as the last occurrence in INSN's
1831 basic block. */
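/* For example (illustrative), both

       (set (reg 70) (reg 71))
       (set (reg 70) (const_int 42))

   are valid arguments here; anything whose destination is not a REG is
   rejected by the abort below.  */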
1833 static void
1834 insert_set_in_table (x, insn)
1835 rtx x;
1836 rtx insn;
1838 int found;
1839 unsigned int hash;
1840 struct expr *cur_expr, *last_expr = NULL;
1841 struct occr *cur_occr, *last_occr = NULL;
1843 if (GET_CODE (x) != SET
1844 || GET_CODE (SET_DEST (x)) != REG)
1845 abort ();
1847 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1849 cur_expr = set_hash_table[hash];
1850 found = 0;
1852 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1854 /* If the expression isn't found, save a pointer to the end of
1855 the list. */
1856 last_expr = cur_expr;
1857 cur_expr = cur_expr->next_same_hash;
1860 if (! found)
1862 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1863 bytes_used += sizeof (struct expr);
1864 if (set_hash_table[hash] == NULL)
1865 /* This is the first pattern that hashed to this index. */
1866 set_hash_table[hash] = cur_expr;
1867 else
1868 /* Add EXPR to end of this hash chain. */
1869 last_expr->next_same_hash = cur_expr;
1871 /* Set the fields of the expr element.
1872 We must copy X because it can be modified when copy propagation is
1873 performed on its operands. */
1874 /* ??? Should this go in a different obstack? */
1875 cur_expr->expr = copy_rtx (x);
1876 cur_expr->bitmap_index = n_sets++;
1877 cur_expr->next_same_hash = NULL;
1878 cur_expr->antic_occr = NULL;
1879 cur_expr->avail_occr = NULL;
1882 /* Now record the occurrence. */
1883 cur_occr = cur_expr->avail_occr;
1885 /* Search for another occurrence in the same basic block. */
1886 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1888 /* If an occurrence isn't found, save a pointer to the end of
1889 the list. */
1890 last_occr = cur_occr;
1891 cur_occr = cur_occr->next;
1894 if (cur_occr)
1895 /* Found another instance of the expression in the same basic block.
1896 Prefer this occurrence to the currently recorded one. We want the
1897 last one in the block and the block is scanned from start to end. */
1898 cur_occr->insn = insn;
1899 else
1901 /* First occurrence of this expression in this basic block. */
1902 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1903 bytes_used += sizeof (struct occr);
1905 /* First occurrence of this expression in any block? */
1906 if (cur_expr->avail_occr == NULL)
1907 cur_expr->avail_occr = cur_occr;
1908 else
1909 last_occr->next = cur_occr;
1911 cur_occr->insn = insn;
1912 cur_occr->next = NULL;
1916 /* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
1917 non-zero, this is for the assignment hash table, otherwise it is for the
1918 expression hash table. */
1920 static void
1921 hash_scan_set (pat, insn, set_p)
1922 rtx pat, insn;
1923 int set_p;
1925 rtx src = SET_SRC (pat);
1926 rtx dest = SET_DEST (pat);
1928 if (GET_CODE (src) == CALL)
1929 hash_scan_call (src, insn);
1931 if (GET_CODE (dest) == REG)
1933 int regno = REGNO (dest);
1934 rtx tmp, note;
1936 /* Only record sets of pseudo-regs in the hash table. */
1937 if (! set_p
1938 && regno >= FIRST_PSEUDO_REGISTER
1939 /* Don't GCSE something if we can't do a reg/reg copy. */
1940 && can_copy_p [GET_MODE (dest)]
1941 /* Is SET_SRC something we want to gcse? */
1942 && want_to_gcse_p (src)
1943 /* Don't GCSE if it has attached REG_EQUIV note.
1944 At this point only function parameters should have
1945 REG_EQUIV notes, and if the argument slot is used somewhere
1946 explicitly, it means the address of the parameter has been taken,
1947 so we should not extend the lifetime of the pseudo. */
1948 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1949 || GET_CODE (XEXP (note, 0)) != MEM))
1951 /* An expression is not anticipatable if its operands are
1952 modified before this insn. */
1953 int antic_p = oprs_anticipatable_p (src, insn);
1954 /* An expression is not available if its operands are
1955 subsequently modified, including this insn. */
1956 int avail_p = oprs_available_p (src, insn);
1958 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1961 /* Record sets for constant/copy propagation. */
1962 else if (set_p
1963 && regno >= FIRST_PSEUDO_REGISTER
1964 && ((GET_CODE (src) == REG
1965 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1966 && can_copy_p [GET_MODE (dest)])
1967 || GET_CODE (src) == CONST_INT
1968 || GET_CODE (src) == SYMBOL_REF
1969 || GET_CODE (src) == CONST_DOUBLE)
1970 /* A copy is not available if its src or dest is subsequently
1971 modified. Here we want to search from INSN+1 on, but
1972 oprs_available_p searches from INSN on. */
1973 && (insn == BLOCK_END (BLOCK_NUM (insn))
1974 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1975 && oprs_available_p (pat, tmp))))
1976 insert_set_in_table (pat, insn);
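/* A sketch of why the availability check above must start at the insn
   after INSN: for (set (reg 70) (reg 71)), INSN itself modifies
   (reg 70), so calling oprs_available_p on INSN would always fail,
   even though the copy remains usable as long as neither register is
   changed later in the block.  */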
1980 static void
1981 hash_scan_clobber (x, insn)
1982 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1984 /* Currently nothing to do. */
1987 static void
1988 hash_scan_call (x, insn)
1989 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1991 /* Currently nothing to do. */
1994 /* Process INSN and add hash table entries as appropriate.
1996 Only available expressions that set a single pseudo-reg are recorded.
1998 Single sets in a PARALLEL could be handled, but it's an extra complication
1999 that isn't dealt with right now. The trick is handling the CLOBBERs that
2000 are also in the PARALLEL. Later.
2002 If SET_P is non-zero, this is for the assignment hash table,
2003 otherwise it is for the expression hash table.
2004 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
2005 not record any expressions. */
2007 static void
2008 hash_scan_insn (insn, set_p, in_libcall_block)
2009 rtx insn;
2010 int set_p;
2011 int in_libcall_block;
2013 rtx pat = PATTERN (insn);
2014 int i;
2016 /* Pick out the sets of INSN and for other forms of instructions record
2017 what's been modified. */
2019 if (GET_CODE (pat) == SET && ! in_libcall_block)
2021 /* Ignore obvious no-ops. */
2022 if (SET_SRC (pat) != SET_DEST (pat))
2023 hash_scan_set (pat, insn, set_p);
2025 else if (GET_CODE (pat) == PARALLEL)
2026 for (i = 0; i < XVECLEN (pat, 0); i++)
2028 rtx x = XVECEXP (pat, 0, i);
2030 if (GET_CODE (x) == SET)
2032 if (GET_CODE (SET_SRC (x)) == CALL)
2033 hash_scan_call (SET_SRC (x), insn);
2035 else if (GET_CODE (x) == CLOBBER)
2036 hash_scan_clobber (x, insn);
2037 else if (GET_CODE (x) == CALL)
2038 hash_scan_call (x, insn);
2041 else if (GET_CODE (pat) == CLOBBER)
2042 hash_scan_clobber (pat, insn);
2043 else if (GET_CODE (pat) == CALL)
2044 hash_scan_call (pat, insn);
2047 static void
2048 dump_hash_table (file, name, table, table_size, total_size)
2049 FILE *file;
2050 const char *name;
2051 struct expr **table;
2052 int table_size, total_size;
2054 int i;
2055 /* Flattened out table, so it's printed in proper order. */
2056 struct expr **flat_table;
2057 unsigned int *hash_val;
2058 struct expr *expr;
2060 flat_table
2061 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2062 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2064 for (i = 0; i < table_size; i++)
2065 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2067 flat_table[expr->bitmap_index] = expr;
2068 hash_val[expr->bitmap_index] = i;
2071 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2072 name, table_size, total_size);
2074 for (i = 0; i < total_size; i++)
2075 if (flat_table[i] != 0)
2077 expr = flat_table[i];
2078 fprintf (file, "Index %d (hash value %d)\n ",
2079 expr->bitmap_index, hash_val[i]);
2080 print_rtl (file, expr->expr);
2081 fprintf (file, "\n");
2084 fprintf (file, "\n");
2086 free (flat_table);
2087 free (hash_val);
2090 /* Record register first/last/block set information for REGNO in INSN.
2092 reg_first_set records the first place in the block where the register
2093 is set and is used to compute "anticipatability".
2095 reg_last_set records the last place in the block where the register
2096 is set and is used to compute "availability".
2098 reg_set_in_block records whether the register is set in the block
2099 and is used to compute "transparency". */
2101 static void
2102 record_last_reg_set_info (insn, regno)
2103 rtx insn;
2104 int regno;
2106 if (reg_first_set[regno] == NEVER_SET)
2107 reg_first_set[regno] = INSN_CUID (insn);
2109 reg_last_set[regno] = INSN_CUID (insn);
2110 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2113 /* Record memory first/last/block set information for INSN. */
2115 static void
2116 record_last_mem_set_info (insn)
2117 rtx insn;
2119 if (mem_first_set == NEVER_SET)
2120 mem_first_set = INSN_CUID (insn);
2122 mem_last_set = INSN_CUID (insn);
2123 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2126 /* Called from compute_hash_table via note_stores to handle one
2127 SET or CLOBBER in an insn. DATA is really the instruction in which
2128 the SET is taking place. */
2130 static void
2131 record_last_set_info (dest, setter, data)
2132 rtx dest, setter ATTRIBUTE_UNUSED;
2133 void *data;
2135 rtx last_set_insn = (rtx) data;
2137 if (GET_CODE (dest) == SUBREG)
2138 dest = SUBREG_REG (dest);
2140 if (GET_CODE (dest) == REG)
2141 record_last_reg_set_info (last_set_insn, REGNO (dest));
2142 else if (GET_CODE (dest) == MEM
2143 /* Ignore pushes, they clobber nothing. */
2144 && ! push_operand (dest, GET_MODE (dest)))
2145 record_last_mem_set_info (last_set_insn);
2148 /* Top level function to create an expression or assignment hash table.
2150 Expression entries are placed in the hash table if
2151 - they are of the form (set (pseudo-reg) src),
2152 - src is something we want to perform GCSE on,
2153 - none of the operands are subsequently modified in the block
2155 Assignment entries are placed in the hash table if
2156 - they are of the form (set (pseudo-reg) src),
2157 - src is something we want to perform const/copy propagation on,
2158 - none of the operands or target are subsequently modified in the block
2160 Currently src must be a pseudo-reg or a const_int.
2163 SET_P is non-zero for computing the assignment hash table. */
2165 static void
2166 compute_hash_table (set_p)
2167 int set_p;
2169 int bb;
2171 /* While we compute the hash table we also compute a bit array of which
2172 registers are set in which blocks.
2173 We also compute which blocks set memory, in the absence of aliasing
2174 support [which is TODO].
2175 ??? This isn't needed during const/copy propagation, but it's cheap to
2176 compute. Later. */
2177 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2178 memset ((char *) mem_set_in_block, 0, n_basic_blocks);
2180 /* Some working arrays used to track first and last set in each block. */
2181 /* ??? One could use alloca here, but at some size a threshold is crossed
2182 beyond which one should use malloc. Are we at that threshold here? */
2183 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2184 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2186 for (bb = 0; bb < n_basic_blocks; bb++)
2188 rtx insn;
2189 unsigned int regno;
2190 int in_libcall_block;
2191 unsigned int i;
2193 /* First pass over the instructions records information used to
2194 determine when registers and memory are first and last set.
2195 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2196 could be moved to compute_sets since they currently don't change. */
2198 for (i = 0; i < max_gcse_regno; i++)
2199 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2201 mem_first_set = NEVER_SET;
2202 mem_last_set = NEVER_SET;
2204 for (insn = BLOCK_HEAD (bb);
2205 insn && insn != NEXT_INSN (BLOCK_END (bb));
2206 insn = NEXT_INSN (insn))
2208 #ifdef NON_SAVING_SETJMP
2209 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2210 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2212 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2213 record_last_reg_set_info (insn, regno);
2214 continue;
2216 #endif
2218 if (! INSN_P (insn))
2219 continue;
2221 if (GET_CODE (insn) == CALL_INSN)
2223 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2224 if ((call_used_regs[regno]
2225 && regno != STACK_POINTER_REGNUM
2226 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2227 && regno != HARD_FRAME_POINTER_REGNUM
2228 #endif
2229 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2230 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2231 #endif
2232 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2233 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2234 #endif
2236 && regno != FRAME_POINTER_REGNUM)
2237 || global_regs[regno])
2238 record_last_reg_set_info (insn, regno);
2240 if (! CONST_CALL_P (insn))
2241 record_last_mem_set_info (insn);
2244 note_stores (PATTERN (insn), record_last_set_info, insn);
2247 /* The next pass builds the hash table. */
2249 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2250 insn && insn != NEXT_INSN (BLOCK_END (bb));
2251 insn = NEXT_INSN (insn))
2252 if (INSN_P (insn))
2254 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2255 in_libcall_block = 1;
2256 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2257 in_libcall_block = 0;
2258 hash_scan_insn (insn, set_p, in_libcall_block);
2262 free (reg_first_set);
2263 free (reg_last_set);
2265 /* Catch bugs early. */
2266 reg_first_set = reg_last_set = 0;
2269 /* Allocate space for the set hash table.
2270 N_INSNS is the number of instructions in the function.
2271 It is used to determine the number of buckets to use. */
2273 static void
2274 alloc_set_hash_table (n_insns)
2275 int n_insns;
2277 int n;
2279 set_hash_table_size = n_insns / 4;
2280 if (set_hash_table_size < 11)
2281 set_hash_table_size = 11;
2283 /* Attempt to maintain efficient use of hash table.
2284 Making it an odd number is simplest for now.
2285 ??? Later take some measurements. */
2286 set_hash_table_size |= 1;
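/* E.g. (illustrative) a 1000-insn function gets 1000 / 4 = 250,
   forced odd to 251 buckets.  */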
2287 n = set_hash_table_size * sizeof (struct expr *);
2288 set_hash_table = (struct expr **) gmalloc (n);
2291 /* Free things allocated by alloc_set_hash_table. */
2293 static void
2294 free_set_hash_table ()
2296 free (set_hash_table);
2299 /* Compute the hash table for doing copy/const propagation. */
2301 static void
2302 compute_set_hash_table ()
2304 /* Initialize count of number of entries in hash table. */
2305 n_sets = 0;
2306 memset ((char *) set_hash_table, 0,
2307 set_hash_table_size * sizeof (struct expr *));
2309 compute_hash_table (1);
2312 /* Allocate space for the expression hash table.
2313 N_INSNS is the number of instructions in the function.
2314 It is used to determine the number of buckets to use. */
2316 static void
2317 alloc_expr_hash_table (n_insns)
2318 unsigned int n_insns;
2320 int n;
2322 expr_hash_table_size = n_insns / 2;
2323 /* Make sure the amount is usable. */
2324 if (expr_hash_table_size < 11)
2325 expr_hash_table_size = 11;
2327 /* Attempt to maintain efficient use of hash table.
2328 Making it an odd number is simplest for now.
2329 ??? Later take some measurements. */
2330 expr_hash_table_size |= 1;
2331 n = expr_hash_table_size * sizeof (struct expr *);
2332 expr_hash_table = (struct expr **) gmalloc (n);
2335 /* Free things allocated by alloc_expr_hash_table. */
2337 static void
2338 free_expr_hash_table ()
2340 free (expr_hash_table);
2343 /* Compute the hash table for doing GCSE. */
2345 static void
2346 compute_expr_hash_table ()
2348 /* Initialize count of number of entries in hash table. */
2349 n_exprs = 0;
2350 memset ((char *) expr_hash_table, 0,
2351 expr_hash_table_size * sizeof (struct expr *));
2353 compute_hash_table (0);
2356 /* Expression tracking support. */
2358 /* Lookup pattern PAT in the expression table.
2359 The result is a pointer to the table entry, or NULL if not found. */
2361 static struct expr *
2362 lookup_expr (pat)
2363 rtx pat;
2365 int do_not_record_p;
2366 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2367 expr_hash_table_size);
2368 struct expr *expr;
2370 if (do_not_record_p)
2371 return NULL;
2373 expr = expr_hash_table[hash];
2375 while (expr && ! expr_equiv_p (expr->expr, pat))
2376 expr = expr->next_same_hash;
2378 return expr;
2381 /* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2382 matches it, otherwise return the first entry for REGNO. The result is a
2383 pointer to the table entry, or NULL if not found. */
2385 static struct expr *
2386 lookup_set (regno, pat)
2387 unsigned int regno;
2388 rtx pat;
2390 unsigned int hash = hash_set (regno, set_hash_table_size);
2391 struct expr *expr;
2393 expr = set_hash_table[hash];
2395 if (pat)
2397 while (expr && ! expr_equiv_p (expr->expr, pat))
2398 expr = expr->next_same_hash;
2400 else
2402 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2403 expr = expr->next_same_hash;
2406 return expr;
2409 /* Return the next entry for REGNO in list EXPR. */
2411 static struct expr *
2412 next_set (regno, expr)
2413 unsigned int regno;
2414 struct expr *expr;
2417 expr = expr->next_same_hash;
2418 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2420 return expr;
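/* A typical (sketched) use of lookup_set and next_set together: walk
   every recorded set of REGNO:

       for (expr = lookup_set (regno, NULL_RTX);
            expr != NULL;
            expr = next_set (regno, expr))
         ...examine SET_SRC (expr->expr)...  */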
2423 /* Reset tables used to keep track of what's still available [since the
2424 start of the block]. */
2426 static void
2427 reset_opr_set_tables ()
2429 /* Maintain a bitmap of which regs have been set since beginning of
2430 the block. */
2431 sbitmap_zero (reg_set_bitmap);
2433 /* Also keep a record of the last instruction to modify memory.
2434 For now this is very trivial, we only record whether any memory
2435 location has been modified. */
2436 mem_last_set = 0;
2439 /* Return non-zero if the operands of X are not set before INSN in
2440 INSN's basic block. */
2442 static int
2443 oprs_not_set_p (x, insn)
2444 rtx x, insn;
2446 int i, j;
2447 enum rtx_code code;
2448 const char *fmt;
2450 if (x == 0)
2451 return 1;
2453 code = GET_CODE (x);
2454 switch (code)
2456 case PC:
2457 case CC0:
2458 case CONST:
2459 case CONST_INT:
2460 case CONST_DOUBLE:
2461 case SYMBOL_REF:
2462 case LABEL_REF:
2463 case ADDR_VEC:
2464 case ADDR_DIFF_VEC:
2465 return 1;
2467 case MEM:
2468 if (mem_last_set != 0)
2469 return 0;
2470 else
2471 return oprs_not_set_p (XEXP (x, 0), insn);
2473 case REG:
2474 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2476 default:
2477 break;
2480 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2482 if (fmt[i] == 'e')
2484 /* If we are about to do the last recursive call
2485 needed at this level, change it into iteration.
2486 This function is called enough to be worth it. */
2487 if (i == 0)
2488 return oprs_not_set_p (XEXP (x, i), insn);
2490 if (! oprs_not_set_p (XEXP (x, i), insn))
2491 return 0;
2493 else if (fmt[i] == 'E')
2494 for (j = 0; j < XVECLEN (x, i); j++)
2495 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2496 return 0;
2499 return 1;
2502 /* Mark things set by a CALL. */
2504 static void
2505 mark_call (insn)
2506 rtx insn;
2508 mem_last_set = INSN_CUID (insn);
2511 /* Mark things set by a SET. */
2513 static void
2514 mark_set (pat, insn)
2515 rtx pat, insn;
2517 rtx dest = SET_DEST (pat);
2519 while (GET_CODE (dest) == SUBREG
2520 || GET_CODE (dest) == ZERO_EXTRACT
2521 || GET_CODE (dest) == SIGN_EXTRACT
2522 || GET_CODE (dest) == STRICT_LOW_PART)
2523 dest = XEXP (dest, 0);
2525 if (GET_CODE (dest) == REG)
2526 SET_BIT (reg_set_bitmap, REGNO (dest));
2527 else if (GET_CODE (dest) == MEM)
2528 mem_last_set = INSN_CUID (insn);
2530 if (GET_CODE (SET_SRC (pat)) == CALL)
2531 mark_call (insn);
2534 /* Record things set by a CLOBBER. */
2536 static void
2537 mark_clobber (pat, insn)
2538 rtx pat, insn;
2540 rtx clob = XEXP (pat, 0);
2542 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2543 clob = XEXP (clob, 0);
2545 if (GET_CODE (clob) == REG)
2546 SET_BIT (reg_set_bitmap, REGNO (clob));
2547 else
2548 mem_last_set = INSN_CUID (insn);
2551 /* Record things set by INSN.
2552 This data is used by oprs_not_set_p. */
2554 static void
2555 mark_oprs_set (insn)
2556 rtx insn;
2558 rtx pat = PATTERN (insn);
2559 int i;
2561 if (GET_CODE (pat) == SET)
2562 mark_set (pat, insn);
2563 else if (GET_CODE (pat) == PARALLEL)
2564 for (i = 0; i < XVECLEN (pat, 0); i++)
2566 rtx x = XVECEXP (pat, 0, i);
2568 if (GET_CODE (x) == SET)
2569 mark_set (x, insn);
2570 else if (GET_CODE (x) == CLOBBER)
2571 mark_clobber (x, insn);
2572 else if (GET_CODE (x) == CALL)
2573 mark_call (insn);
2576 else if (GET_CODE (pat) == CLOBBER)
2577 mark_clobber (pat, insn);
2578 else if (GET_CODE (pat) == CALL)
2579 mark_call (insn);
2583 /* Classic GCSE reaching definition support. */
2585 /* Allocate reaching def variables. */
2587 static void
2588 alloc_rd_mem (n_blocks, n_insns)
2589 int n_blocks, n_insns;
2591 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2592 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2594 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2595 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2597 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2598 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2600 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2601 sbitmap_vector_zero (rd_out, n_basic_blocks);
2604 /* Free reaching def variables. */
2606 static void
2607 free_rd_mem ()
2609 free (rd_kill);
2610 free (rd_gen);
2611 free (reaching_defs);
2612 free (rd_out);
2615 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2617 static void
2618 handle_rd_kill_set (insn, regno, bb)
2619 rtx insn;
2620 int regno, bb;
2622 struct reg_set *this_reg;
2624 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2625 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2626 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2629 /* Compute the set of kill's for reaching definitions. */
2631 static void
2632 compute_kill_rd ()
2634 int bb, cuid;
2635 int regno, i;
2637 /* For each block
2638 For each set bit in `gen' of the block (i.e each insn which
2639 generates a definition in the block)
2640 Call the reg set by the insn corresponding to that bit regx
2641 Look at the linked list starting at reg_set_table[regx]
2642 For each setting of regx in the linked list, which is not in
2643 this block
2644 Set the bit in `kill' corresponding to that insn. */
2645 for (bb = 0; bb < n_basic_blocks; bb++)
2646 for (cuid = 0; cuid < max_cuid; cuid++)
2647 if (TEST_BIT (rd_gen[bb], cuid))
2649 rtx insn = CUID_INSN (cuid);
2650 rtx pat = PATTERN (insn);
2652 if (GET_CODE (insn) == CALL_INSN)
2654 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2656 if ((call_used_regs[regno]
2657 && regno != STACK_POINTER_REGNUM
2658 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2659 && regno != HARD_FRAME_POINTER_REGNUM
2660 #endif
2661 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2662 && ! (regno == ARG_POINTER_REGNUM
2663 && fixed_regs[regno])
2664 #endif
2665 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2666 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2667 #endif
2668 && regno != FRAME_POINTER_REGNUM)
2669 || global_regs[regno])
2670 handle_rd_kill_set (insn, regno, bb);
2674 if (GET_CODE (pat) == PARALLEL)
2676 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2678 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2680 if ((code == SET || code == CLOBBER)
2681 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2682 handle_rd_kill_set (insn,
2683 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2684 bb);
2687 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2688 /* Each setting of this register outside of this block
2689 must be marked in the set of kills in this block. */
2690 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2694 /* Compute the reaching definitions as in
2695 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2696 Chapter 10. It is the same algorithm as used for computing available
2697 expressions but applied to the gens and kills of reaching definitions. */
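/* In equation form, the loop below computes, for each block BB,

       reaching_defs[BB] = union of rd_out[P] over all predecessors P
       rd_out[BB]        = rd_gen[BB] | (reaching_defs[BB] & ~rd_kill[BB])

   iterating until no rd_out changes (a standard forward dataflow
   fixed point).  */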
2699 static void
2700 compute_rd ()
2702 int bb, changed, passes;
2704 for (bb = 0; bb < n_basic_blocks; bb++)
2705 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2707 passes = 0;
2708 changed = 1;
2709 while (changed)
2711 changed = 0;
2712 for (bb = 0; bb < n_basic_blocks; bb++)
2714 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2715 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2716 reaching_defs[bb], rd_kill[bb]);
2718 passes++;
2721 if (gcse_file)
2722 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2725 /* Classic GCSE available expression support. */
2727 /* Allocate memory for available expression computation. */
2729 static void
2730 alloc_avail_expr_mem (n_blocks, n_exprs)
2731 int n_blocks, n_exprs;
2733 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2734 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2736 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2737 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2739 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2740 sbitmap_vector_zero (ae_in, n_basic_blocks);
2742 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2743 sbitmap_vector_zero (ae_out, n_basic_blocks);
2746 static void
2747 free_avail_expr_mem ()
2749 free (ae_kill);
2750 free (ae_gen);
2751 free (ae_in);
2752 free (ae_out);
2755 /* Compute the set of available expressions generated in each basic block. */
2757 static void
2758 compute_ae_gen ()
2760 unsigned int i;
2761 struct expr *expr;
2762 struct occr *occr;
2764 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2765 This is all we have to do because an expression is not recorded if it
2766 is not available, and the only expressions we want to work with are the
2767 ones that are recorded. */
2768 for (i = 0; i < expr_hash_table_size; i++)
2769 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2770 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2771 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2774 /* Return non-zero if expression X is killed in BB. */
2776 static int
2777 expr_killed_p (x, bb)
2778 rtx x;
2779 int bb;
2781 int i, j;
2782 enum rtx_code code;
2783 const char *fmt;
2785 if (x == 0)
2786 return 1;
2788 code = GET_CODE (x);
2789 switch (code)
2791 case REG:
2792 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2794 case MEM:
2795 if (mem_set_in_block[bb])
2796 return 1;
2797 else
2798 return expr_killed_p (XEXP (x, 0), bb);
2800 case PC:
2801 case CC0: /*FIXME*/
2802 case CONST:
2803 case CONST_INT:
2804 case CONST_DOUBLE:
2805 case SYMBOL_REF:
2806 case LABEL_REF:
2807 case ADDR_VEC:
2808 case ADDR_DIFF_VEC:
2809 return 0;
2811 default:
2812 break;
2815 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2817 if (fmt[i] == 'e')
2819 /* If we are about to do the last recursive call
2820 needed at this level, change it into iteration.
2821 This function is called enough to be worth it. */
2822 if (i == 0)
2823 return expr_killed_p (XEXP (x, i), bb);
2824 else if (expr_killed_p (XEXP (x, i), bb))
2825 return 1;
2827 else if (fmt[i] == 'E')
2828 for (j = 0; j < XVECLEN (x, i); j++)
2829 if (expr_killed_p (XVECEXP (x, i, j), bb))
2830 return 1;
2833 return 0;
2836 /* Compute the set of available expressions killed in each basic block. */
2838 static void
2839 compute_ae_kill (ae_gen, ae_kill)
2840 sbitmap *ae_gen, *ae_kill;
2842 int bb;
2843 unsigned int i;
2844 struct expr *expr;
2846 for (bb = 0; bb < n_basic_blocks; bb++)
2847 for (i = 0; i < expr_hash_table_size; i++)
2848 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2850 /* Skip EXPR if generated in this block. */
2851 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2852 continue;
2854 if (expr_killed_p (expr->expr, bb))
2855 SET_BIT (ae_kill[bb], expr->bitmap_index);
2859 /* Actually perform the Classic GCSE optimizations. */
2861 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2863 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2864 as a positive reach. We want to do this when there are two computations
2865 of the expression in the block.
2867 VISITED is a pointer to a working buffer for tracking which BB's have
2868 been visited; the expr_reaches_here_p wrapper allocates and frees it.
2870 We treat reaching expressions that go through blocks containing the same
2871 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2872 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2873 2 as not reaching. The intent is to improve the probability of finding
2874 only one reaching expression and to reduce register lifetimes by picking
2875 the closest such expression. */
2877 static int
2878 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2879 struct occr *occr;
2880 struct expr *expr;
2881 int bb;
2882 int check_self_loop;
2883 char *visited;
2885 edge pred;
2887 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2889 int pred_bb = pred->src->index;
2891 if (visited[pred_bb])
2892 /* This predecessor has already been visited. Nothing to do. */
2894 else if (pred_bb == bb)
2896 /* BB loops on itself. */
2897 if (check_self_loop
2898 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2899 && BLOCK_NUM (occr->insn) == pred_bb)
2900 return 1;
2902 visited[pred_bb] = 1;
2905 /* Ignore this predecessor if it kills the expression. */
2906 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2907 visited[pred_bb] = 1;
2909 /* Does this predecessor generate this expression? */
2910 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2912 /* Is this the occurrence we're looking for?
2913 Note that there's only one generating occurrence per block
2914 so we just need to check the block number. */
2915 if (BLOCK_NUM (occr->insn) == pred_bb)
2916 return 1;
2918 visited[pred_bb] = 1;
2921 /* Neither gen nor kill. */
2922 else
2924 visited[pred_bb] = 1;
2925 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2926 visited))
2928 return 1;
2932 /* All paths have been checked. */
2933 return 0;
2936 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2937 memory allocated for that function is returned. */
2939 static int
2940 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2941 struct occr *occr;
2942 struct expr *expr;
2943 int bb;
2944 int check_self_loop;
2946 int rval;
2947 char *visited = (char *) xcalloc (n_basic_blocks, 1);
2949 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2951 free (visited);
2952 return rval;
2955 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2956 If there is more than one such instruction, return NULL.
2958 Called only by handle_avail_expr. */
2960 static rtx
2961 computing_insn (expr, insn)
2962 struct expr *expr;
2963 rtx insn;
2965 int bb = BLOCK_NUM (insn);
2967 if (expr->avail_occr->next == NULL)
2969 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2970 /* The available expression is actually itself
2971 (i.e. a loop in the flow graph) so do nothing. */
2972 return NULL;
2974 /* (FIXME) This is the case where we found a pattern that was created
2975 by an earlier substitution. */
2976 return expr->avail_occr->insn;
2978 else
2980 /* Pattern is computed more than once.
2981 Search backwards from this insn to see how many of these
2982 computations actually reach this insn. */
2983 struct occr *occr;
2984 rtx insn_computes_expr = NULL;
2985 int can_reach = 0;
2987 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2989 if (BLOCK_NUM (occr->insn) == bb)
2991 /* The expression is generated in this block.
2992 The only time we care about this is when the expression
2993 is generated later in the block [and thus there's a loop].
2994 We let the normal cse pass handle the other cases. */
2995 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2996 && expr_reaches_here_p (occr, expr, bb, 1))
2998 can_reach++;
2999 if (can_reach > 1)
3000 return NULL;
3002 insn_computes_expr = occr->insn;
3005 else if (expr_reaches_here_p (occr, expr, bb, 0))
3007 can_reach++;
3008 if (can_reach > 1)
3009 return NULL;
3011 insn_computes_expr = occr->insn;
3015 if (insn_computes_expr == NULL)
3016 abort ();
3018 return insn_computes_expr;
3022 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3023 Only called by can_disregard_other_sets. */
3025 static int
3026 def_reaches_here_p (insn, def_insn)
3027 rtx insn, def_insn;
3029 rtx reg;
3031 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3032 return 1;
3034 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3036 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3038 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3039 return 1;
3040 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3041 reg = XEXP (PATTERN (def_insn), 0);
3042 else if (GET_CODE (PATTERN (def_insn)) == SET)
3043 reg = SET_DEST (PATTERN (def_insn));
3044 else
3045 abort ();
3047 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3049 else
3050 return 0;
3053 return 0;
3056 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
3057 value returned is the number of definitions that reach INSN. Returning a
3058 value of zero means that [maybe] more than one definition reaches INSN and
3059 the caller can't perform whatever optimization it is trying to do. I.e. it is
3060 always safe to return zero. */
3062 static int
3063 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3064 struct reg_set **addr_this_reg;
3065 rtx insn;
3066 int for_combine;
3068 int number_of_reaching_defs = 0;
3069 struct reg_set *this_reg;
3071 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3072 if (def_reaches_here_p (insn, this_reg->insn))
3074 number_of_reaching_defs++;
3075 /* Ignore parallels for now. */
3076 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3077 return 0;
3079 if (!for_combine
3080 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3081 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3082 SET_SRC (PATTERN (insn)))))
3083 /* A setting of the reg to a different value reaches INSN. */
3084 return 0;
3086 if (number_of_reaching_defs > 1)
3088 /* If, in this setting, the value the register is being set to is
3089 equal to the previous value the register was set to, and this
3090 setting reaches the insn we are trying to do the substitution
3091 on, then we are OK. */
3092 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3093 return 0;
3094 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3095 SET_SRC (PATTERN (insn))))
3096 return 0;
3099 *addr_this_reg = this_reg;
3102 return number_of_reaching_defs;
3105 /* Expression computed by insn is available and the substitution is legal,
3106 so try to perform the substitution.
3108 The result is non-zero if any changes were made. */
3110 static int
3111 handle_avail_expr (insn, expr)
3112 rtx insn;
3113 struct expr *expr;
3115 rtx pat, insn_computes_expr;
3116 rtx to;
3117 struct reg_set *this_reg;
3118 int found_setting, use_src;
3119 int changed = 0;
3121 /* We only handle the case where one computation of the expression
3122 reaches this instruction. */
3123 insn_computes_expr = computing_insn (expr, insn);
3124 if (insn_computes_expr == NULL)
3125 return 0;
3127 found_setting = 0;
3128 use_src = 0;
3130 /* At this point we know only one computation of EXPR outside of this
3131 block reaches this insn. Now try to find a register that the
3132 expression is computed into. */
3133 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3135 /* This is the case when the available expression that reaches
3136 here has already been handled as an available expression. */
3137 unsigned int regnum_for_replacing
3138 = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3140 /* If the register was created by GCSE we can't use `reg_set_table',
3141 however we know it's set only once. */
3142 if (regnum_for_replacing >= max_gcse_regno
3143 /* If the register the expression is computed into is set only once,
3144 or only one set reaches this insn, we can use it. */
3145 || (((this_reg = reg_set_table[regnum_for_replacing]),
3146 this_reg->next == NULL)
3147 || can_disregard_other_sets (&this_reg, insn, 0)))
3149 use_src = 1;
3150 found_setting = 1;
3154 if (!found_setting)
3156 unsigned int regnum_for_replacing
3157 = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3159 /* This shouldn't happen. */
3160 if (regnum_for_replacing >= max_gcse_regno)
3161 abort ();
3163 this_reg = reg_set_table[regnum_for_replacing];
3165 /* If the register the expression is computed into is set only once,
3166 or only one set reaches this insn, use it. */
3167 if (this_reg->next == NULL
3168 || can_disregard_other_sets (&this_reg, insn, 0))
3169 found_setting = 1;
3172 if (found_setting)
3174 pat = PATTERN (insn);
3175 if (use_src)
3176 to = SET_SRC (PATTERN (insn_computes_expr));
3177 else
3178 to = SET_DEST (PATTERN (insn_computes_expr));
3179 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3181 /* We should be able to ignore the return code from validate_change but
3182 to play it safe we check. */
3183 if (changed)
3185 gcse_subst_count++;
3186 if (gcse_file != NULL)
3188 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3189 INSN_UID (insn));
3190 fprintf (gcse_file, " reg %d %s insn %d\n",
3191 REGNO (to), use_src ? "from" : "set in",
3192 INSN_UID (insn_computes_expr));
3197 /* The register that the expr is computed into is set more than once. */
3198 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
3200 /* Insert an insn after the computing insn that copies the register
3201 it sets (call it REGB) into a new pseudo register REGN, and then
3202 replace the use of REGB in INSN with REGN. */
3204 rtx new_insn;
3206 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3208 /* Generate the new insn. */
3209 /* ??? If the change fails, we return 0, even though we created
3210 an insn. I think this is ok. */
3211 new_insn
3212 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3213 SET_DEST (PATTERN
3214 (insn_computes_expr))),
3215 insn_computes_expr);
3217 /* Keep block number table up to date. */
3218 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3220 /* Keep register set table up to date. */
3221 record_one_set (REGNO (to), new_insn);
3223 gcse_create_count++;
3224 if (gcse_file != NULL)
3226 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3227 INSN_UID (NEXT_INSN (insn_computes_expr)),
3228 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3229 fprintf (gcse_file, ", computed in insn %d,\n",
3230 INSN_UID (insn_computes_expr));
3231 fprintf (gcse_file, " into newly allocated reg %d\n",
3232 REGNO (to));
3235 pat = PATTERN (insn);
3237 /* Do register replacement for INSN. */
3238 changed = validate_change (insn, &SET_SRC (pat),
3239 SET_DEST (PATTERN
3240 (NEXT_INSN (insn_computes_expr))),
3243 /* We should be able to ignore the return code from validate_change but
3244 to play it safe we check. */
3245 if (changed)
3247 gcse_subst_count++;
3248 if (gcse_file != NULL)
3250 fprintf (gcse_file,
3251 "GCSE: Replacing the source in insn %d with reg %d ",
3252 INSN_UID (insn),
3253 REGNO (SET_DEST (PATTERN (NEXT_INSN
3254 (insn_computes_expr)))));
3255 fprintf (gcse_file, "set in insn %d\n",
3256 INSN_UID (insn_computes_expr));
3261 return changed;
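/* A sketch of the copy-insertion case above (register numbers made
   up): given

       insn A:  (set (reg 70) (plus (reg 58) (reg 59)))   ; computes EXPR
       ...                      ; (reg 70) is set again somewhere
       insn B:  (set (reg 72) (plus (reg 58) (reg 59)))

   we emit (set (reg 99) (reg 70)) right after insn A, where (reg 99)
   is a fresh pseudo, and then turn insn B into
   (set (reg 72) (reg 99)).  */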
3264 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3265 the dataflow analysis has been done.
3267 The result is non-zero if a change was made. */
3269 static int
3270 classic_gcse ()
3272 int bb, changed;
3273 rtx insn;
3275 /* Note we start at block 1. */
3277 changed = 0;
3278 for (bb = 1; bb < n_basic_blocks; bb++)
3280 /* Reset tables used to keep track of what's still valid [since the
3281 start of the block]. */
3282 reset_opr_set_tables ();
3284 for (insn = BLOCK_HEAD (bb);
3285 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3286 insn = NEXT_INSN (insn))
3288 /* Is insn of form (set (pseudo-reg) ...)? */
3289 if (GET_CODE (insn) == INSN
3290 && GET_CODE (PATTERN (insn)) == SET
3291 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3292 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3294 rtx pat = PATTERN (insn);
3295 rtx src = SET_SRC (pat);
3296 struct expr *expr;
3298 if (want_to_gcse_p (src)
3299 /* Is the expression recorded? */
3300 && ((expr = lookup_expr (src)) != NULL)
3301 /* Is the expression available [at the start of the
3302 block]? */
3303 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3304 /* Are the operands unchanged since the start of the
3305 block? */
3306 && oprs_not_set_p (src, insn))
3307 changed |= handle_avail_expr (insn, expr);
3310 /* Keep track of everything modified by this insn. */
3311 /* ??? Need to be careful w.r.t. mods done to INSN. */
3312 if (INSN_P (insn))
3313 mark_oprs_set (insn);
3317 return changed;
3320 /* Top level routine to perform one classic GCSE pass.
3322 Return non-zero if a change was made. */
3324 static int
3325 one_classic_gcse_pass (pass)
3326 int pass;
3328 int changed = 0;
3330 gcse_subst_count = 0;
3331 gcse_create_count = 0;
3333 alloc_expr_hash_table (max_cuid);
3334 alloc_rd_mem (n_basic_blocks, max_cuid);
3335 compute_expr_hash_table ();
3336 if (gcse_file)
3337 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3338 expr_hash_table_size, n_exprs);
3340 if (n_exprs > 0)
3342 compute_kill_rd ();
3343 compute_rd ();
3344 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3345 compute_ae_gen ();
3346 compute_ae_kill (ae_gen, ae_kill);
3347 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3348 changed = classic_gcse ();
3349 free_avail_expr_mem ();
3352 free_rd_mem ();
3353 free_expr_hash_table ();
3355 if (gcse_file)
3357 fprintf (gcse_file, "\n");
3358 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3359 current_function_name, pass, bytes_used, gcse_subst_count);
3360 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3363 return changed;
3366 /* Compute copy/constant propagation working variables. */
3368 /* Local properties of assignments. */
3369 static sbitmap *cprop_pavloc;
3370 static sbitmap *cprop_absaltered;
3372 /* Global properties of assignments (computed from the local properties). */
3373 static sbitmap *cprop_avin;
3374 static sbitmap *cprop_avout;
3376 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3377 basic blocks. N_SETS is the number of sets. */
3379 static void
3380 alloc_cprop_mem (n_blocks, n_sets)
3381 int n_blocks, n_sets;
3383 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3384 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3386 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3387 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3390 /* Free vars used by copy/const propagation. */
3392 static void
3393 free_cprop_mem ()
3395 free (cprop_pavloc);
3396 free (cprop_absaltered);
3397 free (cprop_avin);
3398 free (cprop_avout);
3401 /* For each block, compute whether X is transparent. X is either an
3402 expression or an assignment [though we don't care which, for this context
3403 an assignment is treated as an expression]. For each block where an
3404 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3405 bit in BMAP. */
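/* A sketch: if X contains the pseudo (reg 70), then for every insn
   recorded in reg_set_table[70] the bit INDX is set (SET_P == 1) or
   reset (SET_P == 0) in the bitmap of that insn's block; a MEM in X
   does the same for every block whose mem_set_in_block entry is
   nonzero.  */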
3407 static void
3408 compute_transp (x, indx, bmap, set_p)
3409 rtx x;
3410 int indx;
3411 sbitmap *bmap;
3412 int set_p;
3414 int bb, i, j;
3415 enum rtx_code code;
3416 reg_set *r;
3417 const char *fmt;
3419 /* repeat is used to turn tail-recursion into iteration since GCC
3420 can't do it when there's no return value. */
3421 repeat:
3423 if (x == 0)
3424 return;
3426 code = GET_CODE (x);
3427 switch (code)
3429 case REG:
3430 if (set_p)
3432 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3434 for (bb = 0; bb < n_basic_blocks; bb++)
3435 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3436 SET_BIT (bmap[bb], indx);
3438 else
3440 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3441 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3444 else
3446 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3448 for (bb = 0; bb < n_basic_blocks; bb++)
3449 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3450 RESET_BIT (bmap[bb], indx);
3452 else
3454 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3455 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3459 return;
3461 case MEM:
3462 if (set_p)
3464 for (bb = 0; bb < n_basic_blocks; bb++)
3465 if (mem_set_in_block[bb])
3466 SET_BIT (bmap[bb], indx);
3468 else
3470 for (bb = 0; bb < n_basic_blocks; bb++)
3471 if (mem_set_in_block[bb])
3472 RESET_BIT (bmap[bb], indx);
3475 x = XEXP (x, 0);
3476 goto repeat;
3478 case PC:
3479 case CC0: /*FIXME*/
3480 case CONST:
3481 case CONST_INT:
3482 case CONST_DOUBLE:
3483 case SYMBOL_REF:
3484 case LABEL_REF:
3485 case ADDR_VEC:
3486 case ADDR_DIFF_VEC:
3487 return;
3489 default:
3490 break;
3493 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3495 if (fmt[i] == 'e')
3497 /* If we are about to do the last recursive call
3498 needed at this level, change it into iteration.
3499 This function is called enough to be worth it. */
3500 if (i == 0)
3502 x = XEXP (x, i);
3503 goto repeat;
3506 compute_transp (XEXP (x, i), indx, bmap, set_p);
3508 else if (fmt[i] == 'E')
3509 for (j = 0; j < XVECLEN (x, i); j++)
3510 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3514 /* Top level routine to do the dataflow analysis needed by copy/const
3515 propagation. */
3517 static void
3518 compute_cprop_data ()
3520 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3521 compute_available (cprop_pavloc, cprop_absaltered,
3522 cprop_avout, cprop_avin);
3525 /* Copy/constant propagation. */
3527 /* Maximum number of register uses in an insn that we handle. */
3528 #define MAX_USES 8
3530 /* Table of uses found in an insn.
3531 Allocated statically to avoid alloc/free complexity and overhead. */
3532 static struct reg_use reg_use_table[MAX_USES];
3534 /* Index into `reg_use_table' while building it. */
3535 static int reg_use_count;
3537 /* Set up a list of register numbers used in INSN. The found uses are stored
3538 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3539 and contains the number of uses in the table upon exit.
3541 ??? If a register appears multiple times we will record it multiple times.
3542 This doesn't hurt anything but it will slow things down. */
3544 static void
3545 find_used_regs (x)
3546 rtx x;
3548 int i, j;
3549 enum rtx_code code;
3550 const char *fmt;
3552 /* repeat is used to turn tail-recursion into iteration since GCC
3553 can't do it when there's no return value. */
3554 repeat:
3556 if (x == 0)
3557 return;
3559 code = GET_CODE (x);
3560 switch (code)
3562 case REG:
3563 if (reg_use_count == MAX_USES)
3564 return;
3566 reg_use_table[reg_use_count].reg_rtx = x;
3567 reg_use_count++;
3568 return;
3570 case MEM:
3571 x = XEXP (x, 0);
3572 goto repeat;
3574 case PC:
3575 case CC0:
3576 case CONST:
3577 case CONST_INT:
3578 case CONST_DOUBLE:
3579 case SYMBOL_REF:
3580 case LABEL_REF:
3581 case CLOBBER:
3582 case ADDR_VEC:
3583 case ADDR_DIFF_VEC:
3584 case ASM_INPUT: /*FIXME*/
3585 return;
3587 case SET:
3588 if (GET_CODE (SET_DEST (x)) == MEM)
3589 find_used_regs (SET_DEST (x));
3590 x = SET_SRC (x);
3591 goto repeat;
3593 default:
3594 break;
3597 /* Recursively scan the operands of this expression. */
3599 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3601 if (fmt[i] == 'e')
3603 /* If we are about to do the last recursive call
3604 needed at this level, change it into iteration.
3605 This function is called enough to be worth it. */
3606 if (i == 0)
3608 x = XEXP (x, 0);
3609 goto repeat;
3612 find_used_regs (XEXP (x, i));
3614 else if (fmt[i] == 'E')
3615 for (j = 0; j < XVECLEN (x, i); j++)
3616 find_used_regs (XVECEXP (x, i, j));
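/* Typical (sketched) use, mirroring cprop_insn below:

       reg_use_count = 0;
       find_used_regs (PATTERN (insn));
       for (reg_used = &reg_use_table[0]; reg_use_count > 0;
            reg_used++, reg_use_count--)
         ...examine reg_used->reg_rtx...  */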
3620 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3621 Returns non-zero if successful. */
3623 static int
3624 try_replace_reg (from, to, insn)
3625 rtx from, to, insn;
3627 rtx note;
3628 rtx src;
3629 int success;
3630 rtx set;
3632 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3634 if (!note)
3635 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3637 /* If this fails we could try to simplify the result of the
3638 replacement and attempt to recognize the simplified insn.
3640 But we need a general simplify_rtx that doesn't have pass
3641 specific state variables. I'm not aware of one at the moment. */
3643 success = validate_replace_src (from, to, insn);
3644 set = single_set (insn);
3646 /* We've failed to do the replacement. Try to add a REG_EQUAL note so
3647 that we don't lose the information. */
3648 if (!success && !note)
3650 if (!set)
3651 return 0;
3653 note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3654 copy_rtx (SET_SRC (set)),
3655 REG_NOTES (insn));
3658 /* Always do the replacement in REG_EQUAL and REG_EQUIV notes. Also
3659 try to simplify them. */
3660 if (note)
3662 rtx simplified;
3664 if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
3665 abort();
3667 src = XEXP (note, 0);
3669 /* Try to simplify resulting note. */
3670 simplified = simplify_rtx (src);
3671 if (simplified)
3673 src = simplified;
3674 XEXP (note, 0) = src;
3677 /* The REG_EQUAL note may get simplified into a plain register.
3678 We don't allow that; remove the note. This case ought not to
3679 happen, because the code above ought to synthesize a reg-reg
3680 move, but be on the safe side. */
3681 else if (REG_P (src))
3682 remove_note (insn, note);
3684 return success;
3687 /* Find a set of register REGNO that is available on entry to INSN's block.
3688 Returns NULL if no such set is found. */
3690 static struct expr *
3691 find_avail_set (regno, insn)
3692 int regno;
3693 rtx insn;
3695 /* SET1 contains the last set found that can be returned to the caller for
3696 use in a substitution. */
3697 struct expr *set1 = 0;
3699 /* Loops are not possible here. To get a loop we would need two sets
3700 available at the start of the block containing INSN. I.e. we would
3701 need two sets like this available at the start of the block:
3703 (set (reg X) (reg Y))
3704 (set (reg Y) (reg X))
3706 This cannot happen since the set of (reg Y) would have killed the
3707 set of (reg X) making it unavailable at the start of this block. */
3708 while (1)
3710 rtx src;
3711 struct expr *set = lookup_set (regno, NULL_RTX);
3713 /* Find a set that is available at the start of the block
3714 which contains INSN. */
3715 while (set)
3717 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3718 break;
3719 set = next_set (regno, set);
3722 /* If no available set was found we've reached the end of the
3723 (possibly empty) copy chain. */
3724 if (set == 0)
3725 break;
3727 if (GET_CODE (set->expr) != SET)
3728 abort ();
3730 src = SET_SRC (set->expr);
3732 /* We know the set is available.
3733 Now check that SRC is ANTLOC (i.e. none of the source operands
3734 have changed since the start of the block).
3736 If the source operand changed, we may still use it for the next
3737 iteration of this loop, but we may not use it for substitutions. */
3739 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3740 set1 = set;
3742 /* If the source of the set is anything except a register, then
3743 we have reached the end of the copy chain. */
3744 if (GET_CODE (src) != REG)
3745 break;
3747 /* Follow the copy chain, ie start another iteration of the loop
3748 and see if we have an available copy into SRC. */
3749 regno = REGNO (src);
3752 /* SET1 holds the last set that was available and anticipatable at
3753 INSN. */
3754 return set1;
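/* A sketch of the chain walk (register numbers made up): if
   (set (reg 72) (reg 71)) and (set (reg 71) (reg 70)) are both
   available on entry to INSN's block with unchanged sources, a query
   for register 72 follows 72 -> 71 -> 70 and returns the
   (set (reg 71) (reg 70)) entry, so a use of (reg 72) can be replaced
   by (reg 70).  */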
3757 /* Subroutine of cprop_insn that tries to propagate constants into
3758 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3759 that we can use for substitutions.
3760 REG_USED is the use we will try to replace, SRC is the constant we
3761 will try to substitute for it.
3762 Returns nonzero if a change was made. */
3764 static int
3765 cprop_jump (insn, copy, reg_used, src)
3766 rtx insn, copy;
3767 struct reg_use *reg_used;
3768 rtx src;
3770 rtx set = PATTERN (copy);
3771 rtx temp;
3773 /* Replace the register with the appropriate constant. */
3774 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3776 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3777 GET_MODE (SET_SRC (set)),
3778 GET_MODE (XEXP (SET_SRC (set), 0)),
3779 XEXP (SET_SRC (set), 0),
3780 XEXP (SET_SRC (set), 1),
3781 XEXP (SET_SRC (set), 2));
3783 /* If no simplification can be made, then try the next
3784 register. */
3785 if (temp == 0)
3786 return 0;
3788 SET_SRC (set) = temp;
3790 /* That may have changed the structure of TEMP, so
3791 force it to be rerecognized if it has not turned
3792 into a nop or unconditional jump. */
3794 INSN_CODE (copy) = -1;
3795 if ((SET_DEST (set) == pc_rtx
3796 && (SET_SRC (set) == pc_rtx
3797 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3798 || recog (PATTERN (copy), copy, NULL) >= 0)
3800 /* This has either become an unconditional jump
3801 or a nop-jump. We'd like to delete nop jumps
3802 here, but doing so confuses gcse. So we just
3803 make the replacement and let later passes
3804 sort things out. */
3805 PATTERN (insn) = set;
3806 INSN_CODE (insn) = -1;
3808 /* One less use of the label this insn used to jump to
3809 if we turned this into a NOP jump. */
3810 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3811 --LABEL_NUSES (JUMP_LABEL (insn));
3813 /* If this has turned into an unconditional jump,
3814 then put a barrier after it so that the unreachable
3815 code will be deleted. */
3816 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3817 emit_barrier_after (insn);
3819 run_jump_opt_after_gcse = 1;
3821 const_prop_count++;
3822 if (gcse_file != NULL)
3824 fprintf (gcse_file,
3825 "CONST-PROP: Replacing reg %d in insn %d with constant ",
3826 REGNO (reg_used->reg_rtx), INSN_UID (insn));
3827 print_rtl (gcse_file, src);
3828 fprintf (gcse_file, "\n");
3831 return 1;
3833 return 0;
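/* A sketch of the intended effect: after substituting a constant for
   the register in something like

       (set (pc) (if_then_else (eq (reg 70) (const_int 0))
                               (label_ref 23)
                               (pc)))

   the IF_THEN_ELSE may simplify to one of its arms, leaving either a
   nop jump (SET_SRC == pc_rtx) or an unconditional jump to the label,
   which the code above then installs in INSN.  */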
3836 #ifdef HAVE_cc0
3838 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3839 for machines that have CC0. INSN is a single set that stores into CC0;
3840 the insn following it is a conditional jump. REG_USED is the use we will
3841 try to replace, SRC is the constant we will try to substitute for it.
3842 Returns nonzero if a change was made. */
3844 static int
3845 cprop_cc0_jump (insn, reg_used, src)
3846 rtx insn;
3847 struct reg_use *reg_used;
3848 rtx src;
3850 rtx jump = NEXT_INSN (insn);
3851 rtx copy = copy_rtx (jump);
3852 rtx set = PATTERN (copy);
3854 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3855 substitute into it. */
3856 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3857 if (! cprop_jump (jump, copy, reg_used, src))
3858 return 0;
3860 /* If we succeeded, delete the cc0 setter. */
3861 PUT_CODE (insn, NOTE);
3862 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3863 NOTE_SOURCE_FILE (insn) = 0;
3864 return 1;
3866 #endif
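/* A sketch of the pattern pair handled above (illustrative):

       (set (cc0) (compare (reg 70) (const_int 0)))              ; INSN
       (set (pc) (if_then_else (eq (cc0) (const_int 0)) ...))    ; the jump

   The compare's source replaces cc0 in a copy of the jump so that
   cprop_jump sees a self-contained condition.  */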
3868 /* Perform constant and copy propagation on INSN.
3869 The result is non-zero if a change was made. */
3871 static int
3872 cprop_insn (insn, alter_jumps)
3873 rtx insn;
3874 int alter_jumps;
3876 struct reg_use *reg_used;
3877 int changed = 0;
3878 rtx note;
3880 /* Only propagate into SETs. Note that a conditional jump is a
3881 SET with pc_rtx as the destination. */
3882 if ((GET_CODE (insn) != INSN
3883 && GET_CODE (insn) != JUMP_INSN)
3884 || GET_CODE (PATTERN (insn)) != SET)
3885 return 0;
3887 reg_use_count = 0;
3888 find_used_regs (PATTERN (insn));
3890 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3891 if (!note)
3892 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3894 /* We may win even when propagating constants into notes. */
3895 if (note)
3896 find_used_regs (XEXP (note, 0));
3898 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3899 reg_used++, reg_use_count--)
3901 unsigned int regno = REGNO (reg_used->reg_rtx);
3902 rtx pat, src;
3903 struct expr *set;
3905 /* Ignore registers created by GCSE; the analysis tables only
3906 cover registers numbered below max_gcse_regno. */
3907 if (regno >= max_gcse_regno)
3908 continue;
3910 /* If the register has already been set in this block, there's
3911 nothing we can do. */
3912 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3913 continue;
3915 /* Find an assignment that sets reg_used and is available
3916 at the start of the block. */
3917 set = find_avail_set (regno, insn);
3918 if (! set)
3919 continue;
3921 pat = set->expr;
3922 /* ??? We might be able to handle PARALLELs. Later. */
3923 if (GET_CODE (pat) != SET)
3924 abort ();
3926 src = SET_SRC (pat);
3928 /* Constant propagation. */
3929 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3930 || GET_CODE (src) == SYMBOL_REF)
3932 /* Handle normal insns first. */
3933 if (GET_CODE (insn) == INSN
3934 && try_replace_reg (reg_used->reg_rtx, src, insn))
3936 changed = 1;
3937 const_prop_count++;
3938 if (gcse_file != NULL)
3940 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3941 regno);
3942 fprintf (gcse_file, "insn %d with constant ",
3943 INSN_UID (insn));
3944 print_rtl (gcse_file, src);
3945 fprintf (gcse_file, "\n");
3948 /* The original insn setting reg_used may or may not now be
3949 deletable. We leave the deletion to flow. */
3952 /* Try to propagate a CONST_INT into a conditional jump.
3953 We're pretty specific about what we will handle in this
3954 code; we can extend this as necessary over time.
3956 Right now the insn in question must look like
3957 (set (pc) (if_then_else ...)) */
3958 else if (alter_jumps
3959 && GET_CODE (insn) == JUMP_INSN
3960 && condjump_p (insn)
3961 && ! simplejump_p (insn))
3962 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3963 #ifdef HAVE_cc0
3964 /* Similar code for machines that use a pair of CC0 setter and
3965 conditional jump insns. */
3966 else if (alter_jumps
3967 && GET_CODE (PATTERN (insn)) == SET
3968 && SET_DEST (PATTERN (insn)) == cc0_rtx
3969 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3970 && condjump_p (NEXT_INSN (insn))
3971 && ! simplejump_p (NEXT_INSN (insn)))
3973 if (cprop_cc0_jump (insn, reg_used, src))
3975 changed = 1;
3976 break;
3979 #endif
3981 else if (GET_CODE (src) == REG
3982 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3983 && REGNO (src) != regno)
3985 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3987 changed = 1;
3988 copy_prop_count++;
3989 if (gcse_file != NULL)
3991 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3992 regno, INSN_UID (insn));
3993 fprintf (gcse_file, " with reg %d\n", REGNO (src));
3996 /* The original insn setting reg_used may or may not now be
3997 deletable. We leave the deletion to flow. */
3998 /* FIXME: If it turns out that the insn isn't deletable,
3999 then we may have unnecessarily extended register lifetimes
4000 and made things worse. */
4005 return changed;
4008 /* Forward propagate copies. This includes copies and constants. Return
4009 non-zero if a change was made. */
4011 static int
4012 cprop (alter_jumps)
4013 int alter_jumps;
4015 int bb, changed;
4016 rtx insn;
4018 /* Note we start at block 1. */
4020 changed = 0;
4021 for (bb = 1; bb < n_basic_blocks; bb++)
4023 /* Reset tables used to keep track of what's still valid [since the
4024 start of the block]. */
4025 reset_opr_set_tables ();
4027 for (insn = BLOCK_HEAD (bb);
4028 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4029 insn = NEXT_INSN (insn))
4031 if (INSN_P (insn))
4033 changed |= cprop_insn (insn, alter_jumps);
4035 /* Keep track of everything modified by this insn. */
4036 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4037 call mark_oprs_set if we turned the insn into a NOTE. */
4038 if (GET_CODE (insn) != NOTE)
4039 mark_oprs_set (insn);
4044 if (gcse_file != NULL)
4045 fprintf (gcse_file, "\n");
4047 return changed;
4050 /* Perform one copy/constant propagation pass.
4051 F is the first insn in the function.
4052 PASS is the pass count. */
4054 static int
4055 one_cprop_pass (pass, alter_jumps)
4056 int pass;
4057 int alter_jumps;
4059 int changed = 0;
4061 const_prop_count = 0;
4062 copy_prop_count = 0;
4064 alloc_set_hash_table (max_cuid);
4065 compute_set_hash_table ();
4066 if (gcse_file)
4067 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4068 n_sets);
4069 if (n_sets > 0)
4071 alloc_cprop_mem (n_basic_blocks, n_sets);
4072 compute_cprop_data ();
4073 changed = cprop (alter_jumps);
4074 free_cprop_mem ();
4077 free_set_hash_table ();
4079 if (gcse_file)
4081 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4082 current_function_name, pass, bytes_used);
4083 fprintf (gcse_file, "%d const props, %d copy props\n\n",
4084 const_prop_count, copy_prop_count);
4087 return changed;
4090 /* Compute PRE+LCM working variables. */
4092 /* Local properties of expressions. */
4093 /* Nonzero for expressions that are transparent in the block. */
4094 static sbitmap *transp;
4096 /* Nonzero for expressions that are transparent at the end of the block.
4097 This is only zero for expressions killed by an abnormal critical edge
4098 created by a call. */
4099 static sbitmap *transpout;
4101 /* Nonzero for expressions that are computed (available) in the block. */
4102 static sbitmap *comp;
4104 /* Nonzero for expressions that are locally anticipatable in the block. */
4105 static sbitmap *antloc;
4107 /* Nonzero for expressions where this block is an optimal computation
4108 point. */
4109 static sbitmap *pre_optimal;
4111 /* Nonzero for expressions which are redundant in a particular block. */
4112 static sbitmap *pre_redundant;
4114 /* Nonzero for expressions which should be inserted on a specific edge. */
4115 static sbitmap *pre_insert_map;
4117 /* Nonzero for expressions which should be deleted in a specific block. */
4118 static sbitmap *pre_delete_map;
4120 /* Contains the edge_list returned by pre_edge_lcm. */
4121 static struct edge_list *edge_list;
4123 /* Redundant insns. */
4124 static sbitmap pre_redundant_insns;
4126 /* Allocate vars used for PRE analysis. */
4128 static void
4129 alloc_pre_mem (n_blocks, n_exprs)
4130 int n_blocks, n_exprs;
4132 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4133 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4134 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4136 pre_optimal = NULL;
4137 pre_redundant = NULL;
4138 pre_insert_map = NULL;
4139 pre_delete_map = NULL;
4140 ae_in = NULL;
4141 ae_out = NULL;
4142 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4144 /* pre_insert_map and pre_delete_map are allocated later. */
4147 /* Free vars used for PRE analysis. */
4149 static void
4150 free_pre_mem ()
4152 free (transp);
4153 free (comp);
4155 /* ANTLOC and AE_KILL are freed just after pre_edge_lcm finishes. */
4157 if (pre_optimal)
4158 free (pre_optimal);
4159 if (pre_redundant)
4160 free (pre_redundant);
4161 if (pre_insert_map)
4162 free (pre_insert_map);
4163 if (pre_delete_map)
4164 free (pre_delete_map);
4166 if (ae_in)
4167 free (ae_in);
4168 if (ae_out)
4169 free (ae_out);
4171 transp = comp = NULL;
4172 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4173 ae_in = ae_out = NULL;
4176 /* Top level routine to do the dataflow analysis needed by PRE. */
4178 static void
4179 compute_pre_data ()
4181 sbitmap trapping_expr;
4182 int i;
4183 unsigned int ui;
4185 compute_local_properties (transp, comp, antloc, 0);
4186 sbitmap_vector_zero (ae_kill, n_basic_blocks);
4188 /* Collect expressions which might trap. */
4189 trapping_expr = sbitmap_alloc (n_exprs);
4190 sbitmap_zero (trapping_expr);
4191 for (ui = 0; ui < expr_hash_table_size; ui++)
4193 struct expr *e;
4194 for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4195 if (may_trap_p (e->expr))
4196 SET_BIT (trapping_expr, e->bitmap_index);
4199 /* Compute ae_kill for each basic block using:
4201 ~(TRANSP | COMP)
4203 This is significantly faster than compute_ae_kill. */
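/* To see why this computes the kill set: an expression is available at
   the end of a block iff it is computed there or passes through the
   block unchanged, so it is killed exactly when it is neither computed
   nor transparent.  */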
4205 for (i = 0; i < n_basic_blocks; i++)
4207 edge e;
4209 /* If the current block is the destination of an abnormal edge, we
4210 kill all trapping expressions because we won't be able to properly
4211 place the instruction on the edge. So make them neither
4212 anticipatable nor transparent. This is fairly conservative. */
4213 for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4214 if (e->flags & EDGE_ABNORMAL)
4216 sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4217 sbitmap_difference (transp[i], transp[i], trapping_expr);
4218 break;
4221 sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4222 sbitmap_not (ae_kill[i], ae_kill[i]);
4225 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4226 ae_kill, &pre_insert_map, &pre_delete_map);
4227 free (antloc);
4228 antloc = NULL;
4229 free (ae_kill);
4230 ae_kill = NULL;
4231 free (trapping_expr);
4234 /* PRE utilities */
4236 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4237 block BB.
4239 VISITED is a pointer to a working buffer for tracking which BB's have
4240 been visited. It is NULL for the top-level call.
4242 We treat reaching expressions that go through blocks containing the same
4243 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4244 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4245 2 as not reaching. The intent is to improve the probability of finding
4246 only one reaching expression and to reduce register lifetimes by picking
4247 the closest such expression. */
4249 static int
4250 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4251 int occr_bb;
4252 struct expr *expr;
4253 int bb;
4254 char *visited;
4256 edge pred;
4258 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4260 int pred_bb = pred->src->index;
4262 if (pred->src == ENTRY_BLOCK_PTR
4263 /* Has this predecessor already been visited? */
4264 || visited[pred_bb])
4265 ;/* Nothing to do. */
4267 /* Does this predecessor generate this expression? */
4268 else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4270 /* Is this the occurrence we're looking for?
4271 Note that there's only one generating occurrence per block
4272 so we just need to check the block number. */
4273 if (occr_bb == pred_bb)
4274 return 1;
4276 visited[pred_bb] = 1;
4278 /* Ignore this predecessor if it kills the expression. */
4279 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4280 visited[pred_bb] = 1;
4282 /* Neither gen nor kill. */
4283 else
4285 visited[pred_bb] = 1;
4286 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4287 return 1;
4291 /* All paths have been checked. */
4292 return 0;
4295 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4296 memory allocated for that function is returned. */
4298 static int
4299 pre_expr_reaches_here_p (occr_bb, expr, bb)
4300 int occr_bb;
4301 struct expr *expr;
4302 int bb;
4304 int rval;
4305 char *visited = (char *) xcalloc (n_basic_blocks, 1);
4307 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
4309 free (visited);
4310 return rval;
4314 /* Given an expr, generate RTL which we can insert at the end of a BB,
4315 or on an edge. The caller is responsible for recording the block
4316 number of any insns generated. */
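/* Note that emit_move_insn may expand the copy into several insns on
   some machines, in which case the pattern returned is a SEQUENCE;
   callers such as insert_insn_end_bb must handle both cases.  */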
4318 static rtx
4319 process_insert_insn (expr)
4320 struct expr *expr;
4322 rtx reg = expr->reaching_reg;
4323 rtx pat, copied_expr;
4324 rtx first_new_insn;
4326 start_sequence ();
4327 copied_expr = copy_rtx (expr->expr);
4328 emit_move_insn (reg, copied_expr);
4329 first_new_insn = get_insns ();
4330 pat = gen_sequence ();
4331 end_sequence ();
4333 return pat;
4336 /* Add EXPR to the end of basic block BB.
4338 This is used by both PRE and code hoisting.
4340 For PRE, we want to verify that the expr is either transparent
4341 or locally anticipatable in the target block. This check makes
4342 no sense for code hoisting. */
4344 static void
4345 insert_insn_end_bb (expr, bb, pre)
4346 struct expr *expr;
4347 int bb;
4348 int pre;
4350 rtx insn = BLOCK_END (bb);
4351 rtx new_insn;
4352 rtx reg = expr->reaching_reg;
4353 int regno = REGNO (reg);
4354 rtx pat;
4355 int i;
4357 pat = process_insert_insn (expr);
4359 /* If the last insn is a jump, insert EXPR in front [taking care to
4360 handle cc0, etc. properly]. */
4362 if (GET_CODE (insn) == JUMP_INSN)
4364 #ifdef HAVE_cc0
4365 rtx note;
4366 #endif
4368 /* If this is a jump table, then we can't insert stuff here. Since
4369 we know the previous real insn must be the tablejump, we insert
4370 the new instruction just before the tablejump. */
4371 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4372 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4373 insn = prev_real_insn (insn);
4375 #ifdef HAVE_cc0
4376 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4377 if cc0 isn't set. */
4378 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4379 if (note)
4380 insn = XEXP (note, 0);
4381 else
4383 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4384 if (maybe_cc0_setter
4385 && INSN_P (maybe_cc0_setter)
4386 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4387 insn = maybe_cc0_setter;
4389 #endif
4390 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4391 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4394 /* Likewise if the last insn is a call, as will happen in the presence
4395 of exception handling. */
4396 else if (GET_CODE (insn) == CALL_INSN)
4398 HARD_REG_SET parm_regs;
4399 int nparm_regs;
4400 rtx p;
4402 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4403 we search backward and place the instructions before the first
4404 parameter is loaded. Do this for everyone for consistency and a
4405 presumption that we'll get better code elsewhere as well.
4407 It should always be the case that we can put these instructions
4408 anywhere in the basic block when performing PRE optimizations.
4409 Check this. */
4411 if (pre
4412 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4413 && !TEST_BIT (transp[bb], expr->bitmap_index))
4414 abort ();
4416 /* Since different machines initialize their parameter registers
4417 in different orders, assume nothing. Collect the set of all
4418 parameter registers. */
4419 CLEAR_HARD_REG_SET (parm_regs);
4420 nparm_regs = 0;
4421 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4422 if (GET_CODE (XEXP (p, 0)) == USE
4423 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4425 if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4426 abort ();
4428 SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4429 nparm_regs++;
4432 /* Search backward for the first set of a register in this set. */
4433 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4435 insn = PREV_INSN (insn);
4436 p = single_set (insn);
4437 if (p && GET_CODE (SET_DEST (p)) == REG
4438 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4439 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4441 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4442 nparm_regs--;
4446 /* If we found all the parameter loads, then we want to insert
4447 before the first parameter load.
4449 If we did not find all the parameter loads, then we might have
4450 stopped on the head of the block, which could be a CODE_LABEL.
4451 If we inserted before the CODE_LABEL, then we would be putting
4452 the insn in the wrong basic block. In that case, put the insn
4453 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4454 while (GET_CODE (insn) == CODE_LABEL
4455 || NOTE_INSN_BASIC_BLOCK_P (insn))
4456 insn = NEXT_INSN (insn);
4458 new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4460 else
4462 new_insn = emit_insn_after (pat, insn);
4463 BLOCK_END (bb) = new_insn;
4466 /* Keep block number table up to date.
4467 Note that PAT could be a multiple insn sequence; we have to make
4468 sure that each insn in the sequence is handled. */
4469 if (GET_CODE (pat) == SEQUENCE)
4471 for (i = 0; i < XVECLEN (pat, 0); i++)
4473 rtx insn = XVECEXP (pat, 0, i);
4475 set_block_num (insn, bb);
4476 if (INSN_P (insn))
4477 add_label_notes (PATTERN (insn), new_insn);
4479 note_stores (PATTERN (insn), record_set_info, insn);
4482 else
4484 add_label_notes (SET_SRC (pat), new_insn);
4485 set_block_num (new_insn, bb);
4487 /* Keep register set table up to date. */
4488 record_one_set (regno, new_insn);
4491 gcse_create_count++;
4493 if (gcse_file)
4495 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4496 bb, INSN_UID (new_insn));
4497 fprintf (gcse_file, "copying expression %d to reg %d\n",
4498 expr->bitmap_index, regno);
4502 /* Insert partially redundant expressions on edges in the CFG to make
4503 the expressions fully redundant. */
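/* As a small worked example of what this accomplishes: in a diamond
   where block 1 branches to blocks 2 and 3, both falling into block 4,
   an expression computed in blocks 2 and 4 only is partially redundant
   in block 4. Inserting the computation on the edge 3->4 makes it
   fully redundant there, so pre_delete can replace the computation in
   block 4 with a copy from the reaching register.  */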
4505 static int
4506 pre_edge_insert (edge_list, index_map)
4507 struct edge_list *edge_list;
4508 struct expr **index_map;
4510 int e, i, j, num_edges, set_size, did_insert = 0;
4511 sbitmap *inserted;
4513 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4514 if it reaches any of the deleted expressions. */
4516 set_size = pre_insert_map[0]->size;
4517 num_edges = NUM_EDGES (edge_list);
4518 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4519 sbitmap_vector_zero (inserted, num_edges);
4521 for (e = 0; e < num_edges; e++)
4523 int indx;
4524 basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4525 int bb = pred->index;
4527 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4529 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4531 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4532 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4534 struct expr *expr = index_map[j];
4535 struct occr *occr;
4537 /* Now look at each deleted occurrence of this expression. */
4538 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4540 if (! occr->deleted_p)
4541 continue;
4543 /* Insert this expression on this edge if it would
4544 reach the deleted occurrence in BB. */
4545 if (!TEST_BIT (inserted[e], j))
4547 rtx insn;
4548 edge eg = INDEX_EDGE (edge_list, e);
4550 /* We can't insert anything on an abnormal and
4551 critical edge, so we insert the insn at the end of
4552 the previous block. There are several alternatives
4553 detailed in Morgan's book, p. 277 (sec. 10.5), for
4554 handling this situation. This one is easiest for
4555 now. */
4557 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4558 insert_insn_end_bb (index_map[j], bb, 0);
4559 else
4561 insn = process_insert_insn (index_map[j]);
4562 insert_insn_on_edge (insn, eg);
4565 if (gcse_file)
4567 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4568 bb,
4569 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4570 fprintf (gcse_file, "copy expression %d\n",
4571 expr->bitmap_index);
4574 SET_BIT (inserted[e], j);
4575 did_insert = 1;
4576 gcse_create_count++;
4583 free (inserted);
4584 return did_insert;
4587 /* Copy the result of INSN to the reaching register of EXPR. */
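/* For instance (register numbers invented): if INSN is
   (set (reg 105) (plus (reg 100) (reg 101))) and EXPR's reaching
   register is reg 107, we emit (set (reg 107) (reg 105)) immediately
   after INSN, making the value available in reg 107 on paths that lead
   to a deleted occurrence of the expression.  */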
4589 static void
4590 pre_insert_copy_insn (expr, insn)
4591 struct expr *expr;
4592 rtx insn;
4594 rtx reg = expr->reaching_reg;
4595 int regno = REGNO (reg);
4596 int indx = expr->bitmap_index;
4597 rtx set = single_set (insn);
4598 rtx new_insn;
4599 int bb = BLOCK_NUM (insn);
4601 if (!set)
4602 abort ();
4604 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4605 insn);
4607 /* Keep block number table up to date. */
4608 set_block_num (new_insn, bb);
4610 /* Keep register set table up to date. */
4611 record_one_set (regno, new_insn);
4612 if (insn == BLOCK_END (bb))
4613 BLOCK_END (bb) = new_insn;
4615 gcse_create_count++;
4617 if (gcse_file)
4618 fprintf (gcse_file,
4619 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4620 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4621 INSN_UID (insn), regno);
4624 /* Copy available expressions that reach the redundant expression
4625 to `reaching_reg'. */
4627 static void
4628 pre_insert_copies ()
4630 unsigned int i;
4631 struct expr *expr;
4632 struct occr *occr;
4633 struct occr *avail;
4635 /* For each available expression in the table, copy the result to
4636 `reaching_reg' if the expression reaches a deleted one.
4638 ??? The current algorithm is rather brute force.
4639 Need to do some profiling. */
4641 for (i = 0; i < expr_hash_table_size; i++)
4642 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4644 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4645 we don't want to insert a copy here because the expression may not
4646 really be redundant. So only insert an insn if the expression was
4647 deleted. This test also avoids further processing if the
4648 expression wasn't deleted anywhere. */
4649 if (expr->reaching_reg == NULL)
4650 continue;
4652 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4654 if (! occr->deleted_p)
4655 continue;
4657 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4659 rtx insn = avail->insn;
4661 /* No need to handle this one if handled already. */
4662 if (avail->copied_p)
4663 continue;
4665 /* Don't handle this one if it's a redundant one. */
4666 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4667 continue;
4669 /* Or if the expression doesn't reach the deleted one. */
4670 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4671 BLOCK_NUM (occr->insn)))
4672 continue;
4674 /* Copy the result of avail to reaching_reg. */
4675 pre_insert_copy_insn (expr, insn);
4676 avail->copied_p = 1;
4682 /* Delete redundant computations.
4683 Deletion is done by changing the insn to copy the `reaching_reg' of
4684 the expression into the result of the SET. It is left to later passes
4685 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4687 Returns non-zero if a change is made. */
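/* Continuing the example under pre_insert_copy_insn above: a redundant
   occurrence (set (reg 105) (plus (reg 100) (reg 101))) is "deleted" by
   rewriting it as the copy (set (reg 105) (reg 107)), reg 107 being the
   expression's reaching register.  */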
4689 static int
4690 pre_delete ()
4692 unsigned int i;
4693 int changed;
4694 struct expr *expr;
4695 struct occr *occr;
4697 changed = 0;
4698 for (i = 0; i < expr_hash_table_size; i++)
4699 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4701 int indx = expr->bitmap_index;
4703 /* We only need to search antic_occr since we require
4704 ANTLOC != 0. */
4706 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4708 rtx insn = occr->insn;
4709 rtx set;
4710 int bb = BLOCK_NUM (insn);
4712 if (TEST_BIT (pre_delete_map[bb], indx))
4714 set = single_set (insn);
4715 if (! set)
4716 abort ();
4718 /* Create a pseudo-reg to store the result of reaching
4719 expressions into. Get the mode for the new pseudo from
4720 the mode of the original destination pseudo. */
4721 if (expr->reaching_reg == NULL)
4722 expr->reaching_reg
4723 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4725 /* In theory this should never fail since we're creating
4726 a reg->reg copy.
4728 However, on the x86 some of the movXX patterns actually
4729 contain clobbers of scratch regs. This may cause the
4730 insn created by validate_change to not match any pattern
4731 and thus cause validate_change to fail. */
4732 if (validate_change (insn, &SET_SRC (set),
4733 expr->reaching_reg, 0))
4735 occr->deleted_p = 1;
4736 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4737 changed = 1;
4738 gcse_subst_count++;
4741 if (gcse_file)
4743 fprintf (gcse_file,
4744 "PRE: redundant insn %d (expression %d) in ",
4745 INSN_UID (insn), indx);
4746 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4747 bb, REGNO (expr->reaching_reg));
4753 return changed;
4756 /* Perform GCSE optimizations using PRE.
4757 This is called by one_pre_gcse_pass after all the dataflow analysis
4758 has been done.
4760 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
4761 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4762 Compiler Design and Implementation.
4764 ??? A new pseudo reg is created to hold the reaching expression. The nice
4765 thing about the classical approach is that it would try to use an existing
4766 reg. If the register can't be adequately optimized [i.e. we introduce
4767 reload problems], one could add a pass here to propagate the new register
4768 through the block.
4770 ??? We don't handle single sets in PARALLELs because we're [currently] not
4771 able to copy the rest of the parallel when we insert copies to create full
4772 redundancies from partial redundancies. However, there's no reason why we
4773 can't handle PARALLELs in the cases where there are no partial
4774 redundancies. */
4776 static int
4777 pre_gcse ()
4779 unsigned int i;
4780 int did_insert, changed;
4781 struct expr **index_map;
4782 struct expr *expr;
4784 /* Compute a mapping from expression number (`bitmap_index') to
4785 hash table entry. */
4787 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4788 for (i = 0; i < expr_hash_table_size; i++)
4789 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4790 index_map[expr->bitmap_index] = expr;
4792 /* Reset bitmap used to track which insns are redundant. */
4793 pre_redundant_insns = sbitmap_alloc (max_cuid);
4794 sbitmap_zero (pre_redundant_insns);
4796 /* Delete the redundant insns first so that
4797 - we know what register to use for the new insns and for the other
4798 ones with reaching expressions
4799 - we know which insns are redundant when we go to create copies */
4801 changed = pre_delete ();
4803 did_insert = pre_edge_insert (edge_list, index_map);
4805 /* In other places with reaching expressions, copy the expression to the
4806 specially allocated pseudo-reg that reaches the redundant expr. */
4807 pre_insert_copies ();
4808 if (did_insert)
4810 commit_edge_insertions ();
4811 changed = 1;
4814 free (index_map);
4815 free (pre_redundant_insns);
4816 return changed;
4819 /* Top level routine to perform one PRE GCSE pass.
4821 Return non-zero if a change was made. */
4823 static int
4824 one_pre_gcse_pass (pass)
4825 int pass;
4827 int changed = 0;
4829 gcse_subst_count = 0;
4830 gcse_create_count = 0;
4832 alloc_expr_hash_table (max_cuid);
4833 add_noreturn_fake_exit_edges ();
4834 compute_expr_hash_table ();
4835 if (gcse_file)
4836 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4837 expr_hash_table_size, n_exprs);
4839 if (n_exprs > 0)
4841 alloc_pre_mem (n_basic_blocks, n_exprs);
4842 compute_pre_data ();
4843 changed |= pre_gcse ();
4844 free_edge_list (edge_list);
4845 free_pre_mem ();
4848 remove_fake_edges ();
4849 free_expr_hash_table ();
4851 if (gcse_file)
4853 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4854 current_function_name, pass, bytes_used);
4855 fprintf (gcse_file, "%d substs, %d insns created\n",
4856 gcse_subst_count, gcse_create_count);
4859 return changed;
4862 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4863 If notes are added to an insn which references a CODE_LABEL, the
4864 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
4865 because the following loop optimization pass requires them. */
4867 /* ??? This is very similar to the loop.c add_label_notes function. We
4868 could probably share code here. */
4870 /* ??? If there was a jump optimization pass after gcse and before loop,
4871 then we would not need to do this here, because jump would add the
4872 necessary REG_LABEL notes. */
4874 static void
4875 add_label_notes (x, insn)
4876 rtx x;
4877 rtx insn;
4879 enum rtx_code code = GET_CODE (x);
4880 int i, j;
4881 const char *fmt;
4883 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4885 /* This code used to ignore labels that referred to dispatch tables to
4886 avoid flow generating (slightly) worse code.
4888 We no longer ignore such label references (see LABEL_REF handling in
4889 mark_jump_label for additional information). */
4891 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4892 REG_NOTES (insn));
4893 if (LABEL_P (XEXP (x, 0)))
4894 LABEL_NUSES (XEXP (x, 0))++;
4895 return;
4898 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4900 if (fmt[i] == 'e')
4901 add_label_notes (XEXP (x, i), insn);
4902 else if (fmt[i] == 'E')
4903 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4904 add_label_notes (XVECEXP (x, i, j), insn);
4908 /* Compute transparent outgoing information for each block.
4910 An expression is transparent to an edge unless it is killed by
4911 the edge itself. This can only happen with abnormal control flow,
4912 when the edge is traversed through a call. This happens with
4913 non-local labels and exceptions.
4915 This would not be necessary if we split the edge. While this is
4916 normally impossible for abnormal critical edges, with some effort
4917 it should be possible with exception handling, since we still have
4918 control over which handler should be invoked. But due to increased
4919 EH table sizes, this may not be worthwhile. */
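/* For example, (mem (reg 100)) cannot be considered transparent at the
   end of a block that ends in a call, since the callee might store
   through the pointer. Loads from the constant pool are the exception;
   nothing can store into the constant pool.  */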
4921 static void
4922 compute_transpout ()
4924 int bb;
4925 unsigned int i;
4926 struct expr *expr;
4928 sbitmap_vector_ones (transpout, n_basic_blocks);
4930 for (bb = 0; bb < n_basic_blocks; ++bb)
4932 /* Note that flow inserted a nop at the end of basic blocks that
4933 end in call instructions for reasons other than abnormal
4934 control flow. */
4935 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4936 continue;
4938 for (i = 0; i < expr_hash_table_size; i++)
4939 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4940 if (GET_CODE (expr->expr) == MEM)
4942 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4943 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4944 continue;
4946 /* ??? Optimally, we would use interprocedural alias
4947 analysis to determine if this mem is actually killed
4948 by this call. */
4949 RESET_BIT (transpout[bb], expr->bitmap_index);
4954 /* Removal of useless null pointer checks */
4956 /* Called via note_stores. X is set by SETTER. If X is a register we must
4957 invalidate nonnull_local and set nonnull_killed. DATA is really a
4958 `null_pointer_info *'.
4960 We ignore hard registers. */
4962 static void
4963 invalidate_nonnull_info (x, setter, data)
4964 rtx x;
4965 rtx setter ATTRIBUTE_UNUSED;
4966 void *data;
4968 unsigned int regno;
4969 struct null_pointer_info *npi = (struct null_pointer_info *) data;
4971 while (GET_CODE (x) == SUBREG)
4972 x = SUBREG_REG (x);
4974 /* Ignore anything that is not a register or is a hard register. */
4975 if (GET_CODE (x) != REG
4976 || REGNO (x) < npi->min_reg
4977 || REGNO (x) >= npi->max_reg)
4978 return;
4980 regno = REGNO (x) - npi->min_reg;
4982 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4983 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4986 /* Do null-pointer check elimination for the registers indicated in
4987 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4988 they are not our responsibility to free. */
4990 static void
4991 delete_null_pointer_checks_1 (delete_list, block_reg, nonnull_avin,
4992 nonnull_avout, npi)
4993 varray_type *delete_list;
4994 unsigned int *block_reg;
4995 sbitmap *nonnull_avin;
4996 sbitmap *nonnull_avout;
4997 struct null_pointer_info *npi;
4999 int bb;
5000 int current_block;
5001 sbitmap *nonnull_local = npi->nonnull_local;
5002 sbitmap *nonnull_killed = npi->nonnull_killed;
5004 /* Compute local properties, nonnull and killed. A register will have
5005 the nonnull property if at the end of the current block its value is
5006 known to be nonnull. The killed property indicates that somewhere in
5007 the block any information we had about the register is killed.
5009 Note that a register can have both properties in a single block. That
5010 indicates that it's killed, then later in the block a new value is
5011 computed. */
5012 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5013 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5015 for (current_block = 0; current_block < n_basic_blocks; current_block++)
5017 rtx insn, stop_insn;
5019 /* Set the current block for invalidate_nonnull_info. */
5020 npi->current_block = current_block;
5022 /* Scan each insn in the basic block looking for memory references and
5023 register sets. */
5024 stop_insn = NEXT_INSN (BLOCK_END (current_block));
5025 for (insn = BLOCK_HEAD (current_block);
5026 insn != stop_insn;
5027 insn = NEXT_INSN (insn))
5029 rtx set;
5030 rtx reg;
5032 /* Ignore anything that is not a normal insn. */
5033 if (! INSN_P (insn))
5034 continue;
5036 /* Basically ignore anything that is not a simple SET. We do have
5037 to make sure to invalidate nonnull_local and set nonnull_killed
5038 for such insns though. */
5039 set = single_set (insn);
5040 if (!set)
5042 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5043 continue;
5046 /* See if we've got a usable memory load. We handle it first
5047 in case it uses its address register as a dest (which kills
5048 the nonnull property). */
5049 if (GET_CODE (SET_SRC (set)) == MEM
5050 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5051 && REGNO (reg) >= npi->min_reg
5052 && REGNO (reg) < npi->max_reg)
5053 SET_BIT (nonnull_local[current_block],
5054 REGNO (reg) - npi->min_reg);
5056 /* Now invalidate stuff clobbered by this insn. */
5057 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5059 /* And handle stores. We do these last since any sets in INSN
5060 cannot kill the nonnull property if it is derived from a MEM
5061 appearing in a SET_DEST. */
5062 if (GET_CODE (SET_DEST (set)) == MEM
5063 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5064 && REGNO (reg) >= npi->min_reg
5065 && REGNO (reg) < npi->max_reg)
5066 SET_BIT (nonnull_local[current_block],
5067 REGNO (reg) - npi->min_reg);
5071 /* Now compute global properties based on the local properties. This
5072 is a classic global availability algorithm. */
5073 compute_available (nonnull_local, nonnull_killed,
5074 nonnull_avout, nonnull_avin);
5076 /* Now look at each bb and see if it ends with a compare of a value
5077 against zero. */
5078 for (bb = 0; bb < n_basic_blocks; bb++)
5080 rtx last_insn = BLOCK_END (bb);
5081 rtx condition, earliest;
5082 int compare_and_branch;
5084 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5085 since BLOCK_REG[BB] is zero if this block did not end with a
5086 comparison against zero, this condition works. */
5087 if (block_reg[bb] < npi->min_reg
5088 || block_reg[bb] >= npi->max_reg)
5089 continue;
5091 /* LAST_INSN is a conditional jump. Get its condition. */
5092 condition = get_condition (last_insn, &earliest);
5094 /* If we can't determine the condition then skip. */
5095 if (! condition)
5096 continue;
5098 /* Is the register known to have a nonzero value? */
5099 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5100 continue;
5102 /* Try to compute whether the compare/branch at the block end is one or
5103 two instructions. */
5104 if (earliest == last_insn)
5105 compare_and_branch = 1;
5106 else if (earliest == prev_nonnote_insn (last_insn))
5107 compare_and_branch = 2;
5108 else
5109 continue;
5111 /* We know the register in this comparison is nonnull at exit from
5112 this block. We can optimize this comparison. */
5113 if (GET_CODE (condition) == NE)
5115 rtx new_jump;
5117 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5118 last_insn);
5119 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5120 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5121 emit_barrier_after (new_jump);
5123 if (!*delete_list)
5124 VARRAY_RTX_INIT (*delete_list, 10, "delete_list");
5126 VARRAY_PUSH_RTX (*delete_list, last_insn);
5127 if (compare_and_branch == 2)
5128 VARRAY_PUSH_RTX (*delete_list, earliest);
5130 /* Don't check this block again. (Note that BLOCK_END is
5131 invalid here; we deleted the last instruction in the
5132 block.) */
5133 block_reg[bb] = 0;
5137 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5138 at compile time.
5140 This is conceptually similar to global constant/copy propagation and
5141 classic global CSE (it even uses the same dataflow equations as cprop).
5143 If a register is used as a memory address with the form (mem (reg)), then we
5144 know that REG cannot be zero at that point in the program. Any instruction
5145 which sets REG "kills" this property.
5147 So, if every path leading to a conditional branch has an available memory
5148 reference of that form, then we know the register cannot have the value
5149 zero at the conditional branch.
5151 So we merely need to compute the local properties and propagate that data
5152 around the cfg, then optimize where possible.
5154 We run this pass two times. Once before CSE, then again after CSE. This
5155 has proven to be the most profitable approach. It is rare for new
5156 optimization opportunities of this nature to appear after the first CSE
5157 pass.
5159 This could probably be integrated with global cprop with a little work. */
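/* At the source level the transformation is roughly (a hypothetical
   sketch):

       x = *p;           the load makes p provably nonnull ...
       ...               ... as long as nothing assigns to p
       if (p != 0) ...   always true: the branch becomes an
                         unconditional jump
       if (p == 0) ...   always false: the compare/branch is deleted

   subject to the availability computation over the whole CFG.  */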
5161 void
5162 delete_null_pointer_checks (f)
5163 rtx f ATTRIBUTE_UNUSED;
5165 sbitmap *nonnull_avin, *nonnull_avout;
5166 unsigned int *block_reg;
5167 varray_type delete_list = NULL;
5168 int bb;
5169 int reg;
5170 int regs_per_pass;
5171 int max_reg;
5172 unsigned int i;
5173 struct null_pointer_info npi;
5175 /* If we have only a single block, then there's nothing to do. */
5176 if (n_basic_blocks <= 1)
5177 return;
5179 /* Trying to perform global optimizations on flow graphs which have
5180 a high connectivity will take a long time and is unlikely to be
5181 particularly useful.
5183 In normal circumstances a cfg should have about twice as many edges
5184 as blocks. But we do not want to punish small functions which have
5185 a couple switch statements. So we require a relatively large number
5186 of basic blocks and the ratio of edges to blocks to be high. */
5187 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5188 return;
5190 /* We need four bitmaps, each with a bit for each register in each
5191 basic block. */
5192 max_reg = max_reg_num ();
5193 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5195 /* Allocate bitmaps to hold local and global properties. */
5196 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5197 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5198 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5199 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5201 /* Go through the basic blocks, seeing whether or not each block
5202 ends with a conditional branch whose condition is a comparison
5203 against zero. Record the register compared in BLOCK_REG. */
5204 block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (unsigned int));
5205 for (bb = 0; bb < n_basic_blocks; bb++)
5207 rtx last_insn = BLOCK_END (bb);
5208 rtx condition, earliest, reg;
5210 /* We only want conditional branches. */
5211 if (GET_CODE (last_insn) != JUMP_INSN
5212 || !any_condjump_p (last_insn)
5213 || !onlyjump_p (last_insn))
5214 continue;
5216 /* LAST_INSN is a conditional jump. Get its condition. */
5217 condition = get_condition (last_insn, &earliest);
5219 /* If we were unable to get the condition, or it is not an equality
5220 comparison against zero, then there's nothing we can do. */
5221 if (!condition
5222 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5223 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5224 || (XEXP (condition, 1)
5225 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5226 continue;
5228 /* We must be checking a register against zero. */
5229 reg = XEXP (condition, 0);
5230 if (GET_CODE (reg) != REG)
5231 continue;
5233 block_reg[bb] = REGNO (reg);
5236 /* Go through the algorithm for each block of registers. */
5237 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5239 npi.min_reg = reg;
5240 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5241 delete_null_pointer_checks_1 (&delete_list, block_reg, nonnull_avin,
5242 nonnull_avout, &npi);
5245 /* Now delete the instructions all at once. This breaks the CFG. */
5246 if (delete_list)
5248 for (i = 0; i < VARRAY_ACTIVE_SIZE (delete_list); i++)
5249 delete_insn (VARRAY_RTX (delete_list, i));
5250 VARRAY_FREE (delete_list);
5253 /* Free the table of registers compared at the end of every block. */
5254 free (block_reg);
5256 /* Free bitmaps. */
5257 free (npi.nonnull_local);
5258 free (npi.nonnull_killed);
5259 free (nonnull_avin);
5260 free (nonnull_avout);
5263 /* Code Hoisting variables and subroutines. */
5265 /* Very busy expressions. */
5266 static sbitmap *hoist_vbein;
5267 static sbitmap *hoist_vbeout;
5269 /* Hoistable expressions. */
5270 static sbitmap *hoist_exprs;
5272 /* Dominator bitmaps. */
5273 static sbitmap *dominators;
5275 /* ??? We could compute post dominators and run this algorithm in
5276 reverse to perform tail merging; doing so would probably be
5277 more effective than the tail merging code in jump.c.
5279 It's unclear if tail merging could be run in parallel with
5280 code hoisting. It would be nice. */
5282 /* Allocate vars used for code hoisting analysis. */
5284 static void
5285 alloc_code_hoist_mem (n_blocks, n_exprs)
5286 int n_blocks, n_exprs;
5288 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5289 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5290 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5292 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5293 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5294 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5295 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5297 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5300 /* Free vars used for code hoisting analysis. */
5302 static void
5303 free_code_hoist_mem ()
5305 free (antloc);
5306 free (transp);
5307 free (comp);
5309 free (hoist_vbein);
5310 free (hoist_vbeout);
5311 free (hoist_exprs);
5312 free (transpout);
5314 free (dominators);
5317 /* Compute the very busy expressions at entry/exit from each block.
5319 An expression is very busy if all paths from a given point
5320 compute the expression. */
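/* The computation below iterates the backward dataflow equations

       VBEIN[bb]  = ANTLOC[bb] | (VBEOUT[bb] & TRANSP[bb])
       VBEOUT[bb] = intersection of VBEIN over the successors of bb

   to a fixed point, with VBEOUT of the last block left empty.  */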
5322 static void
5323 compute_code_hoist_vbeinout ()
5325 int bb, changed, passes;
5327 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5328 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5330 passes = 0;
5331 changed = 1;
5333 while (changed)
5335 changed = 0;
5337 /* We scan the blocks in reverse order to speed up
5338 the convergence. */
5339 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5341 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5342 hoist_vbeout[bb], transp[bb]);
5343 if (bb != n_basic_blocks - 1)
5344 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5347 passes++;
5350 if (gcse_file)
5351 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5354 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5356 static void
5357 compute_code_hoist_data ()
5359 compute_local_properties (transp, comp, antloc, 0);
5360 compute_transpout ();
5361 compute_code_hoist_vbeinout ();
5362 calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5363 if (gcse_file)
5364 fprintf (gcse_file, "\n");
5367 /* Determine if the expression identified by EXPR_INDEX would
5368 reach BB unimpaired if it was placed at the end of EXPR_BB.
5370 It's unclear exactly what Muchnick meant by "unimpaired". It seems
5371 to me that the expression must either be computed or transparent in
5372 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5373 would allow the expression to be hoisted out of loops, even if
5374 the expression wasn't a loop invariant.
5376 Contrast this to reachability for PRE where an expression is
5377 considered reachable if *any* path reaches instead of *all*
5378 paths. */
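/* For example, if EXPR_BB dominates BB but some path from EXPR_BB to BB
   passes through a block that kills the expression, the "every path"
   requirement fails and we do not hoist; under PRE's "any path" notion
   the expression could still be considered to reach BB.  */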
5380 static int
5381 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5382 int expr_bb;
5383 int expr_index;
5384 int bb;
5385 char *visited;
5387 edge pred;
5388 int visited_allocated_locally = 0;
5391 if (visited == NULL)
5393 visited_allocated_locally = 1;
5394 visited = xcalloc (n_basic_blocks, 1);
5397 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5399 int pred_bb = pred->src->index;
5401 if (pred->src == ENTRY_BLOCK_PTR)
5402 break;
5403 else if (visited[pred_bb])
5404 continue;
5406 /* Does this predecessor generate this expression? */
5407 else if (TEST_BIT (comp[pred_bb], expr_index))
5408 break;
5409 else if (! TEST_BIT (transp[pred_bb], expr_index))
5410 break;
5412 /* Not killed. */
5413 else
5415 visited[pred_bb] = 1;
5416 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5417 pred_bb, visited))
5418 break;
5421 if (visited_allocated_locally)
5422 free (visited);
5424 return (pred == NULL);
5427 /* Actually perform code hoisting. */
5429 static void
5430 hoist_code ()
5432 int bb, dominated;
5433 unsigned int i;
5434 struct expr **index_map;
5435 struct expr *expr;
5437 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5439 /* Compute a mapping from expression number (`bitmap_index') to
5440 hash table entry. */
5442 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5443 for (i = 0; i < expr_hash_table_size; i++)
5444 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5445 index_map[expr->bitmap_index] = expr;
5447 /* Walk over each basic block looking for potentially hoistable
5448 expressions; nothing gets hoisted from the entry block. */
5449 for (bb = 0; bb < n_basic_blocks; bb++)
5451 int found = 0;
5452 int insn_inserted_p;
5454 /* Examine each expression that is very busy at the exit of this
5455 block. These are the potentially hoistable expressions. */
5456 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5458 int hoistable = 0;
5460 if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5462 /* We've found a potentially hoistable expression, now
5463 we look at every block BB dominates to see if it
5464 computes the expression. */
5465 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5467 /* Ignore self dominance. */
5468 if (bb == dominated
5469 || ! TEST_BIT (dominators[dominated], bb))
5470 continue;
5472 /* We've found a dominated block, now see if it computes
5473 the busy expression and whether or not moving that
5474 expression to the "beginning" of that block is safe. */
5475 if (!TEST_BIT (antloc[dominated], i))
5476 continue;
5478 /* Note if the expression would reach the dominated block
5479 unimpaired if it was placed at the end of BB.
5481 Keep track of how many times this expression is hoistable
5482 from a dominated block into BB. */
5483 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5484 hoistable++;
5487 /* If we found more than one hoistable occurrence of this
5488 expression, then note it in the bitmap of expressions to
5489 hoist. It makes no sense to hoist things which are computed
5490 in only one BB, and doing so tends to pessimize register
5491 allocation. One could increase this value to try harder
5492 to avoid any possible code expansion due to register
5493 allocation issues; however experiments have shown that
5494 the vast majority of hoistable expressions are only movable
5495 from two successors, so raising this threshold is likely
5496 to nullify any benefit we get from code hoisting. */
5497 if (hoistable > 1)
5499 SET_BIT (hoist_exprs[bb], i);
5500 found = 1;
5505 /* If we found nothing to hoist, then quit now. */
5506 if (! found)
5507 continue;
5509 /* Loop over all the hoistable expressions. */
5510 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5512 /* We want to insert the expression into BB only once, so
5513 note when we've inserted it. */
5514 insn_inserted_p = 0;
5516 /* These tests should be the same as the tests above. */
5517 if (TEST_BIT (hoist_vbeout[bb], i))
5519 /* We've found a potentially hoistable expression, now
5520 we look at every block BB dominates to see if it
5521 computes the expression. */
5522 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5524 /* Ignore self dominance. */
5525 if (bb == dominated
5526 || ! TEST_BIT (dominators[dominated], bb))
5527 continue;
5529 /* We've found a dominated block, now see if it computes
5530 the busy expression and whether or not moving that
5531 expression to the "beginning" of that block is safe. */
5532 if (!TEST_BIT (antloc[dominated], i))
5533 continue;
5535 /* The expression is computed in the dominated block and
5536 it would be safe to compute it at the start of the
5537 dominated block. Now we have to determine if the
5538 expression would reach the dominated block if it was
5539 placed at the end of BB. */
5540 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5542 struct expr *expr = index_map[i];
5543 struct occr *occr = expr->antic_occr;
5544 rtx insn;
5545 rtx set;
5547 /* Find the right occurrence of this expression. */
5548 while (occr && BLOCK_NUM (occr->insn) != dominated)
5549 occr = occr->next;
5551 /* Should never happen. */
5552 if (!occr)
5553 abort ();
5555 insn = occr->insn;
5557 set = single_set (insn);
5558 if (! set)
5559 abort ();
5561 /* Create a pseudo-reg to store the result of reaching
5562 expressions into. Get the mode for the new pseudo
5563 from the mode of the original destination pseudo. */
5564 if (expr->reaching_reg == NULL)
5565 expr->reaching_reg
5566 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5568 /* In theory this should never fail since we're creating
5569 a reg->reg copy.
5571 However, on the x86 some of the movXX patterns
5572 actually contain clobbers of scratch regs. This may
5573 cause the insn created by validate_change to not
5574 match any pattern and thus cause validate_change to
5575 fail. */
5576 if (validate_change (insn, &SET_SRC (set),
5577 expr->reaching_reg, 0))
5579 occr->deleted_p = 1;
5580 if (!insn_inserted_p)
5582 insert_insn_end_bb (index_map[i], bb, 0);
5583 insn_inserted_p = 1;
5592 free (index_map);
5595 /* Top level routine to perform one code hoisting (aka unification) pass.
5597 Return non-zero if a change was made. */
5599 static int
5600 one_code_hoisting_pass ()
5602 int changed = 0;
5604 alloc_expr_hash_table (max_cuid);
5605 compute_expr_hash_table ();
5606 if (gcse_file)
5607 dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
5608 expr_hash_table_size, n_exprs);
5610 if (n_exprs > 0)
5612 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5613 compute_code_hoist_data ();
5614 hoist_code ();
5615 free_code_hoist_mem ();
5618 free_expr_hash_table ();
5620 return changed;